cookbook/liteLLM_Ollama.ipynb
!pip install litellm # version 0.1.724 or higher
from litellm import completion

# with stream=True, completion returns a generator that yields response chunks
response = completion(
    model="ollama/llama2",
    messages=[{"content": "respond in 20 words. who are you?", "role": "user"}],
    api_base="http://localhost:11434",
    stream=True
)
print(response)

for chunk in response:
    print(chunk['choices'][0]['delta'])
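# The deltas can be accumulated back into the full reply text. A minimal
# sketch, assuming each delta is a dict with an optional 'content' key, as in
# the loop above (that stream is already consumed, so a fresh call is made):
full_reply = ""
stream = completion(
    model="ollama/llama2",
    messages=[{"content": "respond in 20 words. who are you?", "role": "user"}],
    api_base="http://localhost:11434",
    stream=True
)
for chunk in stream:
    delta = chunk['choices'][0]['delta']
    full_reply += delta.get('content') or ""
print(full_reply)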
# litellm uses async_generator for Ollama async streaming, so make sure it's installed
!pip install async_generator
import litellm

async def async_ollama():
    # acompletion with stream=True resolves to an async generator of chunks
    response = await litellm.acompletion(
        model="ollama/llama2",
        messages=[{"content": "what's the weather", "role": "user"}],
        api_base="http://localhost:11434",
        stream=True
    )
    return response

result = await async_ollama()
print(result)

try:
    async for chunk in result:
        print(chunk)
except TypeError:
    # the last chunk from Ollama is None, which raises a TypeError
    # when async streaming iterates past it
    pass
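# Top-level `await` only works in a notebook/IPython session. A minimal sketch
# of driving the same coroutine from a plain Python script with asyncio
# (main is a hypothetical wrapper, not part of litellm):
import asyncio

async def main():
    stream = await async_ollama()
    try:
        async for chunk in stream:
            print(chunk)
    except TypeError:
        pass  # same trailing None chunk as above

# asyncio.run(main())  # uncomment when running outside a notebook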
from litellm import completion

response = completion(
    model="ollama/llama2",
    messages=[{"content": "respond in 20 words. who are you?", "role": "user"}],
    api_base="http://localhost:11434"
)
print(response)
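# Without stream=True, the full response follows the OpenAI response shape,
# so the reply text can be read out directly (assuming the same dict-style
# access used in the streaming examples above):
print(response['choices'][0]['message']['content'])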