cookbook/liteLLM_Baseten.ipynb
This notebook demonstrates how to use LiteLLM with Baseten's Model APIs instead of dedicated deployments. A completion call looks like this:
response = completion(
    model="baseten/openai/gpt-oss-120b",
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=1000,
    temperature=0.7,
)
%pip install litellm
import os
from litellm import completion
# Set your Baseten API key
os.environ['BASETEN_API_KEY'] = "" #@param {type:"string"}
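If you prefer not to paste the key into the cell, a small stdlib-only alternative is to prompt for it at runtime (purely a convenience sketch; getpass reads without echoing):

from getpass import getpass

# Only prompt if the key isn't already set in the environment
if not os.environ.get("BASETEN_API_KEY"):
    os.environ["BASETEN_API_KEY"] = getpass("Baseten API key: ")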
# Test message
messages = [{"role": "user", "content": "What is AGI?"}]
Simple (non-streaming) completion with the GPT-OSS 120B model, including common sampling parameters
print("=== Basic Completion ===")
response = completion(
    model="baseten/openai/gpt-oss-120b",
    messages=messages,
    max_tokens=1000,
    temperature=0.7,
    top_p=0.9,
    presence_penalty=0.1,
    frequency_penalty=0.1,
)
print(f"Response: {response.choices[0].message.content}")
print(f"Usage: {response.usage}")
Streaming completion, with usage statistics requested via stream_options
print("=== Streaming Completion ===")
response = completion(
    model="baseten/openai/gpt-oss-120b",
    messages=[{"role": "user", "content": "Write a short poem about AI"}],
    stream=True,
    max_tokens=500,
    temperature=0.8,
    stream_options={
        "include_usage": True,
        "continuous_usage_stats": True,
    },
)
print("Streaming response:")
# Print tokens as they arrive
for chunk in response:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print("\n")