cookbook/LiteLLM_HuggingFace.ipynb
LiteLLM docs for Hugging Face: https://docs.litellm.ai/docs/providers/huggingface
!pip install litellm
Read more about Inference Providers here: https://huggingface.co/blog/inference-providers.
To use litellm with Hugging Face Inference Providers, set model=huggingface/<provider>/<model-id>.
Example: huggingface/together/deepseek-ai/DeepSeek-R1 runs DeepSeek-R1 (https://huggingface.co/deepseek-ai/DeepSeek-R1) through Together AI.
import os
from litellm import completion
# You can create a HF token here: https://huggingface.co/settings/tokens
os.environ["HF_TOKEN"] = "hf_xxxxxx"
# Call DeepSeek-R1 model through Together AI
response = completion(
model="huggingface/together/deepseek-ai/DeepSeek-R1",
messages=[{"content": "How many r's are in the word `strawberry`?", "role": "user"}],
)
print(response)
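print(response) dumps the whole ModelResponse object. To get just the generated text, the OpenAI-style accessors apply; a minimal sketch:
# Print only the assistant's reply text
print(response.choices[0].message.content)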
import os
from litellm import completion
os.environ["HF_TOKEN"] = "hf_xxxxxx"
response = completion(
model="huggingface/together/deepseek-ai/DeepSeek-R1",
messages=[
{
"role": "user",
"content": "How many r's are in the word `strawberry`?",
}
],
stream=True,
)
for chunk in response:
print(chunk)
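Instead of printing raw chunk objects, you can stitch the streamed deltas back into the full reply. A minimal sketch, re-requesting since a stream can only be consumed once, and assuming the OpenAI-style delta schema litellm emits:
response = completion(
    model="huggingface/together/deepseek-ai/DeepSeek-R1",
    messages=[{"role": "user", "content": "How many r's are in the word `strawberry`?"}],
    stream=True,
)
full_reply = ""
for chunk in response:
    delta = chunk.choices[0].delta.content
    if delta is not None:  # role/finish chunks carry no text
        full_reply += delta
print(full_reply)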
import os
from litellm import completion
# Set your Hugging Face Token
os.environ["HF_TOKEN"] = "hf_xxxxxx"
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{
"type": "image_url",
"image_url": {
"url": "https://awsmp-logos.s3.amazonaws.com/seller-xw5kijmvmzasy/c233c9ade2ccb5491072ae232c814942.png",
},
},
],
}
]
response = completion(
model="huggingface/sambanova/meta-llama/Llama-3.3-70B-Instruct",
messages=messages,
)
print(response.choices[0])
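Remote URLs aren't the only option: you can also pass a local image as a base64 data URL in the same image_url field. A minimal sketch, assuming a hypothetical local file cat.png:
import base64

# Encode a local image as a data URL (cat.png is a placeholder path)
with open("cat.png", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
        ],
    }
]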
import os
from litellm import completion
# Set your Hugging Face Token
os.environ["HF_TOKEN"] = "hf_xxxxxx"
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
response = completion(
model="huggingface/sambanova/meta-llama/Llama-3.1-8B-Instruct", messages=messages, tools=tools, tool_choice="auto"
)
print(response)
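When the model decides to call the tool, the arguments come back as a JSON string under response.choices[0].message.tool_calls. A minimal sketch of the follow-up round trip, where the tool result is a placeholder you'd replace with a real get_current_weather implementation:
import json

tool_call = response.choices[0].message.tool_calls[0]
args = json.loads(tool_call.function.arguments)  # e.g. {"location": "Boston, MA"}

# Placeholder tool output; run your real weather lookup here
result = '{"temperature": 72, "unit": "fahrenheit"}'

# Append the assistant's tool-call turn, then the tool result, and ask again
messages.append(response.choices[0].message)
messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": result})

follow_up = completion(
    model="huggingface/sambanova/meta-llama/Llama-3.1-8B-Instruct",
    messages=messages,
    tools=tools,
)
print(follow_up.choices[0].message.content)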
Steps to use a deployed Hugging Face Inference Endpoint:
1. Set api_base to your deployed endpoint's URL.
2. Set model to huggingface/tgi so that litellm knows it's a Hugging Face Deployed Inference Endpoint.
import os
import litellm
# Set your Hugging Face Token (needed if your endpoint is protected)
os.environ["HF_TOKEN"] = "hf_xxxxxx"
response = litellm.completion(
model="huggingface/tgi",
messages=[{"content": "Hello, how are you?", "role": "user"}],
api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/",
)
print(response)
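The same endpoint also works asynchronously via litellm.acompletion; a minimal sketch:
import asyncio
import litellm

async def main():
    # Same model string and api_base as the synchronous call above
    response = await litellm.acompletion(
        model="huggingface/tgi",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
        api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/",
    )
    print(response.choices[0].message.content)

asyncio.run(main())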