
LiteLLM Hugging Face

Docs for the Hugging Face provider: https://docs.litellm.ai/docs/providers/huggingface

python
!pip install litellm

Serverless Inference Providers

Read more about Inference Providers here: https://huggingface.co/blog/inference-providers.

To use LiteLLM with Hugging Face Inference Providers, set model=huggingface/<provider>/<model-id>.

Example: huggingface/together/deepseek-ai/DeepSeek-R1 to run DeepSeek-R1 (https://huggingface.co/deepseek-ai/DeepSeek-R1) through Together AI.

python
import os
from litellm import completion

# You can create a HF token here: https://huggingface.co/settings/tokens
os.environ["HF_TOKEN"] = "hf_xxxxxx"

# Call DeepSeek-R1 model through Together AI
response = completion(
    model="huggingface/together/deepseek-ai/DeepSeek-R1",
    messages=[{"content": "How many r's are in the word `strawberry`?", "role": "user"}],
)
print(response)
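
LiteLLM normalizes responses to the OpenAI schema, so the reply text can be pulled directly out of the response object:

python
# The response follows the OpenAI format: the model's reply text
# lives under choices[0].message.content.
print(response.choices[0].message.content)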

Streaming

python
import os
from litellm import completion

os.environ["HF_TOKEN"] = "hf_xxxxxx"

response = completion(
    model="huggingface/together/deepseek-ai/DeepSeek-R1",
    messages=[
        {
            "role": "user",
            "content": "How many r's are in the word `strawberry`?",
            
        }
    ],
    stream=True,
)

for chunk in response:
    print(chunk)
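
Each streamed chunk also follows the OpenAI format, with the incremental text under choices[0].delta.content. A minimal sketch that reassembles the full reply instead of printing raw chunk objects (re-run the completion call first, since the stream above is already consumed):

python
full_reply = ""
for chunk in response:
    delta = chunk.choices[0].delta.content
    if delta:  # some chunks (e.g. the final one) carry no text
        full_reply += delta
print(full_reply)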

With images as input

python
import os
from litellm import completion

# Set your Hugging Face token
os.environ["HF_TOKEN"] = "hf_xxxxxx"

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {
                "type": "image_url",
                "image_url": {
                    "url": "https://awsmp-logos.s3.amazonaws.com/seller-xw5kijmvmzasy/c233c9ade2ccb5491072ae232c814942.png",
                },
            },
        ],
    }
]

response = completion(
    model="huggingface/sambanova/meta-llama/Llama-3.3-70B-Instruct",
    messages=messages,
)
print(response.choices[0])
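
Remote URLs are not the only option: in the OpenAI message format that LiteLLM accepts, a local file can be passed as a base64 data URL. A sketch, assuming a hypothetical local file my_image.png:

python
import base64

# Hypothetical local file; use a MIME type matching your image format
with open("my_image.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_b64}"},
            },
        ],
    }
]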

Tools - Function Calling

python
import os
from litellm import completion


# Set your Hugging Face Token
os.environ["HF_TOKEN"] = "hf_xxxxxx"

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

response = completion(
    model="huggingface/sambanova/meta-llama/Llama-3.1-8B-Instruct",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)
print(response)
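
If the model decides to call the function, the call comes back in the OpenAI tool-calling format under message.tool_calls, with the arguments serialized as a JSON string:

python
import json

# tool_calls is None when the model answered directly instead of
# calling the tool, so check before indexing.
tool_calls = response.choices[0].message.tool_calls
if tool_calls:
    call = tool_calls[0]
    print(call.function.name)                    # "get_current_weather"
    print(json.loads(call.function.arguments))   # e.g. {"location": "Boston, MA"}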

Hugging Face Dedicated Inference Endpoints

Steps to use

  • Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/
  • Set api_base to the base URL of your deployed endpoint.
  • Set model to huggingface/tgi so that LiteLLM knows it's a Hugging Face dedicated Inference Endpoint.

python
import os
import litellm

# Dedicated endpoints still authenticate with your Hugging Face token
os.environ["HF_TOKEN"] = "hf_xxxxxx"
response = litellm.completion(
    model="huggingface/tgi",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/",
)
print(response)
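
Dedicated endpoints work with the rest of the LiteLLM API as well, e.g. async calls via litellm.acompletion. A sketch that streams from the same placeholder endpoint:

python
import asyncio
import litellm

async def main():
    # stream=True makes acompletion return an async iterator of chunks
    response = await litellm.acompletion(
        model="huggingface/tgi",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
        api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/",
        stream=True,
    )
    async for chunk in response:
        print(chunk.choices[0].delta.content or "", end="")

asyncio.run(main())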