Back to Graphrag

Basic Completion and Embedding Examples

packages/graphrag-llm/notebooks/01_basic.ipynb

3.0.9 · 5.4 KB
Original Source

Basic Completion and Embedding Examples

Completion

python
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

import os
from collections.abc import AsyncIterator, Iterator

from dotenv import load_dotenv
from graphrag_llm.completion import LLMCompletion, create_completion
from graphrag_llm.config import AuthMethod, ModelConfig
from graphrag_llm.types import LLMCompletionChunk, LLMCompletionResponse

# Pull GRAPHRAG_* settings from a local .env file into the environment.
load_dotenv()

api_key = os.getenv("GRAPHRAG_API_KEY")
# Use key-based auth when a key is configured, Azure managed identity otherwise.
auth = AuthMethod.ApiKey if api_key else AuthMethod.AzureManagedIdentity
model_config = ModelConfig(
    model_provider="azure",
    model=os.getenv("GRAPHRAG_MODEL", "gpt-4o"),
    azure_deployment_name=os.getenv("GRAPHRAG_MODEL", "gpt-4o"),
    api_base=os.getenv("GRAPHRAG_API_BASE"),
    api_version=os.getenv("GRAPHRAG_API_VERSION", "2025-04-01-preview"),
    api_key=api_key,
    auth_method=auth,
)
llm_completion: LLMCompletion = create_completion(model_config)

# Without stream=True this returns a single response, but the declared type
# covers both cases, so we narrow with isinstance below.
response: LLMCompletionResponse | Iterator[LLMCompletionChunk] = (
    llm_completion.completion(
        messages="What is the capital of France?",
    )
)

if not isinstance(response, Iterator):
    # Non-streaming: the full answer is available immediately.
    print(response.choices[0].message.content)
    # The `content` property is shorthand for the first choice's message content.
    print(response.content)
else:
    # Streaming: print each delta chunk as it arrives.
    for chunk in response:
        print(chunk.choices[0].delta.content or "", end="", flush=True)

print("Full Response:")
print(response.model_dump_json(indent=2))  # type: ignore

Async Completion

python
# Await a single (non-streaming) completion via the async API.
# Top-level `await` works here because notebook cells run inside an event loop.
response: LLMCompletionResponse = await llm_completion.completion_async(
    messages="What is the capital of France?",
)  # type: ignore
print(response.content)

Streaming Completion

python
# With stream=True the call yields incremental chunks rather than a single
# response object.
response = llm_completion.completion(
    messages="What is the capital of France?",
    stream=True,
)

# Narrow the response union before iterating.
if isinstance(response, Iterator):
    for part in response:
        # A chunk's delta content may be None (e.g. the final chunk) — print "".
        print(part.choices[0].delta.content or "", end="", flush=True)

Async Streaming Completion

python
# Async streaming: stream=True makes completion_async return an async iterator
# of chunks (top-level `await` requires a notebook or other running event loop).
response = await llm_completion.completion_async(
    messages="What is the capital of France?",
    stream=True,
)

if isinstance(response, AsyncIterator):
    # Streaming response: print each token delta as it arrives.
    async for chunk in response:
        print(chunk.choices[0].delta.content or "", end="", flush=True)

Completion Arguments

The completion API adheres to the litellm completion API, and thus to the OpenAI SDK API. The messages parameter can be one of the following:

  • str: Raw string for the prompt.
  • list[dict[str, Any]]: A list of dicts in the form {"role": "user|system|...", "content": "..."}
  • list[ChatCompletionMessageParam]: A list of OpenAI ChatCompletionMessageParam. graphrag_llm.utils provides a CompletionMessagesBuilder to help construct these objects. See the message builder notebook for more details on using CompletionMessagesBuilder.
python
from graphrag_llm.utils import (
    CompletionMessagesBuilder,
)

# 1) Raw string prompt.
response1: LLMCompletionResponse = llm_completion.completion(
    messages="What is the capital of France?"
)  # type: ignore
print(response1.content)

# 2) List of {"role": ..., "content": ...} dicts.
response2: LLMCompletionResponse = llm_completion.completion(
    messages=[{"role": "user", "content": "What is the capital of France?"}]
)  # type: ignore
print(response2.content)

# 3) Build a multi-turn conversation step by step with the builder.
builder = CompletionMessagesBuilder()
builder = builder.add_system_message(
    "You are a helpful assistant that likes to talk like a pirate. Respond as if you are a pirate using pirate speak."
)
builder = builder.add_user_message("Is pluto a planet? Respond with a yes or no.")
builder = builder.add_assistant_message("Aye, matey! Pluto be a planet in me book.")
builder = builder.add_user_message("Are you sure? I want the truth. Can you elaborate?")
messages = builder.build()

response3: LLMCompletionResponse = llm_completion.completion(messages=messages)  # type: ignore
print(response3.content)

Embedding

python
from graphrag_llm.embedding import LLMEmbedding, create_embedding
from graphrag_llm.types import LLMEmbeddingResponse

# Embedding model configuration — mirrors the completion ModelConfig above,
# but points at an embedding deployment.
embedding_config = ModelConfig(
    model_provider="azure",
    model=os.getenv("GRAPHRAG_EMBEDDING_MODEL", "text-embedding-3-small"),
    # FIX: read the same env var as `model`. Previously this read
    # GRAPHRAG_LLM_EMBEDDING_MODEL, which is inconsistent with `model` above
    # and with the completion config (which uses GRAPHRAG_MODEL for both);
    # the deployment name would silently fall back to the default when only
    # GRAPHRAG_EMBEDDING_MODEL was set.
    azure_deployment_name=os.getenv(
        "GRAPHRAG_EMBEDDING_MODEL", "text-embedding-3-small"
    ),
    api_base=os.getenv("GRAPHRAG_API_BASE"),
    api_version=os.getenv("GRAPHRAG_API_VERSION", "2025-04-01-preview"),
    api_key=api_key,
    auth_method=AuthMethod.AzureManagedIdentity if not api_key else AuthMethod.ApiKey,
)

llm_embedding: LLMEmbedding = create_embedding(embedding_config)

# Embed a batch of strings; the response carries one vector per input.
embeddings_batch: LLMEmbeddingResponse = llm_embedding.embedding(
    input=["Hello world", "How are you?"]
)
for embedding in embeddings_batch.embeddings:
    # Show only the first three dimensions of each vector.
    print(embedding[0:3])

First Embedding

.embedding works on batches by default: it takes a list of strings to embed. If you are embedding a single string, use .first_embedding on the response to obtain the first (and only) embedding.

python
# Embed a single string (still passed as a one-element batch), then use the
# response's `first_embedding` property to pull out the lone vector.
embedding_response = llm_embedding.embedding(
    input=["This is a single input string for embedding."]
)

leading_dims = embedding_response.first_embedding[0:3]
print(leading_dims)

Async Embedding

python
# Async variant of batch embedding (top-level `await` requires a notebook
# or other running event loop).
embeddings_batch = await llm_embedding.embedding_async(
    input=["Hello world", "How are you?"]
)

for embedding in embeddings_batch.embeddings:
    # First three dimensions of each returned vector.
    print(embedding[0:3])