packages/graphrag-llm/notebooks/01_basic.ipynb
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import os
from collections.abc import AsyncIterator, Iterator
from dotenv import load_dotenv
from graphrag_llm.completion import LLMCompletion, create_completion
from graphrag_llm.config import AuthMethod, ModelConfig
from graphrag_llm.types import LLMCompletionChunk, LLMCompletionResponse
# Load env vars, then build the chat-model configuration.
# With no API key present, fall back to Azure managed identity auth.
load_dotenv()
api_key = os.getenv("GRAPHRAG_API_KEY")
# The same env var supplies both the model name and the Azure deployment name.
_chat_model = os.getenv("GRAPHRAG_MODEL", "gpt-4o")
model_config = ModelConfig(
    model_provider="azure",
    model=_chat_model,
    azure_deployment_name=_chat_model,
    api_base=os.getenv("GRAPHRAG_API_BASE"),
    api_version=os.getenv("GRAPHRAG_API_VERSION", "2025-04-01-preview"),
    api_key=api_key,
    auth_method=AuthMethod.ApiKey if api_key else AuthMethod.AzureManagedIdentity,
)
# Create a completion client and issue a basic (non-streaming by default) request.
llm_completion: LLMCompletion = create_completion(model_config)
response: LLMCompletionResponse | Iterator[LLMCompletionChunk] = (
    llm_completion.completion(
        messages="What is the capital of France?",
    )
)
if not isinstance(response, Iterator):
    # Non-streaming: the whole message is available at once.
    print(response.choices[0].message.content)
    # The .content property is shorthand for the first choice's message content,
    # so this prints the same text as the line above.
    print(response.content)
    print("Full Response:")
    print(response.model_dump_json(indent=2))  # type: ignore
else:
    # Streaming: emit each incremental delta as it arrives.
    for piece in response:
        print(piece.choices[0].delta.content or "", end="", flush=True)
response: LLMCompletionResponse = await llm_completion.completion_async(
messages="What is the capital of France?",
) # type: ignore
print(response.content)
response = llm_completion.completion(
messages="What is the capital of France?",
stream=True,
)
if isinstance(response, Iterator):
# Streaming response
for chunk in response:
print(chunk.choices[0].delta.content or "", end="", flush=True)
response = await llm_completion.completion_async(
messages="What is the capital of France?",
stream=True,
)
if isinstance(response, AsyncIterator):
# Streaming response
async for chunk in response:
print(chunk.choices[0].delta.content or "", end="", flush=True)
The completion API adheres to the litellm completion API and thus the OpenAI SDK API. The messages parameter can be one of the following:
- str: a raw prompt string.
- list[dict[str, Any]]: a list of dicts of the form {"role": "user|system|...", "content": "..."}.
- list[ChatCompletionMessageParam]: a list of OpenAI ChatCompletionMessageParam objects. graphrag_llm.utils provides a CompletionMessagesBuilder to help construct these; see the message-builder notebook for more details.
from graphrag_llm.utils import (
CompletionMessagesBuilder,
)
# Three equivalent ways to supply the `messages` parameter.

# 1) A raw prompt string.
response1: LLMCompletionResponse = llm_completion.completion(
    messages="What is the capital of France?"
)  # type: ignore
print(response1.content)

# 2) A list of role/content dicts (OpenAI chat format).
response2: LLMCompletionResponse = llm_completion.completion(
    messages=[{"role": "user", "content": "What is the capital of France?"}]
)  # type: ignore
print(response2.content)

# 3) The builder, for assembling a multi-turn conversation.
#    Each add_* call's return value is used, so this is equivalent to chaining.
builder = CompletionMessagesBuilder()
builder = builder.add_system_message(
    "You are a helpful assistant that likes to talk like a pirate. Respond as if you are a pirate using pirate speak."
)
builder = builder.add_user_message("Is pluto a planet? Respond with a yes or no.")
builder = builder.add_assistant_message("Aye, matey! Pluto be a planet in me book.")
builder = builder.add_user_message("Are you sure? I want the truth. Can you elaborate?")
messages = builder.build()
response3: LLMCompletionResponse = llm_completion.completion(messages=messages)  # type: ignore
print(response3.content)
from graphrag_llm.embedding import LLMEmbedding, create_embedding
from graphrag_llm.types import LLMEmbeddingResponse
# Configuration for the embedding model. Mirrors the completion config above,
# where a single env var supplies both the model name and the deployment name.
_embedding_model = os.getenv("GRAPHRAG_EMBEDDING_MODEL", "text-embedding-3-small")
embedding_config = ModelConfig(
    model_provider="azure",
    model=_embedding_model,
    # Fix: previously read GRAPHRAG_LLM_EMBEDDING_MODEL here while model= read
    # GRAPHRAG_EMBEDDING_MODEL, so setting the documented env var left the
    # deployment name at its default. Use the same env var for both.
    azure_deployment_name=_embedding_model,
    api_base=os.getenv("GRAPHRAG_API_BASE"),
    api_version=os.getenv("GRAPHRAG_API_VERSION", "2025-04-01-preview"),
    api_key=api_key,
    auth_method=AuthMethod.AzureManagedIdentity if not api_key else AuthMethod.ApiKey,
)
# Create the embedding client and embed a batch of strings in one call.
llm_embedding: LLMEmbedding = create_embedding(embedding_config)
embeddings_batch: LLMEmbeddingResponse = llm_embedding.embedding(
    input=["Hello world", "How are you?"]
)
# Show just the first three dimensions of each returned vector.
for vector in embeddings_batch.embeddings:
    print(vector[:3])
.embedding batches by default: it takes a list of strings to embed. When embedding a single string, use .first_embedding on the response to obtain its embedding.
embedding_response = llm_embedding.embedding(
input=["This is a single input string for embedding."]
)
print(embedding_response.first_embedding[0:3])
embeddings_batch = await llm_embedding.embedding_async(
input=["Hello world", "How are you?"]
)
for embedding in embeddings_batch.embeddings:
print(embedding[0:3])