docs/examples/instrumentation/observe_api_calls.ipynb
Using the new instrumentation package, we can get direct observability into the API calls made to LLMs and embedding models.
In this notebook, we define a simple event handler and attach it to the root dispatcher to observe those LLM and embedding calls.
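If you're opening this notebook in a fresh environment (e.g. Colab), install LlamaIndex first. The example also assumes an OpenAI API key, set in the next cell.

%pip install llama-index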
import os
os.environ["OPENAI_API_KEY"] = "sk-..."
from llama_index.core.instrumentation.event_handlers import BaseEventHandler
from llama_index.core.instrumentation.events.llm import (
LLMCompletionEndEvent,
LLMChatEndEvent,
)
from llama_index.core.instrumentation.events.embedding import EmbeddingEndEvent
class ModelEventHandler(BaseEventHandler):
    @classmethod
    def class_name(cls) -> str:
        """Class name."""
        return "ModelEventHandler"

    def handle(self, event) -> None:
        """Logic for handling event."""
        if isinstance(event, LLMCompletionEndEvent):
            print(f"LLM Prompt length: {len(event.prompt)}")
            print(f"LLM Completion: {str(event.response.text)}")
        elif isinstance(event, LLMChatEndEvent):
            messages_str = "\n".join([str(x) for x in event.messages])
            print(f"LLM Input Messages length: {len(messages_str)}")
            print(f"LLM Response: {str(event.response.message)}")
        elif isinstance(event, EmbeddingEndEvent):
            print(f"Embedding {len(event.chunks)} text chunks")
from llama_index.core.instrumentation import get_dispatcher
# root dispatcher
root_dispatcher = get_dispatcher()
# register event handler
root_dispatcher.add_event_handler(ModelEventHandler())
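From here on, any LLM or embedding call made through LlamaIndex is reported to our handler. To see it in action, we build a small index over the example document and query it: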
from llama_index.core import Document, VectorStoreIndex
index = VectorStoreIndex.from_documents([Document.example()])
query_engine = index.as_query_engine()
response = query_engine.query("Tell me about LLMs?")
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("Repeat only these two words: Hello world!")
for r in response.response_gen:
    # consume the stream; we only care about the events printed by the handler
    ...
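The handler isn't tied to query engines. As a minimal sketch (assuming the llama-index-llms-openai integration is installed, which the llama-index package pulls in by default), a direct chat call dispatches the same LLMChatEndEvent:

from llama_index.core.llms import ChatMessage
from llama_index.llms.openai import OpenAI

# direct LLM calls go through the same root dispatcher,
# so ModelEventHandler.handle sees the LLMChatEndEvent
llm = OpenAI()
response = llm.chat([ChatMessage(role="user", content="Hello!")])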