Back to Llama Index

API Call Observability

docs/examples/instrumentation/observe_api_calls.ipynb

0.14.212.0 KB
Original Source

API Call Observability

Using the new instrumentation package, we can get direct observability into API calls made using LLMs and emebdding models.

In this notebook, we explore doing this in order to add observability to LLM and embedding calls.

python
import os

os.environ["OPENAI_API_KEY"] = "sk-..."

Defining an Event Handler

python
from llama_index.core.instrumentation.event_handlers import BaseEventHandler
from llama_index.core.instrumentation.events.llm import (
    LLMCompletionEndEvent,
    LLMChatEndEvent,
)
from llama_index.core.instrumentation.events.embedding import EmbeddingEndEvent


class ModelEventHandler(BaseEventHandler):
    @classmethod
    def class_name(cls) -> str:
        """Class name."""
        return "ModelEventHandler"

    def handle(self, event) -> None:
        """Logic for handling event."""
        if isinstance(event, LLMCompletionEndEvent):
            print(f"LLM Prompt length: {len(event.prompt)}")
            print(f"LLM Completion: {str(event.response.text)}")
        elif isinstance(event, LLMChatEndEvent):
            messages_str = "\n".join([str(x) for x in event.messages])
            print(f"LLM Input Messages length: {len(messages_str)}")
            print(f"LLM Response: {str(event.response.message)}")
        elif isinstance(event, EmbeddingEndEvent):
            print(f"Embedding {len(event.chunks)} text chunks")

Attaching the Event Handler

python
from llama_index.core.instrumentation import get_dispatcher

# root dispatcher
root_dispatcher = get_dispatcher()

# register event handler
root_dispatcher.add_event_handler(ModelEventHandler())

Invoke the Handler!

python
from llama_index.core import Document, VectorStoreIndex

index = VectorStoreIndex.from_documents([Document.example()])
python
query_engine = index.as_query_engine()
response = query_engine.query("Tell me about LLMs?")
python
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("Repeat only these two words: Hello world!")
for r in response.response_gen:
    ...