OpenAI Agent + Query Engine Experimental Cookbook

In this notebook, we try out the FunctionAgent across a variety of query engine tools and datasets. We explore how FunctionAgent can compare/replace existing workflows solved by our retrievers/query engines.

Auto retrieval
Joint SQL and vector search

NOTE: Any Text-to-SQL application should be aware that executing arbitrary SQL queries can be a security risk. It is recommended to take precautions as needed, such as using restricted roles, read-only databases, sandboxing, etc.

AutoRetrieval from a Vector Database

Our existing "auto-retrieval" capabilities (in VectorIndexAutoRetriever) allow an LLM to infer the right query parameters for a vector database - including both the query string and metadata filter.

Since the OpenAI Function API can infer function parameters, we explore its capabilities in performing auto-retrieval here.

If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.

python

%pip install llama-index
%pip install llama-index-llms-openai
%pip install llama-index-readers-wikipedia
%pip install llama-index-vector-stores-pinecone

python

import os

os.environ["PINECONE_API_KEY"] = "..."
os.environ["OPENAI_API_KEY"] = "..."

python

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

Settings.llm = OpenAI(model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

python

from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

python

# dimensions are for text-embedding-3-small
pc.create_index(
    name="quickstart-index",
    dimension=1536,
    metric="euclidean",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

# may need to wait for index to be created
import time

time.sleep(10)

python

index = pc.Index("quickstart-index")

python

# Optional: delete data in your pinecone index
# index.delete(deleteAll=True, namespace="test")

python

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore

python

from llama_index.core.schema import TextNode

nodes = [
    TextNode(
        text=(
            "Michael Jordan is a retired professional basketball player,"
            " widely regarded as one of the greatest basketball players of all"
            " time."
        ),
        metadata={
            "category": "Sports",
            "country": "United States",
            "gender": "male",
            "born": 1963,
        },
    ),
    TextNode(
        text=(
            "Angelina Jolie is an American actress, filmmaker, and"
            " humanitarian. She has received numerous awards for her acting"
            " and is known for her philanthropic work."
        ),
        metadata={
            "category": "Entertainment",
            "country": "United States",
            "gender": "female",
            "born": 1975,
        },
    ),
    TextNode(
        text=(
            "Elon Musk is a business magnate, industrial designer, and"
            " engineer. He is the founder, CEO, and lead designer of SpaceX,"
            " Tesla, Inc., Neuralink, and The Boring Company."
        ),
        metadata={
            "category": "Business",
            "country": "United States",
            "gender": "male",
            "born": 1971,
        },
    ),
    TextNode(
        text=(
            "Rihanna is a Barbadian singer, actress, and businesswoman. She"
            " has achieved significant success in the music industry and is"
            " known for her versatile musical style."
        ),
        metadata={
            "category": "Music",
            "country": "Barbados",
            "gender": "female",
            "born": 1988,
        },
    ),
    TextNode(
        text=(
            "Cristiano Ronaldo is a Portuguese professional footballer who is"
            " considered one of the greatest football players of all time. He"
            " has won numerous awards and set multiple records during his"
            " career."
        ),
        metadata={
            "category": "Sports",
            "country": "Portugal",
            "gender": "male",
            "born": 1985,
        },
    ),
]

python

from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core import StorageContext

vector_store = PineconeVectorStore(pinecone_index=index, namespace="test")
storage_context = StorageContext.from_defaults(vector_store=vector_store)

python

from llama_index.core import VectorStoreIndex

index = VectorStoreIndex(nodes, storage_context=storage_context)

Define Function Tool

Here we define the function interface, which is passed to OpenAI to perform auto-retrieval.

We were not able to get OpenAI to work with nested pydantic objects or tuples as arguments, so we converted the metadata filter keys and values into lists for the function API to work with.

python

# define function tool
from llama_index.core.tools import FunctionTool
from llama_index.core.vector_stores import (
    VectorStoreInfo,
    MetadataInfo,
    MetadataFilter,
    MetadataFilters,
    FilterCondition,
    FilterOperator,
)
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

from typing import List, Tuple, Any
from pydantic import BaseModel, Field


# define vector store info describing schema of vector store
vector_store_info = VectorStoreInfo(
    content_info="brief biography of celebrities",
    metadata_info=[
        MetadataInfo(
            name="category",
            type="str",
            description=(
                "Category of the celebrity, one of [Sports, Entertainment,"
                " Business, Music]"
            ),
        ),
        MetadataInfo(
            name="country",
            type="str",
            description=(
                "Country of the celebrity, one of [United States, Barbados,"
                " Portugal]"
            ),
        ),
        MetadataInfo(
            name="gender",
            type="str",
            description=("Gender of the celebrity, one of [male, female]"),
        ),
        MetadataInfo(
            name="born",
            type="int",
            description=("Born year of the celebrity, could be any integer"),
        ),
    ],
)

Define AutoRetrieve Functions

python

from typing import Any, Annotated


async def auto_retrieve_fn(
    query: Annotated[str, "The natural language query/question to answer."],
    filter_key_list: Annotated[
        List[str], "List of metadata filter field names"
    ],
    filter_value_list: Annotated[
        List[Any],
        "List of metadata filter field values (corresponding to names in filter_key_list)",
    ],
    filter_operator_list: Annotated[
        List[str],
        "Metadata filters conditions (could be one of <, <=, >, >=, ==, !=)",
    ],
    filter_condition: Annotated[
        str, "Metadata filters condition values (could be AND or OR)"
    ],
    top_k: Annotated[
        int, "The number of results to return from the vector database."
    ],
):
    """Auto retrieval function.

    Performs auto-retrieval from a vector database, and then applies a set of filters.

    """
    query = query or "Query"

    metadata_filters = [
        MetadataFilter(key=k, value=v, operator=op)
        for k, v, op in zip(
            filter_key_list, filter_value_list, filter_operator_list
        )
    ]
    retriever = VectorIndexRetriever(
        index,
        filters=MetadataFilters(
            filters=metadata_filters, condition=filter_condition.lower()
        ),
        top_k=top_k,
    )
    query_engine = RetrieverQueryEngine.from_args(retriever)

    response = await query_engine.aquery(query)
    return str(response)


description = f"""\
Use this tool to look up biographical information about celebrities.
The vector database schema is given below:

<schema>
{vector_store_info.model_dump_json()}
</schema>
"""

auto_retrieve_tool = FunctionTool.from_defaults(
    auto_retrieve_fn,
    name="celebrity_bios",
    description=description,
)

Initialize Agent

python

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.workflow import Context
from llama_index.llms.openai import OpenAI

agent = FunctionAgent(
    tools=[auto_retrieve_tool],
    llm=OpenAI(model="gpt-4o"),
    system_prompt=(
        "You are a helpful assistant that can answer questions about celebrities by writing a filtered query to a vector database. "
        "Unless the user is asking to compare things, you generally only need to make one call to the retriever."
    ),
)

# hold the context/session state for the agent
ctx = Context(agent)

python

from llama_index.core.agent.workflow import (
    ToolCallResult,
    ToolCall,
    AgentStream,
    AgentInput,
    AgentOutput,
)

handler = agent.run(
    "Tell me about two celebrities from the United States. ", ctx=ctx
)

async for ev in handler.stream_events():
    if isinstance(ev, ToolCallResult):
        print(
            f"\nCalled tool {ev.tool_name} with args {ev.tool_kwargs}, got response: {ev.tool_output}"
        )
    elif isinstance(ev, AgentStream):
        print(ev.delta, end="", flush=True)

response = await handler

python

handler = agent.run("Tell me about two celebrities born after 1980. ", ctx=ctx)

async for ev in handler.stream_events():
    if isinstance(ev, ToolCallResult):
        print(
            f"\nCalled tool {ev.tool_name} with args {ev.tool_kwargs}, got response: {ev.tool_output}"
        )
    elif isinstance(ev, AgentStream):
        print(ev.delta, end="", flush=True)

response = await handler

python

response = await agent.run(
    "Tell me about few celebrities under category business and born after 1950. ",
    ctx=ctx,
)
print(str(response))

Joint Text-to-SQL and Semantic Search

This is currently handled by our SQLAutoVectorQueryEngine.

Let's try implementing this by giving our FunctionAgent access to two query tools: SQL and Vector

Load and Index Structured Data

We load sample structured datapoints into a SQL db and index it.

python

from sqlalchemy import (
    create_engine,
    MetaData,
    Table,
    Column,
    String,
    Integer,
    select,
    column,
)
from llama_index.core import SQLDatabase
from llama_index.core.indices import SQLStructStoreIndex

engine = create_engine("sqlite:///:memory:", future=True)
metadata_obj = MetaData()

python

# create city SQL table
table_name = "city_stats"
city_stats_table = Table(
    table_name,
    metadata_obj,
    Column("city_name", String(16), primary_key=True),
    Column("population", Integer),
    Column("country", String(16), nullable=False),
)

metadata_obj.create_all(engine)

python

# print tables
metadata_obj.tables.keys()

python

from sqlalchemy import insert

rows = [
    {"city_name": "Toronto", "population": 2930000, "country": "Canada"},
    {"city_name": "Tokyo", "population": 13960000, "country": "Japan"},
    {"city_name": "Berlin", "population": 3645000, "country": "Germany"},
]
for row in rows:
    stmt = insert(city_stats_table).values(**row)
    with engine.begin() as connection:
        cursor = connection.execute(stmt)

python

with engine.connect() as connection:
    cursor = connection.exec_driver_sql("SELECT * FROM city_stats")
    print(cursor.fetchall())

python

sql_database = SQLDatabase(engine, include_tables=["city_stats"])

python

from llama_index.core.query_engine import NLSQLTableQueryEngine

query_engine = NLSQLTableQueryEngine(
    sql_database=sql_database,
    tables=["city_stats"],
)

Load and Index Unstructured Data

We load unstructured data into a vector index backed by Pinecone

python

# install wikipedia python package
%pip install wikipedia llama-index-readers-wikipedia

python

from llama_index.readers.wikipedia import WikipediaReader

cities = ["Toronto", "Berlin", "Tokyo"]
wiki_docs = WikipediaReader().load_data(pages=cities)

python

from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

python

# dimensions are for text-embedding-3-small
pc.create_index(
    name="quickstart-sql",
    dimension=1536,
    metric="euclidean",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

# may need to wait for index to be created
import time

time.sleep(10)

python

# define pinecone index
index = pc.Index("quickstart-sql")

python

# OPTIONAL: delete all
index.delete(deleteAll=True)

python

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# define node parser and LLM
Settings.llm = OpenAI(temperature=0, model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
Settings.node_parser = TokenTextSplitter(chunk_size=1024)

# define pinecone vector index
vector_store = PineconeVectorStore(
    pinecone_index=index, namespace="wiki_cities"
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
vector_index = VectorStoreIndex([], storage_context=storage_context)

python

# Insert documents into vector index
# Each document has metadata of the city attached
for city, wiki_doc in zip(cities, wiki_docs):
    nodes = Settings.node_parser.get_nodes_from_documents([wiki_doc])
    # add metadata to each node
    for node in nodes:
        node.metadata = {"title": city}
    vector_index.insert_nodes(nodes)

Define Query Engines / Tools

python

from llama_index.core.retrievers import VectorIndexAutoRetriever
from llama_index.core.vector_stores import MetadataInfo, VectorStoreInfo
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.tools import QueryEngineTool


vector_store_info = VectorStoreInfo(
    content_info="articles about different cities",
    metadata_info=[
        MetadataInfo(
            name="title", type="str", description="The name of the city"
        ),
    ],
)

# pre-built auto-retriever, this works similarly to our custom auto-retriever above
vector_auto_retriever = VectorIndexAutoRetriever(
    vector_index, vector_store_info=vector_store_info
)

retriever_query_engine = RetrieverQueryEngine.from_args(
    vector_auto_retriever,
)

python

sql_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name="sql_tool",
    description=(
        "Useful for translating a natural language query into a SQL query over"
        " a table containing: city_stats, containing the population/country of"
        " each city"
    ),
)
vector_tool = QueryEngineTool.from_defaults(
    query_engine=retriever_query_engine,
    name="vector_tool",
    description=(
        "Useful for answering semantic questions about different cities"
    ),
)

Initialize Agent

python

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.openai import OpenAI
from llama_index.core.workflow import Context

agent = FunctionAgent(
    tools=[sql_tool, vector_tool],
    llm=OpenAI(model="gpt-4o"),
)

# hold the context/session state for the agent
ctx = Context(agent)

python

from llama_index.core.agent.workflow import (
    ToolCallResult,
    ToolCall,
    AgentStream,
    AgentInput,
    AgentOutput,
)

handler = agent.run(
    "Tell me about the arts and culture of the city with the highest population. ",
    ctx=ctx,
)

async for ev in handler.stream_events():
    if isinstance(ev, ToolCallResult):
        print(
            f"\nCalled tool {ev.tool_name} with args {ev.tool_kwargs}, got response: {ev.tool_output}"
        )
    elif isinstance(ev, AgentStream):
        print(ev.delta, end="", flush=True)

response = await handler

python

handler = agent.run("Tell me about the history of Berlin", ctx=ctx)

async for ev in handler.stream_events():
    if isinstance(ev, ToolCallResult):
        print(
            f"\nCalled tool {ev.tool_name} with args {ev.tool_kwargs}, got response: {ev.tool_output}"
        )
    elif isinstance(ev, AgentStream):
        print(ev.delta, end="", flush=True)

response = await handler

python

response = await agent.run(
    "Can you give me the country corresponding to each city?", ctx=ctx
)

print(str(response))