examples/rag-retrieval-augmented-generation/simple-agentic-rag/main.ipynb
import wikipedia
from markdownify import markdownify
from tensorzero import AsyncTensorZeroGateway, ToolCall, ToolResult
# Cap the number of gateway inferences per question so a confused agent
# cannot loop forever making tool calls without ever answering.
MAX_INFERENCES = 20
We initialize a TensorZero client that connects to the gateway launched via Docker Compose.
To keep things minimal in this example, we don't set up observability with ClickHouse.
See the Quick Start for a simple example that includes observability and the UI.
# Async TensorZero client connected to the gateway launched via Docker Compose.
# NOTE(review): `await` at module level only works where an event loop is
# already running (e.g. a Jupyter notebook cell) — confirm if moving to a script.
t0 = await AsyncTensorZeroGateway.build_http(
    gateway_url="http://localhost:3000",
)
We define the tools that will be used by the model.
Here, we have a tool for searching Wikipedia and a tool for loading a Wikipedia page.
def search_wikipedia(tool_call: ToolCall) -> ToolResult:
    """
    Run a Wikipedia search for the query carried by a tool call.

    Args:
        tool_call (ToolCall): Tool call whose arguments are {"query": str}.

    Returns:
        ToolResult: The matching page titles, newline-separated, in `result`.
    """
    titles = wikipedia.search(tool_call.arguments["query"])
    return ToolResult(
        name="search_wikipedia",
        id=tool_call.id,
        result="\n".join(titles),
    )
def load_wikipedia_page(tool_call: ToolCall) -> ToolResult:
    """
    Fetch a Wikipedia page and return its content formatted as Markdown.

    Args:
        tool_call (ToolCall): Tool call whose arguments are {"title": str}.

    Returns:
        ToolResult: `result` holds the page URL followed by the page body
            converted from HTML to Markdown. On a missing page or a
            disambiguation hit, `result` holds an error message instead.
    """
    title = tool_call.arguments["title"]
    try:
        page = wikipedia.page(title)
        # Converting the HTML to Markdown greatly reduces token usage.
        body = markdownify(page.html())
        outcome = f"# URL\n\n{page.url}\n\n# CONTENT\n\n{body}"
    except wikipedia.exceptions.PageError:
        outcome = f"ERROR: page '{title}' not found."
    except wikipedia.exceptions.DisambiguationError as e:
        outcome = f"ERROR: disambiguation error for '{title}': {e}"
    return ToolResult(
        name="load_wikipedia_page",
        id=tool_call.id,
        result=outcome,
    )
Here we define the function that will be used to ask a question to the multi-hop retrieval agent.
The function takes a question and launches a multi-hop retrieval process. The agent will make a number of tool calls to search for information and answer the question.
The function will return the answer to the question.
async def ask_question(question: str, verbose: bool = False):
    """
    Asks a question to the multi-hop retrieval agent and returns the answer.

    The agent repeatedly calls the `multi_hop_rag_agent` TensorZero function,
    executing whatever tool calls it emits (`search_wikipedia`,
    `load_wikipedia_page`, `think`, `answer_question`) and feeding the results
    back, until it answers or the inference budget runs out.

    Args:
        question (str): The question to ask the agent.
        verbose (bool, optional): Whether to print verbose output. Defaults to False.

    Returns:
        str: The answer to the question.

    Raises:
        Exception: If no answer is produced within MAX_INFERENCES inferences.
    """
    # Initialize the message history with the user's question
    messages = [{"role": "user", "content": question}]

    # The episode ID is used to track the agent's progress (`None` until the first inference)
    episode_id = None

    for _ in range(MAX_INFERENCES):
        if verbose:
            # Blank line separating inference rounds in verbose output.
            print()

        response = await t0.inference(
            function_name="multi_hop_rag_agent",
            input={"messages": messages},
            episode_id=episode_id,
        )

        # Append the assistant's response to the messages
        messages.append({"role": "assistant", "content": response.content})

        # Update the episode ID
        episode_id = response.episode_id

        # Start constructing the tool call results
        output_content_blocks = []

        for content_block in response.content:
            if isinstance(content_block, ToolCall):
                if verbose:
                    print(f"[Tool Call] {content_block.name}: {content_block.arguments}")

                if content_block.name is None or content_block.arguments is None:
                    # The gateway could not validate the tool call; surface the
                    # failure to the model so it can retry.
                    output_content_blocks.append(
                        ToolResult(
                            name=content_block.raw_name,
                            id=content_block.id,
                            result="ERROR: invalid tool call",
                        )
                    )
                elif content_block.name == "search_wikipedia":
                    output_content_blocks.append(search_wikipedia(content_block))
                elif content_block.name == "load_wikipedia_page":
                    output_content_blocks.append(load_wikipedia_page(content_block))
                elif content_block.name == "think":
                    # The `think` tool is just used to plan the next steps, and there's no actual tool to call.
                    # Some providers like OpenAI require a tool result, so we'll provide an empty string.
                    output_content_blocks.append(
                        ToolResult(
                            name="think",
                            id=content_block.id,
                            result="",
                        )
                    )
                elif content_block.name == "answer_question":
                    # The agent is done — return its answer.
                    return content_block.arguments["answer"]
                else:
                    # Previously unknown tool names were silently dropped,
                    # leaving the tool call without a result (which some
                    # providers reject). Report the failure to the model instead.
                    output_content_blocks.append(
                        ToolResult(
                            name=content_block.name,
                            id=content_block.id,
                            result=f"ERROR: unknown tool '{content_block.name}'",
                        )
                    )
            else:
                # We don't need to do anything with other content blocks.
                if verbose:
                    print(f"[Other Content Block] {content_block}")

        messages.append({"role": "user", "content": output_content_blocks})
    else:
        # In a production setting, the model could attempt to generate an answer using available information
        # when the search process is stopped; here, we simply throw an exception.
        raise Exception(f"Failed to answer question after {MAX_INFERENCES} inferences.")
Let's try our RAG agent on a few questions.
The questions are fairly challenging. We present a rough research path that the agent can take to answer each question. GPT-4o Mini often gets them right, but it's not always reliable.
# Example 1: multi-hop chain from a physics Nobel Prize to a local dish.
await ask_question(
    "What is a common dish in the hometown of the scientist that won the Nobel Prize for the discovery of the positron?",
    verbose=True,
)
# Expected Answer: Nobel Prize for the discovery of the positron -> Carl D. Anderson -> New York City -> a popular NYC dish
# Example 2: multi-hop chain from an anime voice actor to a game studio.
await ask_question(
    "What company developed the popular Chinese video game voiced by the same voice actor that voiced a wizard in the anime Konosuba?",
    verbose=True,
)
# Expected Answer: Konosuba's wizard -> Megumin -> voiced by Rie Takahashi -> Chinese video game -> Genshin Impact -> developed by HoYoverse (miHoYo)
# Example 3: multi-hop chain from a famous theorem to a national symbol.
await ask_question(
    "What is the national flower of the country where the mathematician who proved Fermat's Last Theorem was born?",
    verbose=True,
)
# Expected Answer: Fermat's Last Theorem -> Andrew Wiles -> United Kingdom -> national flower -> Tudor rose (red rose)