docs/examples/query_engine/sub_question_query_engine.ipynb
<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/query_engine/sub_question_query_engine.ipynb" target="_parent"></a>
In this tutorial, we showcase how to use a sub question query engine to tackle the problem of answering a complex query using multiple data sources.
It first breaks down the complex query into sub questions for each relevant data source,
then gather all the intermediate reponses and synthesizes a final response.
If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.
!pip install llama-index
import os
os.environ["OPENAI_API_KEY"] = "sk-..."
import nest_asyncio
nest_asyncio.apply()
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.core import Settings
# Using the LlamaDebugHandler to print the trace of the sub questions
# captured by the SUB_QUESTION callback event type
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])
Settings.callback_manager = callback_manager
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
# load data
pg_essay = SimpleDirectoryReader(input_dir="./data/paul_graham/").load_data()
# build index and query engine
vector_query_engine = VectorStoreIndex.from_documents(
pg_essay,
use_async=True,
).as_query_engine()
# setup base query engine as tool
query_engine_tools = [
QueryEngineTool(
query_engine=vector_query_engine,
metadata=ToolMetadata(
name="pg_essay",
description="Paul Graham essay on What I Worked On",
),
),
]
query_engine = SubQuestionQueryEngine.from_defaults(
query_engine_tools=query_engine_tools,
use_async=True,
)
response = query_engine.query(
"How was Paul Grahams life different before, during, and after YC?"
)
print(response)
# iterate through sub_question items captured in SUB_QUESTION event
from llama_index.core.callbacks import CBEventType, EventPayload
for i, (start_event, end_event) in enumerate(
llama_debug.get_event_pairs(CBEventType.SUB_QUESTION)
):
qa_pair = end_event.payload[EventPayload.SUB_QUESTION]
print("Sub Question " + str(i) + ": " + qa_pair.sub_q.sub_question.strip())
print("Answer: " + qa_pair.answer.strip())
print("====================================")