docs/examples/node_postprocessor/LLMReranker-Gatsby.ipynb
This tutorial showcases a two-stage pass for retrieval: first, embedding-based retrieval with a high top-k value to maximize recall and collect a large set of candidate nodes; then, LLM-based reranking to dynamically select the nodes that are actually relevant to the query.
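In outline, the two-stage pass built in this notebook looks like the following sketch (names mirror the LlamaIndex classes used below; index and query_bundle are constructed step by step later in the notebook):
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.postprocessor import LLMRerank
# Stage 1: embedding-based retrieval with a high top-k for recall
retriever = VectorIndexRetriever(index=index, similarity_top_k=10)
candidates = retriever.retrieve(query_bundle)
# Stage 2: the LLM inspects the candidates and keeps only the relevant ones
reranked = LLMRerank(choice_batch_size=5, top_n=3).postprocess_nodes(
    candidates, query_bundle
)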
%pip install llama-index-llms-openai
import nest_asyncio
nest_asyncio.apply()
import logging
import sys
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import LLMRerank
from llama_index.llms.openai import OpenAI
from IPython.display import Markdown, display
from llama_index.core import Settings
# LLM (gpt-3.5-turbo)
Settings.llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
Settings.chunk_size = 512
# load documents
documents = SimpleDirectoryReader("../../../examples/gatsby/data").load_data()
documents
index = VectorStoreIndex.from_documents(
documents,
)
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import QueryBundle
import pandas as pd
from IPython.display import display, HTML
# show full cell contents (the -1 sentinel was removed in pandas 1.0; use None)
pd.set_option("display.max_colwidth", None)
def get_retrieved_nodes(
query_str, vector_top_k=10, reranker_top_n=3, with_reranker=False
):
query_bundle = QueryBundle(query_str)
# configure retriever
retriever = VectorIndexRetriever(
index=index,
similarity_top_k=vector_top_k,
)
retrieved_nodes = retriever.retrieve(query_bundle)
if with_reranker:
# configure reranker
reranker = LLMRerank(
choice_batch_size=5,
top_n=reranker_top_n,
)
retrieved_nodes = reranker.postprocess_nodes(
retrieved_nodes, query_bundle
)
return retrieved_nodes
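If you want the reranking model to differ from Settings.llm, LLMRerank also accepts an llm argument. A sketch, assuming a stronger model is worth the extra cost for the reranking step:
# use a dedicated (hypothetically stronger) model just for reranking
reranker = LLMRerank(
    llm=OpenAI(temperature=0, model="gpt-4"),
    choice_batch_size=5,
    top_n=3,
)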
def pretty_print(df):
    # render newlines as HTML line breaks so long passages stay readable
    return display(HTML(df.to_html().replace("\\n", "<br>")))
def visualize_retrieved_nodes(nodes) -> None:
result_dicts = []
for node in nodes:
result_dict = {"Score": node.score, "Text": node.node.get_text()}
result_dicts.append(result_dict)
pretty_print(pd.DataFrame(result_dicts))
new_nodes = get_retrieved_nodes(
"Who was driving the car that hit Myrtle?",
vector_top_k=3,
with_reranker=False,
)
visualize_retrieved_nodes(new_nodes)
new_nodes = get_retrieved_nodes(
"Who was driving the car that hit Myrtle?",
vector_top_k=10,
reranker_top_n=3,
with_reranker=True,
)
visualize_retrieved_nodes(new_nodes)
new_nodes = get_retrieved_nodes(
"What did Gatsby want Daisy to do in front of Tom?",
vector_top_k=3,
with_reranker=False,
)
visualize_retrieved_nodes(new_nodes)
new_nodes = get_retrieved_nodes(
"What did Gatsby want Daisy to do in front of Tom?",
vector_top_k=10,
reranker_top_n=3,
with_reranker=True,
)
visualize_retrieved_nodes(new_nodes)
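The same reranker can also be plugged directly into a query engine as a node postprocessor, so that retrieval, reranking, and synthesis happen in a single call. With response_mode="tree_summarize", the reranked nodes are summarized hierarchically into one final answer.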
query_engine = index.as_query_engine(
similarity_top_k=10,
node_postprocessors=[
LLMRerank(
choice_batch_size=5,
top_n=2,
)
],
response_mode="tree_summarize",
)
response = query_engine.query(
"What did the author do during his time at Y Combinator?",
)
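Assuming the query succeeds, the response can be rendered inline (display and Markdown were imported at the top of the notebook):
display(Markdown(f"<b>{response}</b>"))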