docs/examples/node_postprocessor/PrevNextPostprocessorDemo.ipynb
<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/node_postprocessor/PrevNextPostprocessorDemo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
Showcase the capabilities of leveraging Node relationships (previous/next) on top of Paul Graham's essay.
If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.
!pip install llama-index
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import (
PrevNextNodePostprocessor,
AutoPrevNextNodePostprocessor,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.storage.docstore import SimpleDocumentStore
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
# load documents
from llama_index.core import StorageContext
documents = SimpleDirectoryReader("./data/paul_graham").load_data()
# define settings
from llama_index.core import Settings
Settings.chunk_size = 512
# use node parser in settings to parse into nodes
nodes = Settings.node_parser.get_nodes_from_documents(documents)
# add to docstore
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)
storage_context = StorageContext.from_defaults(docstore=docstore)
# build index
index = VectorStoreIndex(nodes, storage_context=storage_context)
node_postprocessor = PrevNextNodePostprocessor(docstore=docstore, num_nodes=4)
query_engine = index.as_query_engine(
similarity_top_k=1,
node_postprocessors=[node_postprocessor],
response_mode="tree_summarize",
)
response = query_engine.query(
"What did the author do after handing off Y Combinator to Sam Altman?",
)
print(response)
# Try querying index without node postprocessor
query_engine = index.as_query_engine(
similarity_top_k=1, response_mode="tree_summarize"
)
response = query_engine.query(
"What did the author do after handing off Y Combinator to Sam Altman?",
)
print(response)
# Try querying index without node postprocessor and higher top-k
query_engine = index.as_query_engine(
similarity_top_k=3, response_mode="tree_summarize"
)
response = query_engine.query(
"What did the author do after handing off Y Combinator to Sam Altman?",
)
print(response)
node_postprocessor = AutoPrevNextNodePostprocessor(
docstore=docstore,
num_nodes=3,
verbose=True,
)
# Infer that we need to search nodes after current one
query_engine = index.as_query_engine(
similarity_top_k=1,
node_postprocessors=[node_postprocessor],
response_mode="tree_summarize",
)
response = query_engine.query(
"What did the author do after handing off Y Combinator to Sam Altman?",
)
print(response)
# Infer that we don't need to search previous or next
response = query_engine.query(
"What did the author do during his time at Y Combinator?",
)
print(response)
# Infer that we need to search nodes before current one
response = query_engine.query(
"What did the author do before handing off Y Combinator to Sam Altman?",
)
print(response)
response = query_engine.query(
"What did the author do before handing off Y Combinator to Sam Altman?",
)
print(response)