Back to Llama Index

Forward/Backward Augmentation

docs/examples/node_postprocessor/PrevNextPostprocessorDemo.ipynb

0.14.2 · 13.8 KB
Original Source

<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/node_postprocessor/PrevNextPostprocessorDemo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Forward/Backward Augmentation

Showcase the capabilities of leveraging Node relationships on top of Paul Graham's essay.

If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.

python
!pip install llama-index
python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import (
    PrevNextNodePostprocessor,
    AutoPrevNextNodePostprocessor,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.storage.docstore import SimpleDocumentStore

Download Data

python
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

Parse Documents into Nodes, add to Docstore

python
# load documents
from llama_index.core import StorageContext


# Read the Paul Graham essay downloaded above into Document objects.
documents = SimpleDirectoryReader("./data/paul_graham").load_data()

# define settings
from llama_index.core import Settings

# Global chunk size used by the default node parser below.
Settings.chunk_size = 512

# use node parser in settings to parse into nodes
# (splitting preserves prev/next relationships between adjacent chunks)
nodes = Settings.node_parser.get_nodes_from_documents(documents)

# add to docstore
# The docstore is what the Prev/Next postprocessors later query to fetch
# neighboring nodes by relationship, so all nodes must be registered here.
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)

storage_context = StorageContext.from_defaults(docstore=docstore)

Build Index

python
# build index
# Pass the same storage_context so the index shares the docstore above.
index = VectorStoreIndex(nodes, storage_context=storage_context)

Add PrevNext Node Postprocessor

python
# Always fetch up to 4 neighboring nodes (forward and backward) for each
# retrieved node, using the relationships recorded in the docstore.
node_postprocessor = PrevNextNodePostprocessor(docstore=docstore, num_nodes=4)
python
# With top_k=1 the single retrieved node is augmented with its neighbors
# before tree_summarize synthesizes the answer.
query_engine = index.as_query_engine(
    similarity_top_k=1,
    node_postprocessors=[node_postprocessor],
    response_mode="tree_summarize",
)
response = query_engine.query(
    "What did the author do after handing off Y Combinator to Sam Altman?",
)
python
print(response)
python
# Try querying index without node postprocessor
# (baseline for comparison: same top_k, no neighbor augmentation)
query_engine = index.as_query_engine(
    similarity_top_k=1, response_mode="tree_summarize"
)
response = query_engine.query(
    "What did the author do after handing off Y Combinator to Sam Altman?",
)
python
print(response)
python
# Try querying index without node postprocessor and higher top-k
# (more retrieved nodes, but still no relationship-based augmentation)
query_engine = index.as_query_engine(
    similarity_top_k=3, response_mode="tree_summarize"
)
response = query_engine.query(
    "What did the author do after handing off Y Combinator to Sam Altman?",
)
python
print(response)

Add Auto Prev/Next Node Postprocessor

python
# AutoPrevNextNodePostprocessor lets the LLM infer per-query whether to
# fetch previous nodes, next nodes, or neither — instead of always both.
node_postprocessor = AutoPrevNextNodePostprocessor(
    docstore=docstore,
    num_nodes=3,
    verbose=True,  # log the inferred prev/next decision for each query
)
python
# Infer that we need to search nodes after current one
query_engine = index.as_query_engine(
    similarity_top_k=1,
    node_postprocessors=[node_postprocessor],
    response_mode="tree_summarize",
)
response = query_engine.query(
    "What did the author do after handing off Y Combinator to Sam Altman?",
)
python
print(response)
python
# Infer that we don't need to search previous or next
response = query_engine.query(
    "What did the author do during his time at Y Combinator?",
)
python
print(response)
python
# Infer that we need to search nodes before current one
response = query_engine.query(
    "What did the author do before handing off Y Combinator to Sam Altman?",
)
python
print(response)
python
# Same query repeated — duplicate cell kept from the original notebook.
response = query_engine.query(
    "What did the author do before handing off Y Combinator to Sam Altman?",
)
python
print(response)