docs/examples/docstore/DocstoreDemo.ipynb
This guide shows you how to directly use our DocumentStore abstraction. By putting nodes in the docstore, this allows you to define multiple indices over the same underlying docstore, instead of duplicating data across indices.
<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/docstore/DocstoreDemo.ipynb" target="_parent"></a>
If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.
%pip install llama-index-llms-openai
!pip install llama-index
import nest_asyncio
nest_asyncio.apply()
import logging
import sys
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex, SimpleKeywordTableIndex
from llama_index.core import SummaryIndex
from llama_index.core import ComposableGraph
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
reader = SimpleDirectoryReader("./data/paul_graham/")
documents = reader.load_data()
from llama_index.core.node_parser import SentenceSplitter
nodes = SentenceSplitter().get_nodes_from_documents(documents)
from llama_index.core.storage.docstore import SimpleDocumentStore
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)
Each index uses the same underlying Node.
from llama_index.core import StorageContext
storage_context = StorageContext.from_defaults(docstore=docstore)
summary_index = SummaryIndex(nodes, storage_context=storage_context)
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)
keyword_table_index = SimpleKeywordTableIndex(
nodes, storage_context=storage_context
)
# NOTE: the docstore still has the same nodes
len(storage_context.docstore.docs)
llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
Settings.llm = llm
Settings.chunk_size = 1024
query_engine = summary_index.as_query_engine()
response = query_engine.query("What is a summary of this document?")
query_engine = vector_index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
query_engine = keyword_table_index.as_query_engine()
response = query_engine.query("What did the author do after his time at YC?")
print(response)