Redis Docstore+Index Store Demo

This guide shows you how to directly use our DocumentStore and IndexStore abstractions backed by Redis. Putting nodes in the docstore lets you define multiple indices over the same underlying docstore instead of duplicating data across indices.

The index itself is also stored in Redis through the IndexStore.

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

python
%pip install llama-index-storage-docstore-redis
%pip install llama-index-storage-index-store-redis
%pip install llama-index-llms-openai
python
!pip install llama-index
python
import nest_asyncio

# allow async calls to run inside the notebook's existing event loop
nest_asyncio.apply()
python
import logging
import sys
import os

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
python
from llama_index.core import SimpleDirectoryReader, StorageContext
from llama_index.core import VectorStoreIndex, SimpleKeywordTableIndex
from llama_index.core import SummaryIndex
from llama_index.core import ComposableGraph
from llama_index.llms.openai import OpenAI
from llama_index.core.response.notebook_utils import display_response
from llama_index.core import Settings

Download Data

python
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

Load Documents

python
reader = SimpleDirectoryReader("./data/paul_graham/")
documents = reader.load_data()

Parse into Nodes

python
from llama_index.core.node_parser import SentenceSplitter

nodes = SentenceSplitter().get_nodes_from_documents(documents)
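
As a quick sanity check (not part of the original flow), you can look at how many nodes the splitter produced and preview one of them:

python
# the splitter returns a list of text nodes
print(f"Parsed {len(nodes)} nodes")
print(nodes[0].get_content()[:200])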

Add to Docstore

python
REDIS_HOST = os.getenv("REDIS_HOST", "127.0.0.1")
# cast to int in case the port is read from an environment variable
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
python
from llama_index.storage.docstore.redis import RedisDocumentStore
from llama_index.storage.index_store.redis import RedisIndexStore
python
storage_context = StorageContext.from_defaults(
    docstore=RedisDocumentStore.from_host_and_port(
        host=REDIS_HOST, port=REDIS_PORT, namespace="llama_index"
    ),
    index_store=RedisIndexStore.from_host_and_port(
        host=REDIS_HOST, port=REDIS_PORT, namespace="llama_index"
    ),
)
python
storage_context.docstore.add_documents(nodes)
python
len(storage_context.docstore.docs)
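
The docstore exposes its contents as a dict keyed by node ID, so you can look up any stored node directly. A minimal sketch (output depends on your data):

python
# docs maps node_id -> node; grab one entry and preview its text
node_id, node = next(iter(storage_context.docstore.docs.items()))
print(node_id)
print(node.get_content()[:200])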

Define Multiple Indexes

Each index uses the same underlying nodes.

python
summary_index = SummaryIndex(nodes, storage_context=storage_context)
python
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)
python
keyword_table_index = SimpleKeywordTableIndex(
    nodes, storage_context=storage_context
)
python
# NOTE: the docstore still has the same nodes
len(storage_context.docstore.docs)

Test out saving and loading

python
# NOTE: the docstore and index_store are persisted in Redis by default
# NOTE: here we only need to persist the simple vector store to disk
storage_context.persist(persist_dir="./storage")
python
# note down index IDs
list_id = summary_index.index_id
vector_id = vector_index.index_id
keyword_id = keyword_table_index.index_id
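
Since the indices live in Redis, these IDs are all you need to reload them later. If you want them to survive the current Python session, you could stash them in a small file; a sketch (the filename is arbitrary):

python
import json

# save the index IDs so a later session can reload the same indices
with open("index_ids.json", "w") as f:
    json.dump(
        {"summary": list_id, "vector": vector_id, "keyword": keyword_id}, f
    )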
python
from llama_index.core import load_index_from_storage

# re-create storage context
storage_context = StorageContext.from_defaults(
    docstore=RedisDocumentStore.from_host_and_port(
        host=REDIS_HOST, port=REDIS_PORT, namespace="llama_index"
    ),
    index_store=RedisIndexStore.from_host_and_port(
        host=REDIS_HOST, port=REDIS_PORT, namespace="llama_index"
    ),
)

# load indices
summary_index = load_index_from_storage(
    storage_context=storage_context, index_id=list_id
)
vector_index = load_index_from_storage(
    storage_context=storage_context, index_id=vector_id
)
keyword_table_index = load_index_from_storage(
    storage_context=storage_context, index_id=keyword_id
)
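
Alternatively, load_indices_from_storage from llama_index.core can load several indices in one call; a sketch assuming the same index IDs as above:

python
from llama_index.core import load_indices_from_storage

# returns the indices in the same order as the given IDs
summary_index, vector_index, keyword_table_index = load_indices_from_storage(
    storage_context=storage_context,
    index_ids=[list_id, vector_id, keyword_id],
)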

Test out some Queries

python
chatgpt = OpenAI(temperature=0, model="gpt-3.5-turbo")
Settings.llm = chatgpt
Settings.chunk_size = 1024
python
query_engine = summary_index.as_query_engine()
list_response = query_engine.query("What is a summary of this document?")
python
display_response(list_response)
python
query_engine = vector_index.as_query_engine()
vector_response = query_engine.query("What did the author do growing up?")
python
display_response(vector_response)
python
query_engine = keyword_table_index.as_query_engine()
keyword_response = query_engine.query(
    "What did the author do after his time at YC?"
)
python
display_response(keyword_response)
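
If you want to clean up the demo data afterwards, one option is to delete the stored nodes from the Redis-backed docstore (a sketch; raise_error=False skips IDs that have already been removed):

python
# optional cleanup: remove the demo nodes from the Redis-backed docstore
for node_id in list(storage_context.docstore.docs.keys()):
    storage_context.docstore.delete_document(node_id, raise_error=False)

print(len(storage_context.docstore.docs))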