Back to Llama Index

Knowledge Graph Index

docs/examples/index_structs/knowledge_graph/KnowledgeGraphDemo.ipynb

0.14.2 · 14.0 KB
Original Source

Knowledge Graph Index

This tutorial gives a basic overview of how to use our KnowledgeGraphIndex, which handles automated knowledge graph construction from unstructured text as well as entity-based querying.

If you would like to query knowledge graphs in more flexible ways, including pre-existing ones, please check out our KnowledgeGraphQueryEngine and other constructs.

python
%pip install llama-index-llms-openai
python
# My OpenAI Key
import os

# Make the key available to the OpenAI client.
# Use setdefault so a real key that is already set in the environment is
# not clobbered by the tutorial placeholder below.
os.environ.setdefault("OPENAI_API_KEY", "INSERT OPENAI KEY")
python
import logging
import sys

# Send INFO-level log output to stdout so LlamaIndex progress messages
# show up inline in the notebook.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

Using Knowledge Graph

Building the Knowledge Graph

python
from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex
from llama_index.core.graph_stores import SimpleGraphStore

from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from IPython.display import Markdown, display
python
# Load the Paul Graham essay from the shared examples directory.
data_dir = "../../../../examples/paul_graham_essay/data"
documents = SimpleDirectoryReader(data_dir).load_data()
python
# define LLM
# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors
llm = OpenAI(model="text-davinci-002", temperature=0)

# Register the LLM and chunk size globally so the index builders pick them up.
Settings.llm = llm
Settings.chunk_size = 512
python
from llama_index.core import StorageContext

# In-memory graph store that backs the knowledge graph index.
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: can take a while!
# The LLM extracts up to 2 (subject, predicate, object) triplets per text chunk.
index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,
    storage_context=storage_context,
)

[Optional] Try building the graph and manually add triplets!

Querying the Knowledge Graph

python
# Keyword-based graph lookup that returns only the extracted triplets
# (no source text), summarized with the tree_summarize response mode.
query_engine = index.as_query_engine(
    response_mode="tree_summarize", include_text=False
)
response = query_engine.query(
    "Tell me more about Interleaf",
)
python
display(Markdown(f"<b>{response}</b>"))
python
# Same query, but include the source text of the matched chunks so the
# LLM can synthesize a richer answer.
query_engine = index.as_query_engine(
    response_mode="tree_summarize", include_text=True
)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf",
)
python
display(Markdown(f"<b>{response}</b>"))

Query with embeddings

python
# NOTE: can take a while!
new_index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,
    include_embeddings=True,
)
python
# query using top 5 triplets plus keywords (duplicate triplets are removed)
# NOTE: query the embedding-enabled index (new_index) — the original `index`
# was built without embeddings, so embedding_mode="hybrid" would have
# nothing to search over.
query_engine = new_index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf",
)
python
display(Markdown(f"<b>{response}</b>"))

Visualizing the Graph

python
## create graph
from pyvis.network import Network

# Convert the knowledge graph to a networkx graph and render it as an
# interactive HTML page with pyvis.
nx_graph = index.get_networkx_graph()
net = Network(directed=True, notebook=True, cdn_resources="in_line")
net.from_nx(nx_graph)
net.show("example.html")

[Optional] Try building the graph and manually add triplets!

python
from llama_index.core.node_parser import SentenceSplitter
python
node_parser = SentenceSplitter()
python
nodes = node_parser.get_nodes_from_documents(documents)
python
# Start from an index containing no documents; triplets and nodes are
# added by hand in the next cell.
index = KnowledgeGraphIndex([])
python
# add keyword mappings and nodes manually
# add triplets (subject, relationship, object)

# Map each node index to the triplets that should be attached to it;
# insertion order matches the original cell (node 0 first, then node 1).
triplets_by_node = {
    0: [
        ("author", "worked on", "writing"),
        ("author", "worked on", "programming"),
    ],
    1: [
        ("Interleaf", "made software for", "creating documents"),
        ("Interleaf", "added", "scripting language"),
        ("software", "generate", "web sites"),
    ],
}

for node_idx, triplets in triplets_by_node.items():
    for triplet in triplets:
        index.upsert_triplet_and_node(triplet, nodes[node_idx])
python
# Query the hand-built graph; triplets only (no source text), summarized
# with the tree_summarize response mode.
query_engine = index.as_query_engine(
    response_mode="tree_summarize", include_text=False
)
response = query_engine.query(
    "Tell me more about Interleaf",
)
python
str(response)