Knowledge Graph Index

This tutorial gives a basic overview of how to use our KnowledgeGraphIndex, which handles automated knowledge graph construction from unstructured text as well as entity-based querying.

If you would like to query knowledge graphs in more flexible ways, including pre-existing ones, please check out our KnowledgeGraphQueryEngine and other constructs.

python

# Install the OpenAI LLM integration package used by the cells below.
%pip install llama-index-llms-openai

python

# Expose the OpenAI API key to this process through the environment.
# Replace the placeholder value with a real key before running.
import os

os.environ.update({"OPENAI_API_KEY": "INSERT OPENAI KEY"})

python

import logging
import sys

# Send INFO-level (and above) log records to stdout so they show up in the
# notebook output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

Using Knowledge Graph

Building the Knowledge Graph

python

from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex
from llama_index.core.graph_stores import SimpleGraphStore

from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from IPython.display import Markdown, display

python

# Load the Paul Graham essay data from the examples directory.
documents = SimpleDirectoryReader(
    input_dir="../../../../examples/paul_graham_essay/data"
).load_data()

python

# define LLM
# NOTE: this demo was originally written against text-davinci-002, which OpenAI
# has since retired; gpt-3.5-turbo-instruct is the completion-style replacement.

# temperature=0 keeps the triplet extraction as deterministic as possible.
llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")

# Register the LLM and chunk size as global defaults for everything built below.
Settings.llm = llm
Settings.chunk_size = 512

python

from llama_index.core import StorageContext

# Create a SimpleGraphStore and hand it to the index via a storage context.
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: building the index can take a while!
# max_triplets_per_chunk is set to 2 for each text chunk.
index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=2,
)

[Optional] Try building the graph and manually add triplets! (A worked example appears later in this tutorial, after the graph visualization.)

Querying the Knowledge Graph

python

# First query: include_text=False, with a tree_summarize response.
query_engine = index.as_query_engine(
    response_mode="tree_summarize",
    include_text=False,
)
response = query_engine.query("Tell me more about Interleaf")

python

# Render the response in bold as Markdown in the notebook output.
display(Markdown(f"<b>{response}</b>"))

python

# Same index, but this time with include_text=True.
query_engine = index.as_query_engine(
    response_mode="tree_summarize",
    include_text=True,
)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf"
)

python

# Render the response in bold as Markdown in the notebook output.
display(Markdown(f"<b>{response}</b>"))

Query with embeddings

python

# Build a second index with include_embeddings=True.
# NOTE: this can take a while!
new_index = KnowledgeGraphIndex.from_documents(
    documents,
    include_embeddings=True,
    max_triplets_per_chunk=2,
)

python

# Query the embedding-enabled index (new_index, built with
# include_embeddings=True) rather than the earlier index, which has no
# embeddings for hybrid mode to use.
# Uses the top 5 triplets plus keywords (duplicate triplets are removed).
query_engine = new_index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf",
)

python

# Render the response in bold as Markdown in the notebook output.
display(Markdown(f"<b>{response}</b>"))

Visualizing the Graph

python

from pyvis.network import Network

# Get the index's graph as a networkx graph, load it into a directed pyvis
# network, and show it via example.html.
g = index.get_networkx_graph()
net = Network(directed=True, notebook=True, cdn_resources="in_line")
net.from_nx(g)
net.show("example.html")

[Optional] Try building the graph and manually add triplets!

python

from llama_index.core.node_parser import SentenceSplitter

python

# Create a SentenceSplitter with its default settings.
node_parser = SentenceSplitter()

python

# Turn the loaded documents into nodes; nodes[0] and nodes[1] are used below.
nodes = node_parser.get_nodes_from_documents(documents)

python

# Start from an index with no nodes; triplets are added by hand below.
index = KnowledgeGraphIndex([])

python

# Manually attach (subject, relationship, object) triplets to individual nodes.

# Triplets for node 0
node_0_tups = [
    ("author", "worked on", "writing"),
    ("author", "worked on", "programming"),
]

# Triplets for node 1
node_1_tups = [
    ("Interleaf", "made software for", "creating documents"),
    ("Interleaf", "added", "scripting language"),
    ("software", "generate", "web sites"),
]

# Upsert each group, in order, against the node at the matching position.
for position, tups in enumerate((node_0_tups, node_1_tups)):
    for tup in tups:
        index.upsert_triplet_and_node(tup, nodes[position])

python

# Query the hand-built index with include_text=False.
query_engine = index.as_query_engine(
    response_mode="tree_summarize",
    include_text=False,
)
response = query_engine.query("Tell me more about Interleaf")

python

# Convert the response to a plain string; as the last expression in the cell,
# its value is presumably shown as the cell output.
str(response)