# Jina Rerank

<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/node_postprocessor/JinaRerank.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.
!pip install llama-index-postprocessor-jinaai-rerank
!pip install llama-index-embeddings-jinaai
!pip install llama-index
import os
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
)
from llama_index.embeddings.jinaai import JinaEmbedding
# Initialize the Jina embedding model with the API key from the environment
api_key = os.environ["JINA_API_KEY"]
jina_embeddings = JinaEmbedding(api_key=api_key)
# load documents
import requests
url = "https://niketeam-asset-download.nike.net/catalogs/2024/2024_Nike%20Kids_02_09_24.pdf?cb=09302022"
response = requests.get(url)
with open("Nike_Catalog.pdf", "wb") as f:
    f.write(response.content)
reader = SimpleDirectoryReader(input_files=["Nike_Catalog.pdf"])
documents = reader.load_data()
# build index
index = VectorStoreIndex.from_documents(
    documents=documents, embed_model=jina_embeddings
)
query_engine = index.as_query_engine(similarity_top_k=10)
response = query_engine.query(
    "What is the best jersey by Nike in terms of fabric?",
)
print(response.source_nodes[0].text, response.source_nodes[0].score)
print("\n")
print(response.source_nodes[1].text, response.source_nodes[1].score)
By employing a reranker model, the prompt can be given more relevant context, which leads to a more accurate response from the LLM.
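Conceptually, a reranker rescores the candidates returned by the retriever with a stronger (query, document) relevance model and keeps only the best few. The following is a minimal sketch of that rerank-then-truncate pattern with no API calls; the word-overlap `overlap_score` function is a hypothetical stand-in for the actual Jina reranker model.

```python
def rerank(query, candidates, score_fn, top_n=2):
    """Rescore each candidate with score_fn and keep the top_n best."""
    rescored = [(doc, score_fn(query, doc)) for doc in candidates]
    rescored.sort(key=lambda pair: pair[1], reverse=True)
    return rescored[:top_n]


def overlap_score(query, doc):
    """Hypothetical scorer: fraction of query words found in the document."""
    q, d = set(query.lower().split()), set(doc.lower().split())
    return len(q & d) / max(len(q), 1)


# The retriever would supply these candidates (similarity_top_k of them);
# the reranker then narrows them down to top_n for the prompt.
candidates = [
    "Nike kids soccer jersey made of recycled polyester fabric",
    "Nike store locations and opening hours",
    "Lightweight Dri-FIT jersey fabric keeps players cool",
]
top = rerank("best jersey fabric", candidates, overlap_score, top_n=2)
```

In the real pipeline below, `JinaRerank` plays the role of `score_fn` and is wired in as a node postprocessor, so the query engine retrieves `similarity_top_k=10` candidates and passes only the reranked `top_n=2` to the LLM.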
from llama_index.postprocessor.jinaai_rerank import JinaRerank
jina_rerank = JinaRerank(api_key=api_key, top_n=2)
query_engine = index.as_query_engine(
    similarity_top_k=10, node_postprocessors=[jina_rerank]
)
response = query_engine.query(
    "What is the best jersey by Nike in terms of fabric?",
)
print(response.source_nodes[0].text, response.source_nodes[0].score)
print("\n")
print(response.source_nodes[1].text, response.source_nodes[1].score)