docs/examples/embeddings/cohereai.ipynb
<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/embeddings/cohereai.ipynb" target="_parent"></a>
Cohere Embed is the first embedding model that natively supports float, int8, binary and ubinary embeddings.
The default embedding_type is float with LlamaIndex. You can customize it for v3 models using the parameter embedding_type. In this notebook, we will demonstrate using Cohere embeddings with different models, input_types and embedding_types.
Refer to their main blog post for more details on Cohere int8 & binary Embeddings.
If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.
%pip install llama-index-llms-cohere
%pip install llama-index-embeddings-cohere
!pip install llama-index
# Initialise with your API key.
import os

cohere_api_key = "YOUR COHERE API KEY"
# Export it so the Cohere client (and LlamaIndex integrations) can pick it up.
os.environ.update({"COHERE_API_KEY": cohere_api_key})
embed-english-v3.0 embeddings.

input_type="search_document": Use this for texts (documents) you want to store in your vector database
input_type="search_query": Use this for search queries to find the most relevant documents in your vector database
The default embedding_type is float.
from llama_index.embeddings.cohere import CohereEmbedding

# with input_type='search_query'
# v3 models embed queries and documents differently, so the input_type matters.
embed_model = CohereEmbedding(
api_key=cohere_api_key,
model_name="embed-english-v3.0",
input_type="search_query",
)
# Embed a sample string and inspect the dimensionality / first few values.
embeddings = embed_model.get_text_embedding("Hello CohereAI!")
print(len(embeddings))
print(embeddings[:5])
# with input_type = 'search_document'
# Use this variant when embedding texts to be stored in a vector database.
embed_model = CohereEmbedding(
api_key=cohere_api_key,
model_name="embed-english-v3.0",
input_type="search_document",
)
# Same sample text; the vector differs from the search_query variant.
embeddings = embed_model.get_text_embedding("Hello CohereAI!")
print(len(embeddings))
print(embeddings[:5])
# int8 embedding_type
# (Markdown header was fused onto the code line during extraction; restored
# here as a comment so the cell is valid Python.)
embed_model = CohereEmbedding(
    api_key=cohere_api_key,
    model_name="embed-english-v3.0",
    input_type="search_query",
    embedding_type="int8",
)
# int8 vectors are quantized — smaller to store than float embeddings.
embeddings = embed_model.get_text_embedding("Hello CohereAI!")
print(len(embeddings))
print(embeddings[:5])
# binary embedding_type
# (Markdown header was fused onto the code line during extraction; restored
# here as a comment so the cell is valid Python.)
embed_model = CohereEmbedding(
    api_key=cohere_api_key,
    model_name="embed-english-v3.0",
    input_type="search_query",
    embedding_type="binary",
)
# binary embeddings are the most compact representation Cohere v3 offers.
embeddings = embed_model.get_text_embedding("Hello CohereAI!")
print(len(embeddings))
print(embeddings[:5])
embed-english-v2.0 embeddings.

v2 models support only the default float embedding_type.
# v2 models take no input_type/embedding_type; they return float embeddings.
embed_model = CohereEmbedding(
api_key=cohere_api_key, model_name="embed-english-v2.0"
)
embeddings = embed_model.get_text_embedding("Hello CohereAI!")
print(len(embeddings))
print(embeddings[:5])
With embed-english-v3.0 embeddings, let's use input_type="search_document" to build the index and input_type="search_query" to retrieve relevant context. We will experiment with the int8 embedding_type.
import logging
import sys

# Verbose logging to stdout so the retrieval calls below are visible in the notebook.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.cohere import Cohere
from llama_index.core.response.notebook_utils import display_source_node
from IPython.display import Markdown, display

# Download the Paul Graham essay used as the example corpus.
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
# int8 embedding_type
# (Markdown header was fused onto the code line during extraction; restored
# here as a comment so the cell is valid Python.)
llm = Cohere(model="command-nightly", api_key=cohere_api_key)

# Index the corpus with input_type='search_document' and int8 embeddings.
embed_model = CohereEmbedding(
    api_key=cohere_api_key,
    model_name="embed-english-v3.0",
    input_type="search_document",
    embedding_type="int8",
)
index = VectorStoreIndex.from_documents(
    documents=documents, embed_model=embed_model
)

# Switch to input_type='search_query' for the query side.
# NOTE(review): rebinding `embed_model` here does not change the model already
# stored inside `index`; depending on the llama-index version, the retriever
# may still use the search_document model — verify and pass the query-side
# model explicitly if supported.
embed_model = CohereEmbedding(
    api_key=cohere_api_key,
    model_name="embed-english-v3.0",
    input_type="search_query",
    embedding_type="int8",
)
search_query_retriever = index.as_retriever()
search_query_retrieved_nodes = search_query_retriever.retrieve(
    "What happened in the summer of 1995?"
)
for n in search_query_retrieved_nodes:
    display_source_node(n, source_length=2000)
# float embedding_type
# (Markdown header was fused onto the code line during extraction; restored
# here as a comment so the cell is valid Python.)
llm = Cohere(model="command-nightly", api_key=cohere_api_key)

# Index the corpus with input_type='search_document' and float embeddings.
embed_model = CohereEmbedding(
    api_key=cohere_api_key,
    model_name="embed-english-v3.0",
    input_type="search_document",
    embedding_type="float",
)
index = VectorStoreIndex.from_documents(
    documents=documents, embed_model=embed_model
)

# Switch to input_type='search_query' for the query side.
# NOTE(review): rebinding `embed_model` here does not change the model already
# stored inside `index` — verify the retriever actually uses the query-side
# model on your llama-index version.
embed_model = CohereEmbedding(
    api_key=cohere_api_key,
    model_name="embed-english-v3.0",
    input_type="search_query",
    embedding_type="float",
)
search_query_retriever = index.as_retriever()
search_query_retrieved_nodes = search_query_retriever.retrieve(
    "What happened in the summer of 1995?"
)
for n in search_query_retrieved_nodes:
    display_source_node(n, source_length=2000)
# binary embedding_type
# (Markdown header was fused onto the code line during extraction; restored
# here as a comment so the cell is valid Python.)

# Index the corpus with input_type='search_document' and binary embeddings.
embed_model = CohereEmbedding(
    api_key=cohere_api_key,
    model_name="embed-english-v3.0",
    input_type="search_document",
    embedding_type="binary",
)
index = VectorStoreIndex.from_documents(
    documents=documents, embed_model=embed_model
)

# Switch to input_type='search_query' for the query side.
# NOTE(review): rebinding `embed_model` here does not change the model already
# stored inside `index` — verify the retriever actually uses the query-side
# model on your llama-index version.
embed_model = CohereEmbedding(
    api_key=cohere_api_key,
    model_name="embed-english-v3.0",
    input_type="search_query",
    embedding_type="binary",
)
search_query_retriever = index.as_retriever()
search_query_retrieved_nodes = search_query_retriever.retrieve(
    "What happened in the summer of 1995?"
)
for n in search_query_retrieved_nodes:
    display_source_node(n, source_length=2000)
binary embedding type compared to float and int8. It would be interesting to run a retrieval evaluation of your RAG pipeline using float/int8/binary/ubinary embeddings.

Cohere now supports a multi-modal embedding model where both text and images live in the same embedding space.
from PIL import Image
import matplotlib.pyplot as plt

# Display the example image that will be embedded below.
img = Image.open("../data/images/prometheus_paper_card.png")
plt.imshow(img)
from llama_index.embeddings.cohere import CohereEmbedding

embed_model = CohereEmbedding(
api_key=cohere_api_key,
model_name="embed-english-v3.0",
)
# Embed the image; text and images share the same embedding space.
embeddings = embed_model.get_image_embedding(
"../data/images/prometheus_paper_card.png"
)
print(len(embeddings))
print(embeddings[:5])
# Embed related text with the same model for comparison against the image vector.
embeddings = embed_model.get_text_embedding("prometheus evaluation model")
print(len(embeddings))
print(embeddings[:5])