examples/basic_functionality/alternative_embeddings.ipynb
This notebook demonstrates how to use alternative embedding functions.
# Imports grouped together before any runtime setup (PEP 8).
import chromadb
from chromadb.utils import embedding_functions

# A single Chroma client shared by every example section below.
client = chromadb.Client()
# --- OpenAI embeddings ----------------------------------------------------
# Assumes the `openai` package is installed.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key="OPENAI_KEY",  # Replace with your own OpenAI API key
    model_name="text-embedding-ada-002",
)

# Fetch (or create on first run) a collection backed by the OpenAI embedder.
openai_collection = client.get_or_create_collection(
    name="openai_embeddings",
    embedding_function=openai_ef,
)

# Index two toy documents, each with metadata and a unique id.
openai_collection.add(
    documents=["This is a document", "This is another document"],
    metadatas=[{"source": "my_source"}, {"source": "my_source"}],
    ids=["id1", "id2"],
)

# Retrieve the two nearest documents for a sample query.
results = openai_collection.query(
    query_texts=["This is a query document"],
    n_results=2,
)
results
# --- Cohere embeddings ----------------------------------------------------
# Assumes the `cohere` package is installed.
cohere_ef = embedding_functions.CohereEmbeddingFunction(
    api_key="COHERE_API_KEY",  # Replace with your own Cohere API key
    model_name="large"
)
# Use get_or_create_collection (as the OpenAI example does) so re-running
# this notebook does not fail with a "collection already exists" error.
cohere_collection = client.get_or_create_collection(name="cohere_embeddings", embedding_function=cohere_ef)
# Index two toy documents, each with metadata and a unique id.
cohere_collection.add(
    documents=["This is a document", "This is another document"],
    metadatas=[{"source": "my_source"}, {"source": "my_source"}],
    ids=["id1", "id2"]
)
# Retrieve the two nearest documents for a sample query.
results = cohere_collection.query(
    query_texts=["This is a query document"],
    n_results=2
)
results
# --- Instructor embeddings ------------------------------------------------
# Requires the InstructorEmbedding package:
#   pip install InstructorEmbedding
# Default constructor uses the base model on CPU.
instructor_ef = embedding_functions.InstructorEmbeddingFunction()
# For task-specific embeddings, add an instruction:
# instructor_ef = embedding_functions.InstructorEmbeddingFunction(
# instruction="Represent the Wikipedia document for retrieval: "
# )
# Uses hkunlp/instructor-xl model and GPU:
#instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-xl", device="cuda")
# Use get_or_create_collection (as the OpenAI example does) so re-running
# this notebook does not fail with a "collection already exists" error.
instructor_collection = client.get_or_create_collection(name="instructor_embeddings", embedding_function=instructor_ef)
# Index two toy documents, each with metadata and a unique id.
instructor_collection.add(
    documents=["This is a document", "This is another document"],
    metadatas=[{"source": "my_source"}, {"source": "my_source"}],
    ids=["id1", "id2"]
)
# Adding documents with an instruction:
# instructor_ef = embedding_functions.InstructorEmbeddingFunction(
# instruction="Represent the Science sentence: "
# )
# instructor_collection = client.create_collection(name="instructor_embeddings", embedding_function=instructor_ef)
# instructor_collection.add(documents=["Parton energy loss in QCD matter"], ids=["id1"])
# Retrieve the two nearest documents for a sample query.
results = instructor_collection.query(
    query_texts=["This is a query document"],
    n_results=2
)
results
# Querying with an instruction:
# instructor_ef = embedding_functions.InstructorEmbeddingFunction(instruction="Represent the Wikipedia question for retrieving supporting documents: ")
# instructor_collection = client.get_collection(name="instructor_embeddings", embedding_function=instructor_ef)
# results = instructor_collection.query(query_texts=["where is the food stored in a yam plant"])
# --- HuggingFace embeddings -----------------------------------------------
# The embedding function requires a HuggingFace API key.
huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
    api_key="HUGGINGFACE_API_KEY",  # Replace with your own HuggingFace API key
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# Use get_or_create_collection (as the OpenAI example does) so re-running
# this notebook does not fail with a "collection already exists" error.
huggingface_collection = client.get_or_create_collection(name="huggingface_embeddings", embedding_function=huggingface_ef)
# Index two toy documents, each with metadata and a unique id.
huggingface_collection.add(
    documents=["This is a document", "This is another document"],
    metadatas=[{"source": "my_source"}, {"source": "my_source"}],
    ids=["id1", "id2"]
)
# Retrieve the two nearest documents for a sample query.
results = huggingface_collection.query(
    query_texts=["This is a query document"],
    n_results=2
)
results