examples/use_with/cohere/cohere_python.ipynb
This notebook demonstrates how to use Cohere Embeddings with Chroma.
If you have not already, create a Cohere account and get your API Key.
First a basic example:
! pip install chromadb --quiet
! pip install cohere --quiet
! pip install Pillow --quiet
! pip install datasets --quiet
! pip install matplotlib --quiet
import os
import getpass
# Prompt for the Cohere API key interactively (getpass does not echo the typed
# value) and keep it in the CHROMA_COHERE_API_KEY environment variable, which
# the embedding-function cells below read back through os.environ.
os.environ["CHROMA_COHERE_API_KEY"] = getpass.getpass("Cohere API Key:")
import chromadb
from chromadb.utils import embedding_functions
# Embedding function that calls Cohere's "large" model, authenticated with the
# key stored in the environment by the previous cell.
cohere_ef = embedding_functions.CohereEmbeddingFunction(
    model_name="large",
    api_key=os.environ["CHROMA_COHERE_API_KEY"],
)

# Chroma client with default settings, plus a collection whose documents and
# queries are embedded through the Cohere function above.
client = chromadb.Client()
collection = client.create_collection(
    name="cohere_python",
    embedding_function=cohere_ef,
)

# One (id, document, metadata) triple per record; the columns are split apart
# below because collection.add takes parallel lists.
fruit_records = [
    ("1", "I like apples", {"fruit": "apple"}),
    ("2", "I like bananas", {"fruit": "banana"}),
    ("3", "I like oranges", {"fruit": "orange"}),
]
fruit_ids, fruit_documents, fruit_metadatas = (
    list(column) for column in zip(*fruit_records)
)
collection.add(
    ids=fruit_ids,
    documents=fruit_documents,
    metadatas=fruit_metadatas,
)

# Ask for the single stored document closest to the query text.
closest_match = collection.query(query_texts=["citrus"], n_results=1)
print(closest_match)
Cohere can support many languages! In this example we store text in many languages, and then query in English.
# Second embedding function, this time backed by Cohere's multilingual model.
cohere_mutlilingual = embedding_functions.CohereEmbeddingFunction(
    model_name="multilingual-22-12",
    api_key=os.environ["CHROMA_COHERE_API_KEY"],
)

# Short texts in several languages, one per line.
multilingual_texts = [
    "Hello from Cohere!",
    "مرحبًا من كوهير!",
    "Hallo von Cohere!",
    "Bonjour de Cohere!",
    "¡Hola desde Cohere!",
    "Olá do Cohere!",
    "Ciao da Cohere!",
    "您好,来自 Cohere!",
    "कोहेरे से नमस्ते!",
    "나는 오렌지를 좋아한다",  # Korean for "I like oranges"
]

# Separate collection so these documents are embedded with the multilingual
# model rather than the one used in the basic example.
collection = client.create_collection(
    name="cohere_multilingual",
    embedding_function=cohere_mutlilingual,
)

# Ids are simply the list positions rendered as strings: "0", "1", ...
multilingual_ids = list(map(str, range(len(multilingual_texts))))
collection.add(ids=multilingual_ids, documents=multilingual_texts)

# Query in English and print the single closest stored text.
closest_multilingual_match = collection.query(query_texts=["citrus"], n_results=1)
print(closest_multilingual_match)
Cohere now supports images! In this example we store images in a collection, and then query them with text.
import os
from datasets import load_dataset, Image
from matplotlib import pyplot as plt
# Open the COCO training split in streaming mode, so samples are pulled one at
# a time while iterating.
dataset = load_dataset(path="detection-datasets/coco", split="train", streaming=True)

IMAGE_FOLDER = "images"
N_IMAGES = 5

# For plotting: round the row count UP so there is a panel for every image
# even when N_IMAGES is not a multiple of plot_cols (floor division would give
# too few panels, or zero rows when N_IMAGES < plot_cols).
plot_cols = 5
plot_rows = (N_IMAGES + plot_cols - 1) // plot_cols
# figsize is (width, height): width scales with the column count and height
# with the row count. squeeze=False keeps `axes` a 2-D array even for a 1x1
# grid, so .flatten() below always works.
fig, axes = plt.subplots(
    plot_rows,
    plot_cols,
    figsize=(plot_cols * 2, plot_rows * 2),
    squeeze=False,
)
axes = axes.flatten()

# Write the images to a folder
dataset_iter = iter(dataset)
os.makedirs(IMAGE_FOLDER, exist_ok=True)
for i in range(N_IMAGES):
    image = next(dataset_iter)['image']
    axes[i].imshow(image)
    # Build the path from IMAGE_FOLDER so the files land in the directory
    # created above and listed by the later cell.
    image.save(os.path.join(IMAGE_FOLDER, f"{i}.jpg"))

# Hide ticks and frames on every panel, including any unused trailing ones.
for ax in axes:
    ax.axis("off")

plt.tight_layout()
plt.show()
from chromadb.utils.data_loaders import ImageLoader
from chromadb.utils.embedding_functions import CohereEmbeddingFunction
# Embedding function for the image example, using "embed-english-v3.0".
# NOTE(review): no api_key is passed here, unlike the earlier cells; presumably
# the function falls back to the CHROMA_COHERE_API_KEY environment variable —
# confirm against the chromadb documentation.
multimodal_cohere_ef = CohereEmbeddingFunction(
    model_name="embed-english-v3.0",
)

# The data loader lets the collection work from image URIs (file paths).
image_loader = ImageLoader()

multimodal_collection = client.create_collection(
    name="multimodal",
    embedding_function=multimodal_cohere_ef,
    data_loader=image_loader)

# Get the uris to the images
# NOTE: every entry in IMAGE_FOLDER is included, and the sort is lexicographic
# on the path string ("10.jpg" sorts before "2.jpg").
image_uris = sorted([os.path.join(IMAGE_FOLDER, image_name) for image_name in os.listdir(IMAGE_FOLDER)])
ids = [str(i) for i in range(len(image_uris))]

# Pair each precomputed id with its uri, so the ids passed to add() are exactly
# the ones built above instead of being regenerated inside the loop.
for image_id, image_uri in zip(ids, image_uris):
    print(f"Adding image {image_id}: {image_uri}")
    # max images per add is 1, see cohere docs https://docs.cohere.com/v2/reference/embed#request.body.images
    multimodal_collection.add(ids=[image_id], uris=[image_uri])

# Query with text; include=['data'] asks for the stored image data to be
# returned so the hits can be displayed.
retrieved = multimodal_collection.query(query_texts=["animals"], include=['data'], n_results=3)

# retrieved['data'] holds one list per query text; show each hit for the
# single query on its own figure.
for img in retrieved['data'][0]:
    plt.imshow(img)
    plt.axis("off")
    plt.show()