sample_apps/generative_benchmarking/compare.ipynb
This notebook walks through comparing the performance of different embedding models using your custom benchmark results.
Install the necessary packages.
%pip install -r requirements.txt
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import json
import os
from pathlib import Path
from functions.utils import *
from functions.visualize import *
results_dir = Path("results")
with open(os.path.join(results_dir, "2025-03-31--14-01-03.json"), "r") as f:
    openai_small_results = json.load(f)

with open(os.path.join(results_dir, "2025-03-31--13-59-25.json"), "r") as f:
    openai_large_results = json.load(f)

with open(os.path.join(results_dir, "2025-03-31--14-08-55.json"), "r") as f:
    jina_results = json.load(f)

with open(os.path.join(results_dir, "2025-03-31--14-10-29.json"), "r") as f:
    voyage_results = json.load(f)
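If you have many timestamped result files, you can also load every JSON in the results directory in one pass instead of listing each file by hand. The snippet below is a minimal sketch that assumes each file is a standalone JSON object saved by the benchmarking notebook; the dictionary is keyed by the filename stem.
# Optional: load every result file in the directory at once (sketch; assumes one JSON object per file)
all_results = {}
for path in sorted(results_dir.glob("*.json")):
    with open(path, "r") as f:
        all_results[path.stem] = json.load(f)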
# Collect the loaded results you wish to compare
results_list = [openai_small_results, openai_large_results, jina_results, voyage_results] # Add as many results as you want to compare
# Create a dataframe of the results
metrics_df = create_metrics_dataframe(results_list)
metrics_df
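If you want to keep a copy of the comparison table alongside the raw JSON results, you can optionally export it; the filename below is only illustrative.
# Optional: save the comparison table for later reference (filename is illustrative)
metrics_df.to_csv(results_dir / "model_comparison.csv", index=False)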
compare_embedding_models(
    metrics_df=metrics_df,
    metric="Recall@3",
    title="Recall@3 Scores by Model"
)
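You can repeat the comparison for any other metric present in `metrics_df`. The exact metric names depend on what `create_metrics_dataframe` produces, so "Recall@10" below is only an example.
# Compare the models on another metric (assumes "Recall@10" is one of the metrics in metrics_df)
compare_embedding_models(
    metrics_df=metrics_df,
    metric="Recall@10",
    title="Recall@10 Scores by Model"
)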