docs/open-source/features/reranker-search.mdx
Reranker-enhanced search adds a second scoring pass after vector retrieval so Mem0 can return the most relevant memories first. Enable it when keyword similarity alone misses nuance or when you need the highest-confidence context for an agent decision.
<Info> **You’ll use this when…** - Queries are nuanced and require semantic understanding beyond vector distance. - Large memory collections produce too many near matches to review manually. - You want consistent scoring across providers by delegating ranking to a dedicated model. </Info> <Warning> Reranking raises latency and, for hosted models, API spend. Benchmark with production traffic and define a fallback path for latency-sensitive requests. </Warning> <Note> All configuration snippets translate directly to the TypeScript SDK—swap dictionaries for objects while keeping the same keys (`provider`, `config`, `rerank` flags). </Note>

from mem0 import Memory
# Minimal setup: add a "reranker" section to the Memory config.
# The hosted Cohere reranker re-scores vector-search hits after retrieval.
config = {
    "reranker": {
        "provider": "cohere",
        "config": {
            "model": "rerank-english-v3.0",
            "api_key": "your-cohere-api-key"  # replace with a real key or load from env
        }
    }
}

m = Memory.from_config(config)
# Cohere reranker with tuning options.
config = {
    "reranker": {
        "provider": "cohere",
        "config": {
            "model": "rerank-english-v3.0",
            "api_key": "your-cohere-api-key",
            "top_k": 10,               # number of results kept after reranking
            "return_documents": True   # presumably forwarded to Cohere's return_documents flag — confirm in provider docs
        }
    }
}
# Sentence Transformer reranker: a local cross-encoder model (no hosted API key needed).
config = {
    "reranker": {
        "provider": "sentence_transformer",
        "config": {
            "model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
            "device": "cuda",    # use "cpu" if no GPU is available
            "max_length": 512    # NOTE(review): likely the tokenizer truncation length — confirm
        }
    }
}
# Hugging Face reranker: loads a reranking model by hub id.
config = {
    "reranker": {
        "provider": "huggingface",
        "config": {
            "model": "BAAI/bge-reranker-base",
            "device": "cuda",    # use "cpu" if no GPU is available
            "batch_size": 32     # candidates scored per forward pass
        }
    }
}
# LLM-based reranker: uses a chat model to judge relevance.
# Note the nested "provider" key — it selects the LLM backend for the reranker.
config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "openai",
            "model": "gpt-4o-mini",
            "api_key": "your-openai-api-key",
            "top_k": 5   # number of results kept after reranking
        }
    }
}
# Complete configuration: vector store + LLM + embedder + reranker together.
config = {
    "vector_store": {
        "provider": "qdrant",
        "config": {
            "host": "localhost",
            "port": 6333
        }
    },
    "llm": {
        "provider": "openai",
        "config": {
            "model": "gpt-4",
            "api_key": "your-openai-api-key"
        }
    },
    "embedder": {
        "provider": "openai",
        "config": {
            "model": "text-embedding-3-small",
            "api_key": "your-openai-api-key"
        }
    },
    "reranker": {
        "provider": "cohere",
        "config": {
            "model": "rerank-english-v3.0",
            "api_key": "your-cohere-api-key",
            "top_k": 15,
            "return_documents": True
        }
    }
}

m = Memory.from_config(config)
from mem0 import AsyncMemory

# The same config dict drives the async client.
async_memory = AsyncMemory.from_config(config)


async def search_with_rerank():
    # rerank=True requests the reranking pass on this individual search call.
    return await async_memory.search(
        "What are my preferences?",
        filters={"user_id": "alice"},
        rerank=True
    )


import asyncio

results = asyncio.run(search_with_rerank())
# GPU-friendly local reranker configuration
config = {
    "reranker": {
        "provider": "sentence_transformer",
        "config": {
            "model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
            "device": "cuda",
            "batch_size": 32,    # larger batches amortize GPU transfer overhead
            "top_k": 10,
            "max_length": 256    # shorter sequences trade context for speed
        }
    }
}
# Smart toggle: only pay the hosted-reranker cost for longer, nuanced queries.
def smart_search(query, user_id, use_rerank=None):
    """Search memories, deciding automatically whether to rerank.

    When ``use_rerank`` is None, reranking is enabled only for queries
    longer than three words; otherwise the explicit flag wins.
    """
    should_rerank = (
        use_rerank if use_rerank is not None else len(query.split()) > 3
    )
    return m.search(query, filters={"user_id": user_id}, rerank=should_rerank)
# Graceful degradation: if the reranking pass fails (provider outage, bad key),
# retry the same query as a plain vector search instead of surfacing an error.
try:
    results = m.search("test query", filters={"user_id": "alice"}, rerank=True)
except Exception as exc:
    print(f"Reranking failed: {exc}")
    results = m.search("test query", filters={"user_id": "alice"}, rerank=False)
# Before: basic vector search
results = m.search("query", filters={"user_id": "alice"})

# After: same API with reranking enabled via config
config = {
    "reranker": {
        "provider": "sentence_transformer",
        "config": {
            "model": "cross-encoder/ms-marco-MiniLM-L-6-v2"
        }
    }
}

m = Memory.from_config(config)

# The search call itself is unchanged — the reranker runs transparently.
results = m.search("query", filters={"user_id": "alice"})
# Basic search; results carry the stored memory text and a relevance score.
results = m.search(
    "What are my food preferences?",
    filters={"user_id": "alice"}
)

for result in results["results"]:
    print(f"Memory: {result['memory']}")
    print(f"Score: {result['score']}")
# Run the same query twice to compare ordering with and without the
# reranking pass.
results_with_rerank = m.search(
    "What movies do I like?",
    filters={"user_id": "alice"},
    rerank=True
)

results_without_rerank = m.search(
    "What movies do I like?",
    filters={"user_id": "alice"},
    rerank=False
)
# Reranking composes with structured filters: metadata narrows the candidate
# set first, then the reranker orders what remains.
results = m.search(
    "important work tasks",
    filters={
        "AND": [
            {"user_id": "alice"},
            {"category": "work"},
            {"priority": {"gte": 7}}   # numeric comparison filter
        ]
    },
    rerank=True,
    top_k=20   # retrieve a wider candidate pool for the reranker to sort
)
# Example: customer-support agent retrieving technical-support memories.
config = {
    "reranker": {
        "provider": "cohere",
        "config": {
            "model": "rerank-english-v3.0",
            "api_key": "your-cohere-api-key"
        }
    }
}

m = Memory.from_config(config)

results = m.search(
    "customer having login issues with mobile app",
    filters={"agent_id": "support_bot", "category": "technical_support"},
    rerank=True
)
# Example: content recommendations ranked by semantic relevance.
results = m.search(
    "science fiction books with space exploration themes",
    filters={"user_id": "reader123", "content_type": "book_recommendation"},
    rerank=True,
    top_k=10
)

for result in results["results"]:
    print(f"Recommendation: {result['memory']}")
    print(f"Relevance: {result['score']:.3f}")
# Example: a nuanced natural-language query combined with structured filters;
# this is where reranking helps most.
results = m.search(
    "What restaurants did I enjoy last month that had good vegetarian options?",
    filters={
        "AND": [
            {"user_id": "foodie_user"},
            {"category": "dining"},
            {"rating": {"gte": 4}},
            {"date": {"gte": "2024-01-01"}}   # ISO date comparison
        ]
    },
    rerank=True
)
To verify reranking ran, compare the vector-similarity score with the reranker score in each result; mismatched or missing score fields indicate the reranker didn’t execute. Use `top_k` and selective rerank toggles to cap hosted reranker costs.