docs/components/rerankers/models/llm_reranker.mdx
The LLM reranker lets you use any supported language model as a reranker. It prompts the LLM to score each memory's relevance to the query and ranks results by those scores. While slower and costlier than specialized rerankers, it offers maximum flexibility and can be tailored with custom scoring prompts.
```python
from mem0 import Memory

config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "openai",
            "model": "gpt-4o-mini",
            "api_key": "your-openai-api-key"
        }
    }
}

m = Memory.from_config(config)
```
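Once configured, pass `rerank=True` to `search` so the LLM rescores the vector-search results:

```python
results = m.search("What foods should I avoid?", user_id="alice", rerank=True)
```

The reranker accepts the following parameters: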
| Parameter | Type | Default | Description |
|---|---|---|---|
| `provider` | str | `"openai"` | LLM provider (`openai`, `anthropic`, etc.) |
| `model` | str | `"gpt-4o-mini"` | LLM model to use for reranking |
| `api_key` | str | None | API key for the LLM provider |
| `top_k` | int | None | Number of top documents to return after reranking |
| `temperature` | float | 0.0 | LLM temperature; keep at 0.0 for consistent scoring |
| `max_tokens` | int | 100 | Maximum tokens for the LLM response |
| `scoring_prompt` | str | None | Custom prompt template for scoring documents; must contain `{query}` and `{document}` placeholders |
| `llm` | dict | None | Optional nested LLM config for provider-specific fields (e.g., `ollama_base_url`, `azure_endpoint`). Overrides top-level `provider`/`model`/`api_key` when provided. |
A full configuration combining tuning parameters with a custom scoring prompt:

```python
config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "anthropic",
            "model": "claude-sonnet-4-20250514",
            "api_key": "your-anthropic-api-key",
            "top_k": 15,
            "temperature": 0.0,
            "scoring_prompt": """
Rate the relevance of each memory to the query on a scale of 0.0-1.0.
Consider semantic similarity, context, and practical utility.
Only provide the numeric score.

Query: "{query}"
Document: "{document}"

Score:
"""
        }
    }
}
```
**OpenAI**

```python
config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "openai",
            "model": "gpt-4o-mini",
            "api_key": "your-openai-api-key",
            "temperature": 0.0
        }
    }
}
```
**Anthropic**

```python
config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "anthropic",
            "model": "claude-sonnet-4-20250514",
            "api_key": "your-anthropic-api-key"
        }
    }
}
```
**Ollama (local)**

```python
config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "ollama",
            "model": "llama3.2",
            # The nested llm config carries provider-specific fields
            # and takes precedence over the top-level settings.
            "llm": {
                "provider": "ollama",
                "config": {
                    "model": "llama3.2",
                    "ollama_base_url": "http://localhost:11434"
                }
            }
        }
    }
}
```
**Azure OpenAI**

```python
config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "azure_openai",
            "model": "gpt-4o-mini",
            "api_key": "your-azure-api-key",
            # Azure-specific fields go in the nested llm config,
            # which takes precedence over the top-level settings.
            "llm": {
                "provider": "azure_openai",
                "config": {
                    "model": "gpt-4o-mini",
                    "api_key": "your-azure-api-key",
                    "azure_endpoint": "https://your-resource.openai.azure.com/",
                    "azure_deployment": "gpt-4o-mini-deployment"
                }
            }
        }
    }
}
```
The default prompt asks the LLM to score relevance on a 0.0-1.0 scale:
```text
You are a relevance scoring assistant. Given a query and a document, you need to score how relevant the document is to the query.

Score the relevance on a scale from 0.0 to 1.0, where:
- 1.0 = Perfectly relevant and directly answers the query
- 0.8-0.9 = Highly relevant with good information
- 0.6-0.7 = Moderately relevant with some useful information
- 0.4-0.5 = Slightly relevant with limited useful information
- 0.0-0.3 = Not relevant or no useful information

Query: "{query}"
Document: "{document}"

Provide only a single numerical score between 0.0 and 1.0. Do not include any explanation or additional text.
```
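Custom templates should keep the `{query}` and `{document}` placeholders. Here is a minimal sketch of how a template is presumably rendered and its reply parsed (assuming `str.format`-style substitution; the actual internals may differ):

```python
# Hypothetical illustration of the render/parse cycle, not mem0 internals.
template = 'Query: "{query}"\nDocument: "{document}"\n\nScore:'
prompt = template.format(
    query="What foods should I avoid?",
    document="I'm allergic to peanuts",
)

# Prompts insist on "only a single numerical score" so the reply
# can be parsed directly as a float.
reply = "0.92"  # example LLM output
score = float(reply.strip())
```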
custom_prompt = """
You are a medical information specialist. Rate how relevant each memory is for answering the medical query.
Consider clinical accuracy, specificity, and practical applicability.
Rate from 0.0 to 1.0 where:
- 0.0-0.3: Irrelevant or potentially harmful
- 0.4-0.6: Somewhat relevant but incomplete
- 0.7-0.8: Relevant and helpful
- 0.9-1.0: Highly relevant and clinically useful
Query: "{query}"
Document: "{document}"
Score:
"""
config = {
"reranker": {
"provider": "llm_reranker",
"config": {
"provider": "openai",
"model": "gpt-4o-mini",
"api_key": "your-api-key",
"scoring_prompt": custom_prompt
}
}
}
A template that weighs how directly and completely a memory answers the question:

```python
contextual_prompt = """
Rate how well this memory answers the specific question asked.

Consider:
- Direct relevance to the question
- Completeness of information
- Recency and accuracy
- Practical usefulness

Rate 0.0-1.0:
0.0 = Not relevant
0.25 = Slightly relevant
0.5 = Moderately relevant
0.75 = Very relevant
1.0 = Perfectly answers the question

Query: "{query}"
Document: "{document}"

Score:
"""
```
A template tuned for conversational assistants:

```python
conversation_prompt = """
You are helping evaluate which memories are most useful for a conversational AI assistant.
Rate how helpful this memory would be for generating a relevant response.

Consider:
- Direct relevance to user's intent
- Emotional appropriateness
- Factual accuracy
- Conversation flow

Rate 0.0-1.0:

Query: "{query}"
Document: "{document}"

Score:
"""
```
```python
from mem0 import Memory

m = Memory.from_config(config)

# Add memories
m.add("I'm allergic to peanuts", user_id="alice")
m.add("I love Italian food", user_id="alice")
m.add("I'm vegetarian", user_id="alice")

# Search with LLM reranking
results = m.search(
    "What foods should I avoid?",
    user_id="alice",
    rerank=True
)

for result in results["results"]:
    print(f"Memory: {result['memory']}")
    print(f"LLM Score: {result['score']:.2f}")
```
Because every reranked search makes LLM API calls, wrap it with retries and a plain vector-search fallback:

```python
def safe_llm_rerank_search(query, user_id, max_retries=3):
    for attempt in range(max_retries):
        try:
            return m.search(query, user_id=user_id, rerank=True)
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt == max_retries - 1:
                # Fall back to plain vector search
                return m.search(query, user_id=user_id, rerank=False)

# Use the safe function
results = safe_llm_rerank_search("What are my preferences?", "alice")
```
| Model Type | Speed | Quality | Cost | Best For |
|---|---|---|---|---|
| GPT-4o mini | Fast | Good | Low | High-volume applications |
| GPT-4o | Medium | Excellent | Medium | Quality-critical applications |
| Claude Sonnet | Medium | Excellent | Medium | Balanced performance |
| Ollama Local | Variable | Good | Free | Privacy-sensitive applications |
```python
# Fast configuration for high-volume use
fast_config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "openai",
            "model": "gpt-4o-mini",
            "api_key": "your-api-key",
            "top_k": 5,
            "temperature": 0.0
        }
    }
}

# High-quality configuration
quality_config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "openai",
            "model": "gpt-4o",
            "api_key": "your-api-key",
            "top_k": 15,
            "temperature": 0.0
        }
    }
}
```
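One simple way to switch between the two profiles per deployment (the `RERANK_PROFILE` environment variable is a hypothetical convention, not a mem0 feature):

```python
import os

# Choose a reranker profile at startup.
profile = os.getenv("RERANK_PROFILE", "fast")
config = quality_config if profile == "quality" else fast_config
m = Memory.from_config(config)
```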
For harder queries, you can ask the model to reason through the relevance before scoring:

```python
reasoning_prompt = """
Evaluate this memory's relevance using multi-step reasoning:

1. What is the main intent of the query?
2. What key information does the memory contain?
3. How directly does the memory address the query?
4. What additional context might be needed?

Based on this analysis, rate relevance 0.0-1.0:

Query: "{query}"
Document: "{document}"

Analysis:
Step 1 (Intent):
Step 2 (Information):
Step 3 (Directness):
Step 4 (Context):

Final Score:
"""
```
For production use, chain a primary and a fallback reranker before degrading to vector search:

```python
class RobustLLMReranker:
    def __init__(self, primary_config, fallback_config=None):
        self.primary = Memory.from_config(primary_config)
        self.fallback = Memory.from_config(fallback_config) if fallback_config else None

    def search(self, query, user_id, max_retries=2):
        # Try the primary LLM reranker
        for attempt in range(max_retries):
            try:
                return self.primary.search(query, user_id=user_id, rerank=True)
            except Exception as e:
                print(f"Primary reranker attempt {attempt + 1} failed: {e}")

        # Try the fallback reranker
        if self.fallback:
            try:
                return self.fallback.search(query, user_id=user_id, rerank=True)
            except Exception as e:
                print(f"Fallback reranker failed: {e}")

        # Final fallback: vector search only
        return self.primary.search(query, user_id=user_id, rerank=False)


# Usage
primary_config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "openai",
            "model": "gpt-4o",
            "api_key": "your-api-key"
        }
    }
}

fallback_config = {
    "reranker": {
        "provider": "llm_reranker",
        "config": {
            "provider": "openai",
            "model": "gpt-4o-mini",
            "api_key": "your-api-key"
        }
    }
}

reranker = RobustLLMReranker(primary_config, fallback_config)
results = reranker.search("What are my preferences?", "alice")
```
Common issues:

- **Inconsistent scores**: keep `temperature` at 0.0 and make sure the prompt asks for only a numeric score.
- **API rate limits**: add retries with a vector-search fallback, as in `safe_llm_rerank_search` above.
- **Poor ranking quality**: try a stronger model from the comparison table or a domain-specific `scoring_prompt`.