Back to Promptfoo

Building a Deep Research Compatible MCP Server

examples/openai-deep-research/mcp-server-example.md

0.121.9 · 9.7 KB
Original Source

Building a Deep Research Compatible MCP Server

Deep research models require MCP servers that implement a specific search and fetch interface. This guide shows you how to build a compatible server.

Required Interface

Your MCP server must provide exactly two tools:

  1. search - Searches your data and returns results
  2. fetch - Retrieves full content for a specific document

Example Implementation

Here's a minimal Express.js server that implements the required interface:

javascript
const express = require('express');
const app = express();
app.use(express.json());

// Sample data store: a small in-memory corpus, listed as plain records
const sampleDocuments = [
  {
    id: 'doc1',
    title: 'Q1 Sales Report',
    content: 'Total sales for Q1 were $1.2M, up 15% from last year...',
    metadata: { department: 'sales', date: '2025-03-31' },
  },
  {
    id: 'doc2',
    title: 'Product Roadmap 2025',
    content: 'Key features planned: AI integration, mobile app redesign...',
    metadata: { department: 'product', date: '2025-01-15' },
  },
];

// Key each record by its own `id` so handlers can look documents up directly
const documents = Object.fromEntries(sampleDocuments.map((doc) => [doc.id, doc]));

// Wraps plain text in the tool-result shape used by every tool response below.
// `isError` is only added to the payload when the call failed.
function toolResult(text, isError = false) {
  const result = { content: [{ type: 'text', text }] };
  if (isError) {
    result.isError = true;
  }
  return result;
}

// MCP endpoint
// Accepts a JSON body of the form { method, params } and handles two methods:
//   - tools/list: returns the `search` and `fetch` tool definitions
//   - tools/call: runs the tool named in params.name with params.arguments
// Unknown methods and unknown tools are answered with HTTP 400.
// NOTE(review): this example reads a bare { method, params } body; confirm
// against the MCP specification whether your client sends a JSON-RPC envelope.
app.post('/mcp', async (req, res) => {
  // Fall back to an empty object so a missing body is reported as
  // "Unknown method" instead of throwing inside the async handler
  const { method, params } = req.body || {};

  // List available tools
  if (method === 'tools/list') {
    return res.json({
      tools: [
        {
          name: 'search',
          description: 'Search internal documents',
          inputSchema: {
            type: 'object',
            properties: {
              query: { type: 'string', description: 'Search query' },
              limit: { type: 'number', description: 'Max results', default: 10 },
            },
            required: ['query'],
          },
        },
        {
          name: 'fetch',
          description: 'Fetch document by ID',
          inputSchema: {
            type: 'object',
            properties: {
              id: { type: 'string', description: 'Document ID' },
            },
            required: ['id'],
          },
        },
      ],
    });
  }

  // Handle tool calls
  if (method === 'tools/call') {
    // `params` and `arguments` may be absent or null in a malformed request
    const { name, arguments: toolArgs } = params || {};
    const args = toolArgs || {};

    if (name === 'search') {
      // `query` is declared as required in the schema; reject anything that
      // is not a string rather than crashing on `.toLowerCase()`
      if (typeof args.query !== 'string') {
        return res.json(toolResult('Invalid arguments: "query" must be a string', true));
      }

      const query = args.query.toLowerCase();
      // Only accept a finite limit >= 1; a negative value would otherwise make
      // `slice` drop results from the end of the list
      const limit =
        Number.isFinite(args.limit) && args.limit >= 1 ? Math.floor(args.limit) : 10;

      // Simple search implementation: case-insensitive substring match
      const results = Object.values(documents)
        .filter(
          (doc) =>
            doc.title.toLowerCase().includes(query) || doc.content.toLowerCase().includes(query),
        )
        .slice(0, limit)
        .map((doc) => ({
          id: doc.id,
          title: doc.title,
          // Add the ellipsis only when the content was actually cut off
          snippet: doc.content.length > 100 ? doc.content.substring(0, 100) + '...' : doc.content,
          metadata: doc.metadata,
        }));

      return res.json(toolResult(JSON.stringify({ results }, null, 2)));
    }

    if (name === 'fetch') {
      // Own-property check keeps inherited keys such as "constructor" or
      // "toString" from being treated as documents
      const found =
        typeof args.id === 'string' && Object.prototype.hasOwnProperty.call(documents, args.id);
      if (!found) {
        return res.json(toolResult('Document not found', true));
      }

      return res.json(toolResult(JSON.stringify(documents[args.id], null, 2)));
    }

    return res.status(400).json({ error: 'Unknown tool' });
  }

  return res.status(400).json({ error: 'Unknown method' });
});

// Start the HTTP server; the callback logs the address it is served on
const PORT = 3000;
app.listen(PORT, function onListening() {
  console.log('MCP server running on http://localhost:3000');
});

Python Example with FastAPI

python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Dict, List, Any, Optional
import json

app = FastAPI()

# Sample data store: a small in-memory corpus, listed as plain records
_SAMPLE_DOCUMENTS = [
    {
        "id": "doc1",
        "title": "Q1 Sales Report",
        "content": "Total sales for Q1 were $1.2M, up 15% from last year...",
        "metadata": {"department": "sales", "date": "2025-03-31"},
    },
    {
        "id": "doc2",
        "title": "Product Roadmap 2025",
        "content": "Key features planned: AI integration, mobile app redesign...",
        "metadata": {"department": "product", "date": "2025-01-15"},
    },
]

# Key each record by its own "id" so handlers can look documents up directly
documents = {record["id"]: record for record in _SAMPLE_DOCUMENTS}

class MCPRequest(BaseModel):
    """Request body accepted by the /mcp endpoint."""

    # Operation to perform; the endpoint handles "tools/list" and "tools/call"
    method: str
    # Operation arguments; optional, None when the caller omits them
    params: Optional[Dict[str, Any]] = None

def _tool_result(text: str, is_error: bool = False) -> Dict[str, Any]:
    """Wrap plain text in the tool-result shape returned by every tool call."""
    result: Dict[str, Any] = {"content": [{"type": "text", "text": text}]}
    if is_error:
        result["isError"] = True
    return result


def _coerce_limit(value: Any, default: int = 10) -> int:
    """Return ``value`` as a positive int, or ``default`` when it is unusable."""
    # bool is a subclass of int, so rule it out before the numeric check
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return default
    # One comparison rejects NaN, infinities, zero, and negative numbers
    if not 1 <= value < float("inf"):
        return default
    # The schema declares "number", so JSON floats such as 5.0 are valid input
    return int(value)


@app.post("/mcp")
async def mcp_endpoint(request: MCPRequest):
    """Dispatch an MCP request to the matching handler.

    Args:
        request: Parsed request body with ``method`` and optional ``params``.

    Returns:
        For ``tools/list``, a dict with the ``search`` and ``fetch`` tool
        definitions. For ``tools/call``, a tool result dict; invalid
        arguments and unknown document IDs are reported with ``isError``
        set to ``True``.

    Raises:
        HTTPException: 400 when the method or the tool name is not recognized.
    """
    if request.method == "tools/list":
        return {
            "tools": [
                {
                    "name": "search",
                    "description": "Search internal documents",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "query": {"type": "string", "description": "Search query"},
                            "limit": {"type": "number", "description": "Max results", "default": 10}
                        },
                        "required": ["query"]
                    }
                },
                {
                    "name": "fetch",
                    "description": "Fetch document by ID",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "id": {"type": "string", "description": "Document ID"}
                        },
                        "required": ["id"]
                    }
                }
            ]
        }

    if request.method == "tools/call":
        # params defaults to None on the model, so guard before calling .get
        params = request.params or {}
        tool_name = params.get("name")
        args = params.get("arguments")
        if not isinstance(args, dict):
            # Missing or malformed arguments are treated as "no arguments"
            args = {}

        if tool_name == "search":
            query = args.get("query")
            # "query" is declared as required in the schema above
            if not isinstance(query, str):
                return _tool_result('Invalid arguments: "query" must be a string', is_error=True)

            needle = query.lower()
            limit = _coerce_limit(args.get("limit"))

            results = []
            for doc in documents.values():
                if len(results) >= limit:
                    break
                if needle in doc["title"].lower() or needle in doc["content"].lower():
                    content = doc["content"]
                    results.append({
                        "id": doc["id"],
                        "title": doc["title"],
                        # Add the ellipsis only when the content was actually cut off
                        "snippet": content[:100] + "..." if len(content) > 100 else content,
                        "metadata": doc["metadata"]
                    })

            return _tool_result(json.dumps({"results": results}, indent=2))

        if tool_name == "fetch":
            doc_id = args.get("id")
            # Non-string IDs cannot match; checking first also avoids a
            # TypeError from dict.get on unhashable input such as a list
            doc = documents.get(doc_id) if isinstance(doc_id, str) else None

            if not doc:
                return _tool_result("Document not found", is_error=True)

            return _tool_result(json.dumps(doc, indent=2))

        raise HTTPException(status_code=400, detail="Unknown tool")

    raise HTTPException(status_code=400, detail="Unknown method")

if __name__ == "__main__":
    # Imported here so uvicorn is only needed when this file is run directly
    from uvicorn import run as serve

    # Listen on all interfaces, port 3000
    serve(app, host="0.0.0.0", port=3000)

Integration with Deep Research

Configure your deep research model to use your MCP server:

yaml
# Provider configuration that attaches an MCP server to a deep research model
providers:
  - id: openai:responses:o3-deep-research
    config:
      max_output_tokens: 100000
      tools:
        - type: web_search_preview # Required
        - type: mcp
          server_label: internal_docs
          # Must match the host, port, and path your MCP server listens on
          # (the example servers in this guide serve POST /mcp on port 3000).
          # NOTE(review): confirm that localhost is reachable from wherever the
          # model actually invokes the tool; a public URL may be needed.
          server_url: http://localhost:3000/mcp
          require_approval: never # Required for deep research
          headers:
            # Placeholder credential; replace it with a real key. The example
            # servers in this guide do not validate this header.
            Authorization: Bearer your-api-key

Best Practices

  1. Efficient Search: Implement proper indexing (e.g., Elasticsearch, PostgreSQL full-text search)
  2. Result Ranking: Return most relevant results first
  3. Metadata: Include useful metadata in search results
  4. Error Handling: Return clear error messages for debugging
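  5. Authentication: Secure your MCP server with API keys or OAuth. The example servers in this guide accept unauthenticated requests, so add a check (for instance, validating the Authorization header) before exposing them beyond your own machine.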
  6. Rate Limiting: Implement rate limits to prevent abuse
  7. Logging: Log all requests for debugging and monitoring

Testing Your MCP Server

Test your server using curl:

bash
# List tools: expects a JSON object with a "tools" array
curl --request POST http://localhost:3000/mcp \
  --header "Content-Type: application/json" \
  --data '{"method": "tools/list"}'

# Search: calls the "search" tool with a query string
curl --request POST http://localhost:3000/mcp \
  --header "Content-Type: application/json" \
  --data '{
    "method": "tools/call",
    "params": {
      "name": "search",
      "arguments": {"query": "sales"}
    }
  }'

# Fetch: calls the "fetch" tool with a document ID
curl --request POST http://localhost:3000/mcp \
  --header "Content-Type: application/json" \
  --data '{
    "method": "tools/call",
    "params": {
      "name": "fetch",
      "arguments": {"id": "doc1"}
    }
  }'

Advanced Features

Connecting to Real Data Sources

Replace the sample data with connections to your actual systems:

javascript
// Example: Connect to PostgreSQL
const { Pool } = require('pg');
// The connection string is read from the DATABASE_URL environment variable
const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
});

// In your search handler
if (name === 'search') {
  // Validate the caller-supplied limit before it reaches SQL: PostgreSQL
  // rejects a negative LIMIT, and `||` alone would pass one through
  const limit = Number.isFinite(args.limit) && args.limit >= 1 ? Math.floor(args.limit) : 10;

  // Match on title and content (as the in-memory example does) and order by
  // ts_rank so the most relevant documents come first. Values are passed as
  // bind parameters ($1, $2), never interpolated into the SQL text.
  const result = await pool.query(
    `SELECT id, title, content
       FROM documents
      WHERE to_tsvector(coalesce(title, '') || ' ' || coalesce(content, ''))
            @@ plainto_tsquery($1)
      ORDER BY ts_rank(
                 to_tsvector(coalesce(title, '') || ' ' || coalesce(content, '')),
                 plainto_tsquery($1)
               ) DESC
      LIMIT $2`,
    [args.query, limit],
  );
  // Format and return results...
}

Adding Filters

Enhance search with filters:

javascript
{
  name: 'search',
  inputSchema: {
    type: 'object',
    properties: {
      query: { type: 'string' },
      filters: {
        type: 'object',
        properties: {
          department: { type: 'string' },
          dateFrom: { type: 'string', format: 'date' },
          dateTo: { type: 'string', format: 'date' }
        }
      }
    }
  }
}

Remember: Deep research models will automatically use your search and fetch tools to gather the information needed to answer user queries. The better your search implementation, the better the research results will be.