Log in Get started

Nvidia | Models

docs/src/content/en/models/providers/nvidia.mdx

2025-12-18 · 21.9 KB

Original Source

Nvidia

Access 73 Nvidia models through Mastra's model router. Authentication is handled automatically using the `NVIDIA_API_KEY` environment variable.

Learn more in the Nvidia documentation.

bash

NVIDIA_API_KEY=your-api-key

typescript

import { Agent } from "@mastra/core/agent";

// Create an agent backed by an Nvidia-hosted model. The model string has the
// form "nvidia/<publisher>/<model>" and should match an entry in the Models
// table. A chat/instruct model is used here because requests go through the
// OpenAI-compatible /chat/completions endpoint; an image-generation model
// such as black-forest-labs/flux.1-dev is not a suitable agent model.
const agent = new Agent({
  id: "my-agent",
  name: "My Agent",
  instructions: "You are a helpful assistant",
  model: "nvidia/meta/llama-3.3-70b-instruct"
});

// Generate a response
const response = await agent.generate("Hello!");

// Stream a response
const stream = await agent.stream("Tell me a story");
for await (const chunk of stream) {
  console.log(chunk);
}

:::info

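Mastra uses the OpenAI-compatible `/chat/completions` endpoint, so some provider-specific features may not be available. Models in the table that are not chat models (for example image-generation, speech-to-text, or embedding models) may not work through this endpoint. Check the Nvidia documentation for details.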

:::

Models

<ProviderModelsTable models={[ { "model": "nvidia/black-forest-labs/flux.1-dev", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": 4096, "maxOutput": null, "inputCost": null, "outputCost": null }, { "model": "nvidia/deepseek-ai/deepseek-coder-6.7b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/deepseek-ai/deepseek-r1", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": true, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/deepseek-ai/deepseek-r1-0528", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/deepseek-ai/deepseek-v3.1", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 128000, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/deepseek-ai/deepseek-v3.1-terminus", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 128000, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/deepseek-ai/deepseek-v3.2", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 163840, "maxOutput": 65536, "inputCost": null, "outputCost": null }, { "model": "nvidia/google/codegemma-1.1-7b", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/google/codegemma-7b", "imageInput": false, 
"audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/google/gemma-2-27b-it", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/google/gemma-2-2b-it", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/google/gemma-3-12b-it", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/google/gemma-3-1b-it", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/google/gemma-3-27b-it", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 131072, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/google/gemma-3n-e2b-it", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/google/gemma-3n-e4b-it", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/meta/codellama-70b", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, 
{ "model": "nvidia/meta/llama-3.1-405b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/meta/llama-3.1-70b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/meta/llama-3.2-11b-vision-instruct", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/meta/llama-3.2-1b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/meta/llama-3.3-70b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/meta/llama-4-maverick-17b-128e-instruct", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/meta/llama-4-scout-17b-16e-instruct", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/meta/llama3-70b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/meta/llama3-8b-instruct", "imageInput": false, "audioInput": false, 
"videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/microsoft/phi-3-medium-128k-instruct", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/microsoft/phi-3-medium-4k-instruct", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 4000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/microsoft/phi-3-small-128k-instruct", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/microsoft/phi-3-small-8k-instruct", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 8000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/microsoft/phi-3-vision-128k-instruct", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/microsoft/phi-3.5-moe-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/microsoft/phi-3.5-vision-instruct", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/microsoft/phi-4-mini-instruct", "imageInput": true, "audioInput": true, "videoInput": false, "toolUsage": true, "reasoning": true, 
"contextWindow": 131072, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/minimaxai/minimax-m2.1", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 204800, "maxOutput": 131072, "inputCost": null, "outputCost": null }, { "model": "nvidia/minimaxai/minimax-m2.5", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 204800, "maxOutput": 131072, "inputCost": null, "outputCost": null }, { "model": "nvidia/mistralai/codestral-22b-instruct-v0.1", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/mistralai/devstral-2-123b-instruct-2512", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 262144, "maxOutput": 262144, "inputCost": null, "outputCost": null }, { "model": "nvidia/mistralai/mamba-codestral-7b-v0.1", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/mistralai/ministral-14b-instruct-2512", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 262144, "maxOutput": 262144, "inputCost": null, "outputCost": null }, { "model": "nvidia/mistralai/mistral-large-2-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/mistralai/mistral-large-3-675b-instruct-2512", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 262144, "maxOutput": 262144, 
"inputCost": null, "outputCost": null }, { "model": "nvidia/mistralai/mistral-small-3.1-24b-instruct-2503", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/moonshotai/kimi-k2-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 128000, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/moonshotai/kimi-k2-instruct-0905", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 262144, "maxOutput": 262144, "inputCost": null, "outputCost": null }, { "model": "nvidia/moonshotai/kimi-k2-thinking", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 262144, "maxOutput": 262144, "inputCost": null, "outputCost": null }, { "model": "nvidia/moonshotai/kimi-k2.5", "imageInput": true, "audioInput": false, "videoInput": true, "toolUsage": true, "reasoning": true, "contextWindow": 262144, "maxOutput": 262144, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/cosmos-nemotron-34b", "imageInput": true, "audioInput": false, "videoInput": true, "toolUsage": false, "reasoning": true, "contextWindow": 131072, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/llama-3.1-nemotron-51b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/llama-3.1-nemotron-70b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": 
"nvidia/nvidia/llama-3.1-nemotron-ultra-253b-v1", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 131072, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/llama-3.3-nemotron-super-49b-v1", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/llama-3.3-nemotron-super-49b-v1.5", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/llama-embed-nemotron-8b", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": 32768, "maxOutput": 2048, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/llama3-chatqa-1.5-70b", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/nemoretriever-ocr-v1", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": null, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/nemotron-3-nano-30b-a3b", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 131072, "maxOutput": 131072, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/nemotron-4-340b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/nvidia-nemotron-nano-9b-v2", "imageInput": false, 
"audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 131072, "maxOutput": 131072, "inputCost": null, "outputCost": null }, { "model": "nvidia/nvidia/parakeet-tdt-0.6b-v2", "imageInput": false, "audioInput": true, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": null, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/openai/gpt-oss-120b", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": true, "contextWindow": 128000, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/openai/whisper-large-v3", "imageInput": false, "audioInput": true, "videoInput": false, "toolUsage": false, "reasoning": false, "contextWindow": null, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/qwen/qwen2.5-coder-32b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/qwen/qwen2.5-coder-7b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/qwen/qwen3-235b-a22b", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 131072, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/qwen/qwen3-coder-480b-a35b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 262144, "maxOutput": 66536, "inputCost": null, "outputCost": null }, { "model": "nvidia/qwen/qwen3-next-80b-a3b-instruct", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": false, "contextWindow": 262144, 
"maxOutput": 16384, "inputCost": null, "outputCost": null }, { "model": "nvidia/qwen/qwen3-next-80b-a3b-thinking", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 262144, "maxOutput": 16384, "inputCost": null, "outputCost": null }, { "model": "nvidia/qwen/qwen3.5-397b-a17b", "imageInput": true, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 262144, "maxOutput": 8192, "inputCost": null, "outputCost": null }, { "model": "nvidia/qwen/qwq-32b", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": false, "reasoning": true, "contextWindow": 128000, "maxOutput": 4096, "inputCost": null, "outputCost": null }, { "model": "nvidia/stepfun-ai/step-3.5-flash", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 256000, "maxOutput": 16384, "inputCost": null, "outputCost": null }, { "model": "nvidia/z-ai/glm4.7", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 204800, "maxOutput": 131072, "inputCost": null, "outputCost": null }, { "model": "nvidia/z-ai/glm5", "imageInput": false, "audioInput": false, "videoInput": false, "toolUsage": true, "reasoning": true, "contextWindow": 202752, "maxOutput": 131000, "inputCost": null, "outputCost": null } ]} />

Advanced configuration

Custom headers

typescript

// Object form of `model`: sets the endpoint URL, model id, API key, and
// custom headers explicitly instead of using the "nvidia/..." string shorthand.
const agent = new Agent({
  id: "custom-agent",
  name: "custom-agent",
  model: {
    url: "https://integrate.api.nvidia.com/v1",
    // Use a chat/instruct model from the Models table; an image-generation
    // model such as black-forest-labs/flux.1-dev is not a suitable agent model.
    id: "nvidia/meta/llama-3.3-70b-instruct",
    apiKey: process.env.NVIDIA_API_KEY,
    headers: {
      "X-Custom-Header": "value"
    }
  }
});

Dynamic model selection

typescript

// `model` may be a function: it receives the request context and returns the
// model string to use, so the choice can differ from one request to the next.
const agent = new Agent({
  id: "dynamic-agent",
  name: "Dynamic Agent",
  model: ({ requestContext }) => {
    // NOTE(review): assumes `task` is readable as a plain property of
    // requestContext; confirm against the RequestContext API (it may need an
    // accessor such as requestContext.get("task")).
    const useAdvanced = requestContext.task === "complex";
    // Both branches return chat-capable models listed in the Models table:
    // a larger reasoning model for complex tasks, a general instruct model
    // otherwise.
    return useAdvanced
      ? "nvidia/z-ai/glm5"
      : "nvidia/meta/llama-3.3-70b-instruct";
  }
});