content/docs/07-reference/01-ai-sdk-core/11-transcribe.mdx
# `transcribe()`

<Note type="warning">`transcribe` is an experimental feature.</Note>

Generates a transcript from an audio file.

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';

const { text: transcript } = await transcribe({
  model: openai.transcription('whisper-1'),
  audio: await readFile('audio.mp3'),
});

console.log(transcript);
```

<Snippet
text={`import { experimental_transcribe as transcribe } from "ai"`}
prompt={false}
/>
<PropertiesTable
content={[
{
name: 'model',
type: 'TranscriptionModelV4',
description: 'The transcription model to use.',
},
{
name: 'audio',
type: 'DataContent (string | Uint8Array | ArrayBuffer | Buffer) | URL',
description: 'The audio file to generate the transcript from.',
},
{
name: 'providerOptions',
type: 'Record<string, JSONObject>',
isOptional: true,
description: 'Additional provider-specific options.',
},
{
name: 'maxRetries',
type: 'number',
isOptional: true,
description: 'Maximum number of retries. Default: 2.',
},
{
name: 'abortSignal',
type: 'AbortSignal',
isOptional: true,
description: 'An optional abort signal to cancel the call.',
},
{
name: 'headers',
type: 'Record<string, string>',
isOptional: true,
description: 'Additional HTTP headers for the request.',
},
{
name: 'download',
type: '(options: { url: URL; abortSignal?: AbortSignal }) => Promise<{ data: Uint8Array; mediaType: string | undefined }>',
isOptional: true,
description:
'Custom download function for fetching audio from URLs. Use createDownload() from ai to create a download function with custom size limits, e.g. createDownload({ maxBytes: 50 * 1024 * 1024 }). Default: built-in download with 2 GiB limit.',
},
]}
/>
<PropertiesTable content={[ { name: 'text', type: 'string', description: 'The complete transcribed text from the audio input.', }, { name: 'segments', type: 'Array<{ text: string; startSecond: number; endSecond: number }>', description: 'An array of transcript segments, each containing a portion of the transcribed text along with its start and end times in seconds.', }, { name: 'language', type: 'string | undefined', description: 'The language of the transcript in ISO-639-1 format e.g. "en" for English.', }, { name: 'durationInSeconds', type: 'number | undefined', description: 'The duration of the transcript in seconds.', }, { name: 'warnings', type: 'Warning[]', description: 'Warnings from the model provider (e.g. unsupported settings).', }, { name: 'providerMetadata', type: 'Record<string, JSONObject>', isOptional: true, description: 'Optional metadata from the provider. The outer key is the provider name. The inner values are the metadata. Details depend on the provider.', }, { name: 'responses', type: 'Array<TranscriptionModelResponseMetadata>', description: 'Response metadata from the provider. There may be multiple responses if we made multiple calls to the model.', properties: [ { type: 'TranscriptionModelResponseMetadata', parameters: [ { name: 'timestamp', type: 'Date', description: 'Timestamp for the start of the generated response.', }, { name: 'modelId', type: 'string', description: 'The ID of the response model that was used to generate the response.', }, { name: 'headers', type: 'Record<string, string>', isOptional: true, description: 'Response headers.', }, ], }, ], }, ]} />