Back to Supermemory

Track Processing Status

apps/docs/memory-api/track-progress.mdx

latest6.1 KB
Original Source

Track your documents through the processing pipeline to provide better user experiences and handle edge cases.

Processing Pipeline

Each stage serves a specific purpose:

  • Queued: Document is waiting in the processing queue
  • Extracting: Content is being extracted (OCR for images, transcription for videos)
  • Chunking: Content is broken into optimal, searchable pieces
  • Embedding: Each chunk is converted to vector representations
  • Indexing: Vectors are added to the search index
  • Done: Document is fully processed and searchable
<Note> Processing time varies by content type. Plain text processes in seconds, while a 10-minute video might take 2-3 minutes. </Note>

Processing Documents

Monitor all documents currently being processed across your account.

GET /v3/documents/processing

<CodeGroup>
typescript

// Direct API call (not in SDK)
const response = await fetch('https://api.supermemory.ai/v3/documents/processing', {
  headers: {
    'Authorization': `Bearer ${SUPERMEMORY_API_KEY}`
  }
});

// fetch() resolves even for 4xx/5xx responses, so check the status before
// reading the body; an error payload would not contain `documents`.
if (!response.ok) {
  throw new Error(`Request failed: ${response.status} ${response.statusText}`);
}

const processing = await response.json();
console.log(`${processing.documents.length} documents processing`);
python
# Direct API call (not in SDK)
import requests

response = requests.get(
    'https://api.supermemory.ai/v3/documents/processing',
    headers={'Authorization': f'Bearer {SUPERMEMORY_API_KEY}'},
    timeout=30,  # requests has no default timeout; avoid hanging forever
)
# Raise on 4xx/5xx so an error payload is not mistaken for a document list
response.raise_for_status()

processing = response.json()
print(f"{len(processing['documents'])} documents processing")
bash
curl -X GET "https://api.supermemory.ai/v3/documents/processing" \
  -H "Authorization: Bearer $SUPERMEMORY_API_KEY"
</CodeGroup>

Response Format

json
{
  "documents": [
    {
      "id": "doc_abc123",
      "status": "extracting",
      "created_at": "2024-01-15T10:30:00Z",
      "updated_at": "2024-01-15T10:30:15Z",
      "container_tags": ["research"],
      "metadata": {
        "source": "upload",
        "filename": "report.pdf"
      }
    },
    {
      "id": "doc_def456",
      "status": "chunking",
      "created_at": "2024-01-15T10:29:00Z",
      "updated_at": "2024-01-15T10:30:00Z",
      "container_tags": ["articles"],
      "metadata": {
        "source": "url",
        "url": "https://example.com/article"
      }
    }
  ],
  "total": 2
}

Individual Documents

Track specific document processing status.

GET /v3/documents/{id}

<CodeGroup>
typescript
// `let`, not `const`: the variable is reassigned on every poll below
let memory = await client.documents.get("doc_abc123");

console.log(`Status: ${memory.status}`);

// Poll until the document reaches a terminal state ('done' or 'failed');
// checking only for 'done' would loop forever on a failed document
while (memory.status !== 'done' && memory.status !== 'failed') {
  await new Promise(r => setTimeout(r, 2000));
  memory = await client.documents.get("doc_abc123");
  console.log(`Status: ${memory.status}`);
}
python
import time

memory = client.documents.get("doc_abc123")

print(f"Status: {memory['status']}")

# Poll until the document reaches a terminal state ('done' or 'failed');
# checking only for 'done' would loop forever on a failed document
while memory['status'] not in ('done', 'failed'):
    time.sleep(2)
    memory = client.documents.get("doc_abc123")
    print(f"Status: {memory['status']}")
bash
curl -X GET "https://api.supermemory.ai/v3/documents/doc_abc123" \
  -H "Authorization: Bearer $SUPERMEMORY_API_KEY"
</CodeGroup>

Response Format

json
{
  "id": "doc_abc123",
  "status": "done",
  "content": "The original content...",
  "container_tags": ["research"],
  "metadata": {
    "source": "upload",
    "filename": "report.pdf"
  },
  "created_at": "2024-01-15T10:30:00Z",
  "updated_at": "2024-01-15T10:31:00Z"
}

For full details on the get-document-by-ID endpoint, see the API Reference tab.

Status Values

| Status | Description | Typical Duration |
|--------|-------------|------------------|
| `queued` | Waiting to be processed | < 5 seconds |
| `extracting` | Extracting content from source | 5-30 seconds |
| `chunking` | Breaking into searchable pieces | 5-15 seconds |
| `embedding` | Creating vector representations | 10-30 seconds |
| `indexing` | Adding to search index | 5-10 seconds |
| `done` | Fully processed and searchable | - |
| `failed` | Processing failed | - |

Polling Best Practices

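When polling for status updates, cap the total wait time and stop as soon as the document reaches a terminal state (`done` or `failed`):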

typescript
/**
 * Polls a document until it finishes processing.
 *
 * @param documentId - ID of the document to watch.
 * @param maxWaitMs - Upper bound on the total time spent waiting, in
 *   milliseconds (default: 5 minutes).
 * @param pollIntervalMs - Delay between status checks, in milliseconds
 *   (default: 2 seconds).
 * @returns The document returned by `client.documents.get` once its status
 *   is `'done'`.
 * @throws Error if the status becomes `'failed'`, or if `maxWaitMs` elapses
 *   before the document is done.
 */
async function waitForProcessing(documentId: string, maxWaitMs = 300000, pollIntervalMs = 2000) {
  const deadline = Date.now() + maxWaitMs;

  while (Date.now() < deadline) {
    const doc = await client.documents.get(documentId);

    if (doc.status === 'done') {
      return doc;
    }

    if (doc.status === 'failed') {
      throw new Error(`Processing failed for ${documentId}`);
    }

    // Never sleep past the deadline, so the timeout fires close to maxWaitMs
    // instead of up to one full poll interval late.
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) {
      break;
    }
    await new Promise(r => setTimeout(r, Math.min(pollIntervalMs, remainingMs)));
  }

  throw new Error(`Timeout waiting for ${documentId}`);
}

Batch Processing

For multiple documents, track them efficiently:

typescript
/**
 * Tracks several documents until each one reaches a terminal status.
 *
 * @param documentIds - IDs of the documents to track.
 * @param pollIntervalMs - Delay between polling rounds, in milliseconds
 *   (default: 5 seconds).
 * @returns A map from document ID to its last observed status; every value
 *   is `'done'` or `'failed'` when the promise resolves.
 */
async function trackBatch(documentIds: string[], pollIntervalMs = 5000) {
  const statuses = new Map<string, string>();

  // A document is pending until it has been seen as 'done' or 'failed'.
  const isPending = (id: string) => {
    const status = statuses.get(id);
    return status !== 'done' && status !== 'failed';
  };

  // Fetch the given documents concurrently and record their latest status.
  const refresh = async (ids: string[]) => {
    const docs = await Promise.all(ids.map(id => client.documents.get(id)));
    ids.forEach((id, i) => statuses.set(id, docs[i].status));
  };

  // Initial check
  await refresh(documentIds);

  // Poll until every document is 'done' or 'failed'
  while (documentIds.some(isPending)) {
    await new Promise(r => setTimeout(r, pollIntervalMs));

    // Only re-fetch documents that have not reached a terminal status yet
    await refresh(documentIds.filter(isPending));

    // Log progress
    const done = [...statuses.values()].filter(s => s === 'done').length;
    console.log(`Progress: ${done}/${documentIds.length} complete`);
  }

  return statuses;
}

Error Handling

Handle processing failures gracefully:

typescript
/**
 * Adds content and waits for it to finish processing, re-submitting the
 * content when processing fails or times out.
 *
 * @param content - Content passed to `client.add`.
 * @param maxRetries - Total number of attempts; must be a positive integer
 *   (default: 3).
 * @param baseDelayMs - Base of the exponential backoff, in milliseconds; the
 *   wait after attempt `n` is `baseDelayMs * 2^n` (default: 1000).
 * @returns The value resolved by `waitForProcessing` for the first attempt
 *   that succeeds.
 * @throws RangeError if `maxRetries` is not a positive integer.
 * @throws The error from the final attempt once all attempts have failed.
 */
async function addWithRetry(content: string, maxRetries = 3, baseDelayMs = 1000) {
  // Without this guard, a value below 1 or a non-integer would fall out of the
  // loop and resolve to undefined instead of reporting the failure.
  if (!Number.isInteger(maxRetries) || maxRetries < 1) {
    throw new RangeError(`maxRetries must be a positive integer, got ${maxRetries}`);
  }

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // The add call sits outside the try block, so a failure to submit
    // propagates immediately; only processing failures are retried.
    const { id } = await client.add({ content });

    try {
      // `return await` keeps a rejection inside this try/catch
      return await waitForProcessing(id);
    } catch (error) {
      console.error(`Attempt ${attempt} failed:`, error);

      if (attempt === maxRetries) {
        throw error;
      }

      // Exponential backoff
      await new Promise(r => setTimeout(r, baseDelayMs * 2 ** attempt));
    }
  }

  // Unreachable: the final attempt either returns or rethrows above.
  throw new Error('addWithRetry: retries exhausted');
}