Back to Chroma

Search Basics

docs/mintlify/cloud/search-api/search-basics.mdx

1.5.9 · 10.6 KB
Original Source

import { Callout } from '/snippets/callout.mdx';

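This page covers the basics of Search construction. For detailed usage of specific components, see the dedicated pages on filtering (`where`), ranking (`rank`), pagination (`limit`), and field selection (`select`).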

The Search Class

<CodeGroup>
```python Python
from chromadb import Search

# Create an empty search
search = Search()

# Direct construction with parameters
search = Search(
    where={"status": "active"},
    rank={"$knn": {"query": [0.1, 0.2]}},
    limit=10,
    select=["#document", "#score"]
)
```

```typescript TypeScript
import { Search } from 'chromadb';

// Create an empty search
// (no filter, rank, limit, or selection is set)
const search = new Search();

// Direct construction with parameters
// `where` filters results, `rank` scores and orders them, `limit` controls
// pagination, and `select` lists the fields to include in results.
const search2 = new Search({
  where: { status: "active" },
  rank: { $knn: { query: [0.1, 0.2] } },
  limit: 10,
  select: ["#document", "#score"]
});
```

```rust Rust
use chroma::types::{Key, QueryVector, RankExpr, SearchPayload};

let search = SearchPayload::default()
    .r#where(Key::field("status").eq("active"))
    .rank(RankExpr::Knn {
        query: QueryVector::Dense(vec![0.1, 0.2]),
        key: Key::Embedding,
        limit: 10,
        default: None,
        return_rank: false,
    })
    .limit(Some(10), 0)
    .select([Key::Document, Key::Score]);
```
</CodeGroup>

Constructor Parameters

The Search class accepts four optional parameters:

  • where: Filter expressions to narrow down results

    • Types: Where expression, dict, or None
    • Default: None (no filtering)
  • rank: Ranking expressions to score and order results

    • Types: Rank expression, dict, or None
    • Default: None (no ranking, natural order)
  • limit: Pagination control

    • Types: Limit object, dict, int, or None
    • Default: None (no limit)
  • select: Fields to include in results

    • Types: Select object, dict, list, set, or None
    • Default: None (returns IDs only)
    • Available fields: #id, #document, #embedding, #metadata, #score, or any custom metadata field
    • See field selection for details

Builder Pattern

The Search class provides a fluent interface with method chaining. Each method returns a new Search instance, making queries immutable and safe to reuse.

For detailed usage of each builder method, see the dedicated pages on filtering (`where`), ranking (`rank`), pagination (`limit`), and field selection (`select`).

<CodeGroup>
```python Python
from chromadb import Search, K, Knn

# Basic method chaining
search = (Search()
    .where(K("status") == "published")
    .rank(Knn(query="machine learning applications"))
    .limit(10)
    .select(K.DOCUMENT, K.SCORE))

# Each method returns a new instance
base_search = Search().where(K("category") == "science")
search_v1 = base_search.limit(5)   # New instance
search_v2 = base_search.limit(10)  # Different instance

# Progressive building
search = Search()
search = search.where(K("status") == "active")
search = search.rank(Knn(query="recent advances in quantum computing"))
search = search.limit(20)
search = search.select(K.DOCUMENT, K.METADATA)
```

```typescript TypeScript
import { Search, K, Knn } from 'chromadb';

// Basic method chaining
// Every builder call returns a new Search instance, which is what allows
// the calls to be chained into a single expression.
const search = new Search()
  .where(K("status").eq("published"))
  .rank(Knn({ query: "machine learning applications" }))
  .limit(10)
  .select(K.DOCUMENT, K.SCORE);

// Each method returns a new instance
// Both variations below are derived from the same `baseSearch`.
const baseSearch = new Search().where(K("category").eq("science"));
const searchV1 = baseSearch.limit(5);  // New instance
const searchV2 = baseSearch.limit(10); // Different instance

// Progressive building
// The result of each step is assigned back to `search2` because builder
// methods return a new instance.
let search2 = new Search();
search2 = search2.where(K("status").eq("active"));
search2 = search2.rank(Knn({ query: "recent advances in quantum computing" }));
search2 = search2.limit(20);
search2 = search2.select(K.DOCUMENT, K.METADATA);
```

```rust Rust
use chroma::types::{Key, QueryVector, RankExpr, SearchPayload};

let base = SearchPayload::default().r#where(Key::field("category").eq("science"));
let search_v1 = base.clone().limit(Some(5), 0);
let search_v2 = base.clone().limit(Some(10), 0);

let search = SearchPayload::default()
    .r#where(Key::field("status").eq("active"))
    .rank(RankExpr::Knn {
        query: QueryVector::Dense(vec![0.2, 0.4, 0.6]),
        key: Key::Embedding,
        limit: 20,
        default: None,
        return_rank: false,
    })
    .limit(Some(20), 0)
    .select([Key::Document, Key::Metadata]);
```
</CodeGroup>

Benefits of immutability:

  • Base queries can be reused safely
  • No unexpected side effects from modifications
  • Easy to create query variations

Direct Construction

You can create Search objects directly with various parameter types:

<CodeGroup>
```python Python
from chromadb import Search, K, Knn
from chromadb.execution.expression.operator import Limit, Select

# With expression objects
search = Search(
    where=K("status") == "active",
    rank=Knn(query="latest research papers"),
    limit=Limit(limit=10, offset=0),
    select=Select(keys={K.DOCUMENT, K.SCORE})
)

# Mixed types
search = Search(
    where=K("category") == "science",               # Expression
    rank={"$knn": {"query": "quantum mechanics"}},  # Dictionary
    limit=10,                                       # Integer
    select=[K.DOCUMENT, K.SCORE, "author"]          # List
)

# Minimal search (IDs only)
search = Search()

# Just filtering
search = Search(where=K("status") == "published")

# Just ranking
search = Search(rank=Knn(query="artificial intelligence"))
```

```typescript TypeScript
import { Search, K, Knn } from 'chromadb';

// With expression objects
// `limit` is given in object form here, with an explicit offset.
const search1 = new Search({
  where: K("status").eq("active"),
  rank: Knn({ query: "latest research papers" }),
  limit: { limit: 10, offset: 0 },
  select: [K.DOCUMENT, K.SCORE]
});

// With dictionaries (MongoDB-style)
// Same four parameters as above, written as plain object literals.
const search2 = new Search({
  where: { status: "active" },
  rank: { $knn: { query: "latest research papers" } },
  limit: { limit: 10, offset: 0 },
  select: { keys: ["#document", "#score"] }
});

// Mixed types
// Each parameter may independently use an expression, a dictionary, or a
// primitive/array form.
const search3 = new Search({
  where: K("category").eq("science"),          // Expression
  rank: { $knn: { query: "quantum mechanics" } },  // Dictionary
  limit: 10,                                   // Number
  select: [K.DOCUMENT, K.SCORE, "author"]      // Array
});

// Minimal search (IDs only)
// All parameters are optional; with no `select`, only IDs are returned.
const search4 = new Search();

// Just filtering
const search5 = new Search({ where: K("status").eq("published") });

// Just ranking
const search6 = new Search({ rank: Knn({ query: "artificial intelligence" }) });
```

```rust Rust
use chroma::types::{Key, QueryVector, RankExpr, SearchPayload};

let search = SearchPayload::default()
    .r#where(Key::field("category").eq("science"))
    .rank(RankExpr::Knn {
        query: QueryVector::Dense(vec![0.1, 0.2, 0.3]),
        key: Key::Embedding,
        limit: 10,
        default: None,
        return_rank: false,
    })
    .limit(Some(10), 0)
    .select([Key::Document, Key::Score, Key::field("author")]);
```
</CodeGroup>

Empty Search Behavior

An empty Search object has specific default behaviors:

<CodeGroup>
```python Python
# Empty search
search = Search()

# Equivalent to:
# - where: None (returns all documents)
# - rank: None (natural storage order)
# - limit: None (no limit on results)
# - select: None (returns IDs only)

result = collection.search(search)
# Result contains only IDs, no documents/embeddings/metadata/scores

# Add selection to get more fields
search = Search().select(K.DOCUMENT, K.METADATA)
result = collection.search(search)
# Now includes documents and metadata
```

```typescript TypeScript
// Empty search
const search = new Search();

// Equivalent to:
// - where: undefined (returns all documents)
// - rank: undefined (natural storage order)
// - limit: undefined (no limit on results)
// - select: empty (returns IDs only)
//
// NOTE: when no limit is specified, Chroma Cloud applies a default limit
// based on your quota, so prefer an explicit `.limit(...)` in production.

const result = await collection.search(search);
// Result contains only IDs, no documents/embeddings/metadata/scores

// Add selection to get more fields
// (`select` returns a new Search, so `search` above is left as-is)
const search2 = new Search().select(K.DOCUMENT, K.METADATA);
const result2 = await collection.search(search2);
// Now includes documents and metadata
```
</CodeGroup>

<Callout>
When no limit is specified, Chroma Cloud will apply a default limit based on your quota to prevent returning excessive results. For production use, it's recommended to always specify an explicit limit.
</Callout>

Common Initialization Patterns

Here are common patterns for building Search queries:

<CodeGroup>
```python Python
from chromadb import Search, K, Knn

# Pattern 1: Baseline - no filter, no rank (natural storage order)
def get_documents():
    return Search().select(K.DOCUMENT, K.METADATA)

# Pattern 2: Filter only - no ranking
def filter_recent_science():
    return (Search()
        .where((K("category") == "science") & (K("year") >= 2023))
        .limit(10)
        .select(K.DOCUMENT, K.METADATA))

# Pattern 3: Rank only - no filtering
def search_similar(query):
    return (Search()
        .rank(Knn(query=query))
        .limit(10)
        .select(K.DOCUMENT, K.SCORE))

# Pattern 4: Both filter and rank
def search_recent_science(query):
    return (Search()
        .where((K("category") == "science") & (K("year") >= 2023))
        .rank(Knn(query=query))
        .limit(10)
        .select(K.DOCUMENT, K.SCORE))
```

```typescript TypeScript
import { Search, K, Knn } from 'chromadb';

// Pattern 1: Baseline - no filter, no rank (natural storage order)
/**
 * Builds a Search with no `where` and no `rank`, selecting `K.DOCUMENT`
 * and `K.METADATA`.
 *
 * No explicit limit is set on the returned Search.
 *
 * @returns The configured Search.
 */
function getDocuments() {
  return new Search().select(K.DOCUMENT, K.METADATA);
}

// Pattern 2: Filter only - no ranking
/**
 * Builds a filter-only Search: `category` equals "science" AND `year` is at
 * least 2023, limited to 10 results, selecting `K.DOCUMENT` and `K.METADATA`.
 *
 * @returns The configured Search (no `rank` is applied).
 */
function filterRecentScience() {
  return new Search()
    .where(K("category").eq("science").and(K("year").gte(2023)))
    .limit(10)
    .select(K.DOCUMENT, K.METADATA);
}

// Pattern 3: Rank only - no filtering
/**
 * Builds a rank-only Search: ranks with `Knn` on the given query, limited to
 * 10 results, selecting `K.DOCUMENT` and `K.SCORE`.
 *
 * @param query - Query text passed to `Knn` as its `query` option.
 * @returns The configured Search (no `where` filter is applied).
 */
function searchSimilar(query: string) {
  return new Search()
    .rank(Knn({ query: query }))
    .limit(10)
    .select(K.DOCUMENT, K.SCORE);
}

// Pattern 4: Both filter and rank
/**
 * Builds a Search that both filters and ranks: `category` equals "science"
 * AND `year` is at least 2023, ranked with `Knn` on the given query, limited
 * to 10 results, selecting `K.DOCUMENT` and `K.SCORE`.
 *
 * @param query - Query text passed to `Knn` as its `query` option.
 * @returns The configured Search.
 */
function searchRecentScience(query: string) {
  return new Search()
    .where(K("category").eq("science").and(K("year").gte(2023)))
    .rank(Knn({ query: query }))
    .limit(10)
    .select(K.DOCUMENT, K.SCORE);
}
```
</CodeGroup>

Next Steps