Back to Paradedb

Terms

docs/documentation/aggregates/bucket/terms.mdx

0.24.04.1 KB
Original Source
<Note> If a text or JSON field is in the `GROUP BY` or `ORDER BY` clause, it must use the [literal](/documentation/tokenizers/available-tokenizers/literal) tokenizer. </Note>

A terms aggregation counts the number of occurrences for every unique value in a field. For example, the following query groups the mock_items table by rating, and calculates the number of items for each unique rating.

<CodeGroup> ```sql SQL SELECT rating, pdb.agg('{"value_count": {"field": "id"}}') FROM mock_items WHERE id @@@ pdb.all() GROUP BY rating LIMIT 10; ```
ts
import { search } from "@paradedb/drizzle-paradedb";

await db
  .select({
    rating: mockItems.rating,
    agg: search.agg({ value_count: { field: "id" } }),
  })
  .from(mockItems)
  .where(search.all(mockItems.id))
  .groupBy(mockItems.rating)
  .limit(10);
python
from paradedb import Agg, All, ParadeDB

MockItem.objects.filter(
    id=ParadeDB(All())
).values('rating').annotate(
    agg=Agg('{"value_count": {"field": "id"}}')
)[:10]
python
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import facets, pdb, search

stmt = (
    select(MockItem.rating, pdb.agg(facets.value_count(field="id")).label("agg"))
    .where(search.all(MockItem.id))
    .group_by(MockItem.rating)
    .limit(10)
)

with Session(engine) as session:
    session.execute(stmt).all()
ruby
MockItem.search(:id)
        .match_all
        .aggregate_by(
          :rating,
          agg: ParadeDB::Aggregations.value_count(:id)
        )
        .limit(10)
cs
await dbContext
    .MockItems.Where(item => EF.Functions.All(item.Id))
    .GroupBy(item => item.Rating)
    .Select(group => new
    {
        Rating = group.Key,
        Agg = EF.Functions.Agg(new { value_count = new { field = "id" } })
    })
    .Take(10)
    .ToListAsync();
</CodeGroup>
ini
 rating |       agg
--------+-----------------
      4 | {"value": 16.0}
      5 | {"value": 12.0}
      3 | {"value": 9.0}
      2 | {"value": 3.0}
      1 | {"value": 1.0}
(5 rows)

Ordering by the bucketing field is supported:

<CodeGroup> ```sql SQL SELECT rating, pdb.agg('{"value_count": {"field": "id"}}') FROM mock_items WHERE id @@@ pdb.all() GROUP BY rating ORDER BY rating LIMIT 10; ```
ts
import { search } from "@paradedb/drizzle-paradedb";

await db
  .select({
    rating: mockItems.rating,
    agg: search.agg({ value_count: { field: "id" } }),
  })
  .from(mockItems)
  .where(search.all(mockItems.id))
  .groupBy(mockItems.rating)
  .orderBy(mockItems.rating)
  .limit(10);
python
from paradedb import Agg, All, ParadeDB

MockItem.objects.filter(
    id=ParadeDB(All())
).values('rating').annotate(
    agg=Agg('{"value_count": {"field": "id"}}')
).order_by('rating')[:10]
python
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import facets, pdb, search

stmt = (
    select(MockItem.rating, pdb.agg(facets.value_count(field="id")).label("agg"))
    .where(search.all(MockItem.id))
    .group_by(MockItem.rating)
    .order_by(MockItem.rating)
    .limit(10)
)

with Session(engine) as session:
    session.execute(stmt).all()
ruby
MockItem.search(:id)
        .match_all
        .aggregate_by(
          :rating,
          agg: ParadeDB::Aggregations.value_count(:id)
        )
        .order(:rating)
        .limit(10)
cs
await dbContext
    .MockItems.Where(item => EF.Functions.All(item.Id))
    .GroupBy(item => item.Rating)
    .Select(group => new
    {
        Rating = group.Key,
        Agg = EF.Functions.Agg(new { value_count = new { field = "id" } })
    })
    .OrderBy(result => result.Rating)
    .Take(10)
    .ToListAsync();
</CodeGroup>

<Note>Ordering by the aggregate value is not yet supported.</Note>

For performance reasons, we strongly recommend adding a LIMIT to the GROUP BY. Terms aggregations without a LIMIT consume more memory and are slower to execute. If a query does not have a limit and more than 65000 unique values are found in a field, an error will be returned.