docs/documentation/getting-started/queries.mdx
Now that your environment is configured, select the codetab for your tool and run some queries.
We're now ready to execute a basic text search query. We'll look for matches where description matches running shoes where rating is greater than 2.
from paradedb import Match, ParadeDB
MockItem.objects.filter(
description=ParadeDB(Match('running shoes', operator='OR')),
rating__gt=2
).values('description', 'rating', 'category').order_by('rating')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.match_any(MockItem.description, "running shoes"), MockItem.rating > 2)
.order_by(MockItem.rating)
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("running shoes")
.where(rating: 3..)
.select(:description, :rating, :category)
.order(:rating)
.limit(5)
description | rating | category
---------------------+--------+----------
White jogging shoes | 3 | Footwear
Generic shoes | 4 | Footwear
Sleek running shoes | 5 | Footwear
(3 rows)
||| is ParadeDB's custom match disjunction operator, which means "find me all documents containing
running OR shoes.
If we want all documents containing running AND shoes, we can use ParadeDB's &&& match conjunction operator.
from paradedb import Match, ParadeDB
MockItem.objects.filter(
description=ParadeDB(Match('running shoes', operator='AND')),
rating__gt=2
).values('description', 'rating', 'category').order_by('rating')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.match_all(MockItem.description, "running shoes"), MockItem.rating > 2)
.order_by(MockItem.rating)
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_all("running shoes")
.where(rating: 3..)
.select(:description, :rating, :category)
.order(:rating)
.limit(5)
description | rating | category
---------------------+--------+----------
Sleek running shoes | 5 | Footwear
(1 row)
Next, let's add BM25 scoring to the results, which sorts matches by relevance. To do this, we'll use pdb.score.
from paradedb import Match, ParadeDB, Score
MockItem.objects.filter(
description=ParadeDB(Match('running shoes', operator='OR')),
rating__gt=2
).annotate(
score=Score()
).values('description', 'score').order_by('-score')[:5]
from sqlalchemy import desc, select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(MockItem.description, pdb.score(MockItem.id).label("score"))
.where(search.match_any(MockItem.description, "running shoes"), MockItem.rating > 2)
.order_by(desc("score"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("running shoes")
.where(rating: 3..)
.with_score
.select(:description)
.order(search_score: :desc)
.limit(5)
description | score
---------------------+-----------
Sleek running shoes | 6.817111
Generic shoes | 3.8772602
White jogging shoes | 3.4849067
(3 rows)
Finally, let's also highlight the relevant portions of the documents that were matched.
To do this, we'll use pdb.snippet.
from paradedb import Match, ParadeDB, Score, Snippet
MockItem.objects.filter(
description=ParadeDB(Match('running shoes', operator='OR')),
rating__gt=2
).annotate(
snippet=Snippet('description'),
score=Score()
).values('description', 'snippet', 'score').order_by('-score')[:5]
from sqlalchemy import desc, select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(
MockItem.description,
pdb.snippet(MockItem.description).label("snippet"),
pdb.score(MockItem.id).label("score"),
)
.where(search.match_any(MockItem.description, "running shoes"), MockItem.rating > 2)
.order_by(desc("score"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("running shoes")
.where(rating: 3..)
.with_snippet(:description)
.with_score
.select(:description)
.order(search_score: :desc)
.limit(5)
description | snippet | score
---------------------+-----------------------------------+-----------
Sleek running shoes | Sleek <b>running</b> <b>shoes</b> | 6.817111
Generic shoes | Generic <b>shoes</b> | 3.8772602
White jogging shoes | White jogging <b>shoes</b> | 3.4849067
(3 rows)
ParadeDB is highly optimized for quickly returning the Top K results out of the index. In SQL, this means queries that contain an ORDER BY...LIMIT:
from paradedb import Match, ParadeDB
MockItem.objects.filter(
description=ParadeDB(Match('running shoes', operator='OR'))
).values('description', 'rating', 'category').order_by('rating')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.match_any(MockItem.description, "running shoes"))
.order_by(MockItem.rating)
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("running shoes")
.select(:description, :rating, :category)
.order(:rating)
.limit(5)
description | rating | category
---------------------+--------+----------
White jogging shoes | 3 | Footwear
Generic shoes | 4 | Footwear
Sleek running shoes | 5 | Footwear
(3 rows)
Faceted queries allow a single query to return both the Top K results and an aggregate value, which is more CPU-efficient than issuing two separate queries.
For example, the following query returns the top 3 results as well as the total number of results matched.
<CodeGroup> ```sql SQL SELECT description, rating, category, pdb.agg('{"value_count": {"field": "id"}}') OVER () FROM mock_items WHERE description ||| 'running shoes' ORDER BY rating LIMIT 5; ```from paradedb import Match, ParadeDB
(
MockItem.objects
.filter(description=ParadeDB(Match('running shoes', operator='OR')))
.order_by('rating')
.values('description', 'rating', 'category')[:5]
.facets(agg='{"value_count": {"field": "id"}}')
)
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import facets, search
base = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.match_any(MockItem.description, "running shoes"))
.order_by(MockItem.rating)
.limit(5)
)
stmt = facets.with_rows(base, agg=facets.value_count(field="id"), key_field=MockItem.id)
with Session(engine) as session:
rows = session.execute(stmt).all()
facets.extract(rows)
relation = MockItem.search(:description)
.matching_any("running shoes")
.with_agg(agg: ParadeDB::Aggregations.value_count(:id))
.order(:rating)
.select(:description, :rating, :category)
.limit(5)
rows = relation.to_a
facets = relation.aggregates
description | rating | category | agg
---------------------+--------+----------+----------------
White jogging shoes | 3 | Footwear | {"value": 3.0}
Generic shoes | 4 | Footwear | {"value": 3.0}
Sleek running shoes | 5 | Footwear | {"value": 3.0}
(3 rows)
That's it! Next, let's load your data to start running real queries.