docs/documentation/full-text/proximity.mdx
Proximity queries are used to match documents containing tokens that are within a certain token distance of one another.
The following query finds all documents where the token `sleek` is at most `1` token away from `shoes`.
from paradedb import ParadeDB, Proximity
MockItem.objects.filter(
description=ParadeDB(Proximity('sleek').within(1, 'shoes'))
).values('description', 'rating', 'category')
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = select(MockItem.description, MockItem.rating, MockItem.category).where(
search.proximity(MockItem.description, search.prox_str("sleek").within(1, "shoes"))
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.near(ParadeDB.proximity("sleek").within(1, "shoes"))
.select(:description, :rating, :category)
The `##` operator does not care about order -- the term on the left-hand side may appear before or after the term on the right-hand side.
To ensure that the left-hand term appears before the right-hand term, use `##>`.
from paradedb import ParadeDB, Proximity
MockItem.objects.filter(
description=ParadeDB(Proximity('sleek').within(1, 'shoes', ordered=True))
).values('description', 'rating', 'category')
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.proximity(MockItem.description, search.prox_str("sleek").within(1, "shoes", ordered=True)))
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.near(ParadeDB.proximity("sleek").within(1, "shoes", ordered: true))
.select(:description, :rating, :category)
In addition to exact tokens, proximity queries can also match against regular expressions.
The following query finds all documents where any token matching the regex `sl.*` is at most `1` token away
from the token `shoes`.
from paradedb import ParadeDB, ProxRegex, Proximity
MockItem.objects.filter(
description=ParadeDB(Proximity('shoes').within(1, ProxRegex('sl.*')))
).values('description', 'rating', 'category')
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.proximity(MockItem.description, search.prox_regex("sl.*").within(1, "shoes")))
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.near(ParadeDB.proximity(ParadeDB.regex_term("sl.*")).within(1, "shoes"))
.select(:description, :rating, :category)
By default, `pdb.prox_regex` will expand to the first `50` regex matches in each document. This limit can be overridden
by providing a second argument:
from paradedb import ParadeDB, ProxRegex, Proximity
MockItem.objects.filter(
description=ParadeDB(Proximity('shoes').within(1, ProxRegex('sl.*', max_expansions=100)))
).values('description', 'rating', 'category')
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.proximity(MockItem.description, search.prox_regex("sl.*", 100).within(1, "shoes")))
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.near(ParadeDB.proximity(ParadeDB.regex_term("sl.*", max_expansions: 100)).within(1, "shoes"))
.select(:description, :rating, :category)
`pdb.prox_array` matches against an array of tokens instead of a single token. For example, the following query finds all
documents where either of the tokens `sleek` or `white` is within `1` token of `shoes`.
from paradedb import ParadeDB, Proximity
MockItem.objects.filter(
description=ParadeDB(Proximity(['sleek', 'white']).within(1, 'shoes'))
).values('description', 'rating', 'category')
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.proximity(MockItem.description, search.prox_array("sleek", "white").within(1, "shoes")))
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.near(ParadeDB.proximity("sleek", "white").within(1, "shoes"))
.select(:description, :rating, :category)
`pdb.prox_array` can also take regex terms created with `pdb.prox_regex`:
from paradedb import ParadeDB, ProxRegex, Proximity
MockItem.objects.filter(
description=ParadeDB(Proximity([ProxRegex('sl.*'), 'white']).within(1, 'shoes'))
).values('description', 'rating', 'category')
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.proximity(MockItem.description, search.prox_array(search.prox_regex("sl.*"), "white").within(1, "shoes")))
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.near(ParadeDB.proximity(ParadeDB.regex_term("sl.*"), "white").within(1, "shoes"))
.select(:description, :rating, :category)
Multiple proximity clauses can be chained together:
<CodeGroup>
```sql SQL
SELECT description, rating, category
FROM mock_items
WHERE description @@@ ('sleek' ## 1 ## 'running' ## 2 ## pdb.prox_array('sneakers', pdb.prox_regex('sho.*')));
```

from paradedb import ParadeDB, ProxRegex, Proximity
MockItem.objects.filter(
description=ParadeDB(Proximity('sleek').within(1, 'running').within(2, ['sneakers', ProxRegex('sho.*')]))
).values('description', 'rating', 'category')
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search
stmt = (
select(MockItem.description, MockItem.rating, MockItem.category)
.where(search.proximity(MockItem.description, search.prox_str("sleek").within(1, "running").within(2, search.prox_array('sneakers', search.prox_regex('sho.*')))))
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.near(ParadeDB.proximity("sleek").within(1, "running").within(2, ['sneakers', ParadeDB.regex_term('sho.*')]))
.select(:description, :rating, :category)