Back to Paradedb

Proximity

docs/documentation/full-text/proximity.mdx

0.23.38.0 KB
Original Source

Proximity queries are used to match documents containing tokens that are within a certain token distance of one another.

## Overview

The following query finds all documents where the token `sleek` is at most 1 token away from `shoes`.

<CodeGroup>

```sql SQL
SELECT description, rating, category
FROM mock_items
WHERE description @@@ ('sleek' ## 1 ## 'shoes');
```

```python Django
from paradedb import ParadeDB, Proximity

MockItem.objects.filter(
    description=ParadeDB(Proximity('sleek').within(1, 'shoes'))
).values('description', 'rating', 'category')
```

```python SQLAlchemy
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search

stmt = select(MockItem.description, MockItem.rating, MockItem.category).where(
    search.proximity(MockItem.description, search.prox_str("sleek").within(1, "shoes"))
)

with Session(engine) as session:
    session.execute(stmt).all()
```

```ruby Rails
MockItem.search(:description)
        .near(ParadeDB.proximity("sleek").within(1, "shoes"))
        .select(:description, :rating, :category)
```

</CodeGroup>

<Note>
  Like the [term](/documentation/full-text/term) query, the query string in a proximity query is treated as a finalized token.
</Note>

`##` does not care about order -- the term on the left-hand side may appear before or after the term on the right-hand side. To ensure that the left-hand term appears before the right-hand term, use `##>`.

<CodeGroup>

```sql SQL
SELECT description, rating, category
FROM mock_items
WHERE description @@@ ('sleek' ##> 1 ##> 'shoes');
```

```python Django
from paradedb import ParadeDB, Proximity

MockItem.objects.filter(
    description=ParadeDB(Proximity('sleek').within(1, 'shoes', ordered=True))
).values('description', 'rating', 'category')
```

```python SQLAlchemy
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search

stmt = (
    select(MockItem.description, MockItem.rating, MockItem.category)
    .where(search.proximity(MockItem.description, search.prox_str("sleek").within(1, "shoes", ordered=True)))
)

with Session(engine) as session:
    session.execute(stmt).all()
```

```ruby Rails
MockItem.search(:description)
        .near(ParadeDB.proximity("sleek").within(1, "shoes", ordered: true))
        .select(:description, :rating, :category)
```

</CodeGroup>

## Proximity Regex

In addition to exact tokens, proximity queries can also match against regular expressions.

The following query finds all documents where any token matching the regex `sl.*` is at most 1 token away from the token `shoes`.

<CodeGroup>

```sql SQL
SELECT description, rating, category
FROM mock_items
WHERE description @@@ (pdb.prox_regex('sl.*') ## 1 ## 'shoes');
```

```python Django
from paradedb import ParadeDB, ProxRegex, Proximity

MockItem.objects.filter(
    description=ParadeDB(Proximity('shoes').within(1, ProxRegex('sl.*')))
).values('description', 'rating', 'category')
```

```python SQLAlchemy
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search

stmt = (
    select(MockItem.description, MockItem.rating, MockItem.category)
    .where(search.proximity(MockItem.description, search.prox_regex("sl.*").within(1, "shoes")))
)

with Session(engine) as session:
    session.execute(stmt).all()
```

```ruby Rails
MockItem.search(:description)
        .near(ParadeDB.proximity(ParadeDB.regex_term("sl.*")).within(1, "shoes"))
        .select(:description, :rating, :category)
```

</CodeGroup>

By default, `pdb.prox_regex` will expand to the first 50 regex matches in each document. This limit can be overridden by providing a second argument:

<CodeGroup>

```sql SQL
-- Expand up to 100 regex matches
SELECT description, rating, category
FROM mock_items
WHERE description @@@ (pdb.prox_regex('sl.*', 100) ## 1 ## 'shoes');
```

```python Django
from paradedb import ParadeDB, ProxRegex, Proximity

MockItem.objects.filter(
    description=ParadeDB(Proximity('shoes').within(1, ProxRegex('sl.*', max_expansions=100)))
).values('description', 'rating', 'category')
```

```python SQLAlchemy
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search

stmt = (
    select(MockItem.description, MockItem.rating, MockItem.category)
    .where(search.proximity(MockItem.description, search.prox_regex("sl.*", 100).within(1, "shoes")))
)

with Session(engine) as session:
    session.execute(stmt).all()
```

```ruby Rails
MockItem.search(:description)
        .near(ParadeDB.proximity(ParadeDB.regex_term("sl.*", max_expansions: 100)).within(1, "shoes"))
        .select(:description, :rating, :category)
```

</CodeGroup>

## Proximity Array

`pdb.prox_array` matches against an array of tokens instead of a single token. For example, the following query finds all documents where either of the tokens `sleek` or `white` is within 1 token of `shoes`.

<CodeGroup>

```sql SQL
SELECT description, rating, category
FROM mock_items
WHERE description @@@ (pdb.prox_array('sleek', 'white') ## 1 ## 'shoes');
```

```python Django
from paradedb import ParadeDB, Proximity

MockItem.objects.filter(
    description=ParadeDB(Proximity(['sleek', 'white']).within(1, 'shoes'))
).values('description', 'rating', 'category')
```

```python SQLAlchemy
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search

stmt = (
    select(MockItem.description, MockItem.rating, MockItem.category)
    .where(search.proximity(MockItem.description, search.prox_array("sleek", "white").within(1, "shoes")))
)

with Session(engine) as session:
    session.execute(stmt).all()
```

```ruby Rails
MockItem.search(:description)
        .near(ParadeDB.proximity("sleek", "white").within(1, "shoes"))
        .select(:description, :rating, :category)
```

</CodeGroup>

`pdb.prox_array` can also take regular expressions as elements:

<CodeGroup>

```sql SQL
SELECT description, rating, category
FROM mock_items
WHERE description @@@ (pdb.prox_array(pdb.prox_regex('sl.*'), 'white') ## 1 ## 'shoes');
```

```python Django
from paradedb import ParadeDB, ProxRegex, Proximity

MockItem.objects.filter(
    description=ParadeDB(Proximity([ProxRegex('sl.*'), 'white']).within(1, 'shoes'))
).values('description', 'rating', 'category')
```

```python SQLAlchemy
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search

stmt = (
    select(MockItem.description, MockItem.rating, MockItem.category)
    .where(search.proximity(MockItem.description, search.prox_array(search.prox_regex("sl.*"), "white").within(1, "shoes")))
)

with Session(engine) as session:
    session.execute(stmt).all()
```

```ruby Rails
MockItem.search(:description)
        .near(ParadeDB.proximity(ParadeDB.regex_term("sl.*"), "white").within(1, "shoes"))
        .select(:description, :rating, :category)
```

</CodeGroup>

## Proximity Chaining

Multiple proximity clauses can be chained together:

<CodeGroup>

```sql SQL
SELECT description, rating, category
FROM mock_items
WHERE description @@@ ('sleek' ## 1 ## 'running' ## 2 ## pdb.prox_array('sneakers', pdb.prox_regex('sho.*')));
```

```python Django
from paradedb import ParadeDB, ProxRegex, Proximity

MockItem.objects.filter(
    description=ParadeDB(Proximity('sleek').within(1, 'running').within(2, ['sneakers', ProxRegex('sho.*')]))
).values('description', 'rating', 'category')
```

```python SQLAlchemy
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import search

stmt = (
    select(MockItem.description, MockItem.rating, MockItem.category)
    .where(search.proximity(MockItem.description, search.prox_str("sleek").within(1, "running").within(2, search.prox_array('sneakers', search.prox_regex('sho.*')))))
)

with Session(engine) as session:
    session.execute(stmt).all()
```

```ruby Rails
MockItem.search(:description)
        .near(ParadeDB.proximity("sleek").within(1, "running").within(2, ['sneakers', ParadeDB.regex_term('sho.*')]))
        .select(:description, :rating, :category)
```

</CodeGroup>