docs/documentation/full-text/highlight.mdx
<Note>Highlighting is not supported for fuzzy search.</Note>
Highlighting refers to the practice of visually emphasizing the portions of a document that match a user's search query.
`pdb.snippet(<column>)` can be added to any query where a ParadeDB operator is present. `pdb.snippet` returns the single best snippet, as ranked by relevance score.
The following query generates highlighted snippets against the description field.
from paradedb import Match, ParadeDB, Snippet
MockItem.objects.filter(
description=ParadeDB(Match('shoes', operator='OR'))
).annotate(
snippet=Snippet('description')
).values('id', 'snippet')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(MockItem.id, pdb.snippet(MockItem.description).label("snippet"))
.where(search.match_any(MockItem.description, "shoes"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("shoes")
.with_snippet(:description)
.select(:id)
.limit(5)
By default, each matched term in the snippet is wrapped in `<b></b>` tags. This can be configured with `start_tag` and `end_tag` (`start_sel` and `stop_sel` in the Django example):
from paradedb import Match, ParadeDB, Snippet
MockItem.objects.filter(
description=ParadeDB(Match('shoes', operator='OR'))
).annotate(
snippet=Snippet('description', start_sel='<i>', stop_sel='</i>')
).values('id', 'snippet')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(
MockItem.id,
pdb.snippet(
MockItem.description,
start_tag="<i>",
end_tag="</i>",
).label("snippet"),
)
.where(search.match_any(MockItem.description, "shoes"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("shoes")
.with_snippet(:description, start_tag: "<i>", end_tag: "</i>")
.select(:id)
.limit(5)
`pdb.snippets(<column>)` returns an array of snippets, allowing you to retrieve multiple highlighted matches from a document. This is particularly useful when a document has several relevant matches spread throughout its content.
from paradedb import Match, ParadeDB, Snippets
MockItem.objects.filter(
description=ParadeDB(Match('artistic vase', operator='OR'))
).annotate(
snippets=Snippets('description', max_num_chars=15)
).values('id', 'snippets')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(MockItem.id, pdb.snippets(MockItem.description, max_num_chars=15).label("snippets"))
.where(search.match_any(MockItem.description, "artistic vase"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("artistic vase")
.with_snippets(:description, max_chars: 15)
.select(:id)
.limit(5)
id | snippets
----+-----------------------------------------
19 | {<b>Artistic</b>,"ceramic <b>vase</b>"}
(1 row)
You can control the number and order of snippets returned using the limit, offset, and sort_by parameters.
For example, to get only the first snippet:
<CodeGroup>
```sql SQL
SELECT id, pdb.snippets(description, max_num_chars => 15, "limit" => 1)
FROM mock_items
WHERE description ||| 'running'
LIMIT 5;
```
from paradedb import Match, ParadeDB, Snippets
MockItem.objects.filter(
description=ParadeDB(Match('running', operator='OR'))
).annotate(
snippets=Snippets('description', max_num_chars=15, limit=1)
).values('id', 'snippets')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(MockItem.id, pdb.snippets(MockItem.description, max_num_chars=15, limit=1).label("snippets"))
.where(search.match_any(MockItem.description, "running"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("running")
.with_snippets(:description, max_chars: 15, limit: 1)
.select(:id)
.limit(5)
To get the second snippet (by skipping the first one):
<CodeGroup>
```sql SQL
SELECT id, pdb.snippets(description, max_num_chars => 15, "limit" => 1, "offset" => 1)
FROM mock_items
WHERE description ||| 'running'
LIMIT 5;
```
from paradedb import Match, ParadeDB, Snippets
MockItem.objects.filter(
description=ParadeDB(Match('running', operator='OR'))
).annotate(
snippets=Snippets('description', max_num_chars=15, limit=1, offset=1)
).values('id', 'snippets')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(
MockItem.id,
pdb.snippets(MockItem.description, max_num_chars=15, limit=1, offset=1).label("snippets"),
)
.where(search.match_any(MockItem.description, "running"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("running")
.with_snippets(:description, max_chars: 15, limit: 1, offset: 1)
.select(:id)
.limit(5)
Snippets can be sorted either by their relevance score (`'score'`) or by their position within the document (`'position'`).
To sort snippets by their appearance in the document:
<CodeGroup>
```sql SQL
SELECT id, pdb.snippets(description, max_num_chars => 15, sort_by => 'position')
FROM mock_items
WHERE description ||| 'artistic vase'
LIMIT 5;
```
from paradedb import Match, ParadeDB, Snippets
MockItem.objects.filter(
description=ParadeDB(Match('artistic vase', operator='OR'))
).annotate(
snippets=Snippets('description', max_num_chars=15, sort_by='position')
).values('id', 'snippets')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(
MockItem.id,
pdb.snippets(MockItem.description, max_num_chars=15, sort_by="position").label("snippets"),
)
.where(search.match_any(MockItem.description, "artistic vase"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("artistic vase")
.with_snippets(:description, max_chars: 15, sort_by: :position)
.select(:id)
.limit(5)
`pdb.snippet_positions(<column>)` returns the byte offsets in the original text where the snippets would appear. The result is a two-dimensional integer array in which each nested pair is a half-open range `[start, end)`: the first value is the byte index of the first highlighted byte, and the second value is the byte index immediately after the last highlighted byte.
from paradedb import Match, ParadeDB, Snippet, SnippetPositions
MockItem.objects.filter(
description=ParadeDB(Match('shoes', operator='OR'))
).annotate(
snippet=Snippet('description'),
snippet_positions=SnippetPositions('description')
).values('id', 'snippet', 'snippet_positions')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(
MockItem.id,
pdb.snippet(MockItem.description).label("snippet"),
pdb.snippet_positions(MockItem.description).label("snippet_positions"),
)
.where(search.match_any(MockItem.description, "shoes"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("shoes")
.with_snippet(:description)
.with_snippet_positions(:description)
.select(:id)
.limit(5)
id | snippet | snippet_positions
----+----------------------------+-------------------
4 | White jogging <b>shoes</b> | {{14,19}}
3 | Sleek running <b>shoes</b> | {{14,19}}
5 | Generic <b>shoes</b> | {{8,13}}
(3 rows)