docs/documentation/full-text/highlight.mdx
<Note>Highlighting is not supported for fuzzy search.</Note>
Highlighting refers to the practice of visually emphasizing the portions of a document that match a user's search query.
`pdb.snippet(<column>)` can be added to any query where a ParadeDB operator is present. For each matching row, `pdb.snippet` returns the single best snippet, i.e. the one with the highest relevance score.
The following query generates highlighted snippets against the description field.
import { search } from "@paradedb/drizzle-paradedb";
await db
.select({
id: mockItems.id,
snippet: search.snippet(mockItems.description),
})
.from(mockItems)
.where(search.matchAny(mockItems.description, "shoes"))
.limit(5);
from paradedb import MatchAny, ParadeDB, Snippet
MockItem.objects.filter(
description=ParadeDB(MatchAny('shoes'))
).annotate(
snippet=Snippet('description')
).values('id', 'snippet')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(MockItem.id, pdb.snippet(MockItem.description).label("snippet"))
.where(search.match_any(MockItem.description, "shoes"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("shoes")
.with_snippet(:description)
.select(:id)
.limit(5)
await dbContext
.MockItems.Where(item => EF.Functions.MatchAny(item.Description, "shoes"))
.Select(item => new { item.Id, Snippet = EF.Functions.Snippet(item.Description) })
.Take(5)
.ToListAsync();
By default, each matched term in the snippet is wrapped in `<b></b>` tags. The tags can be configured with `start_tag` and `end_tag`:
import { search } from "@paradedb/drizzle-paradedb";
await db
.select({
id: mockItems.id,
snippet: search.snippet(mockItems.description, {
startTag: "<i>",
endTag: "</i>",
}),
})
.from(mockItems)
.where(search.matchAny(mockItems.description, "shoes"))
.limit(5);
from paradedb import MatchAny, ParadeDB, Snippet
MockItem.objects.filter(
description=ParadeDB(MatchAny('shoes'))
).annotate(
snippet=Snippet('description', start_sel='<i>', stop_sel='</i>')
).values('id', 'snippet')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(
MockItem.id,
pdb.snippet(
MockItem.description,
start_tag="<i>",
end_tag="</i>",
).label("snippet"),
)
.where(search.match_any(MockItem.description, "shoes"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("shoes")
.with_snippet(:description, start_tag: "<i>", end_tag: "</i>")
.select(:id)
.limit(5)
var snippetOptions = new SnippetOptions { StartTag = "<i>", EndTag = "</i>" };
await dbContext
.MockItems.Where(item => EF.Functions.MatchAny(item.Description, "shoes"))
.Select(item => new
{
item.Id,
Snippet = EF.Functions.Snippet(item.Description, snippetOptions)
})
.Take(5)
.ToListAsync();
`pdb.snippets(<column>)` returns an array of snippets, allowing you to retrieve multiple highlighted matches from a document. This is particularly useful when a document has several relevant matches spread throughout its content.
import { search } from "@paradedb/drizzle-paradedb";
await db
.select({
id: mockItems.id,
snippets: search.snippets(mockItems.description, { maxNumChars: 15 }),
})
.from(mockItems)
.where(search.matchAny(mockItems.description, "artistic vase"))
.limit(5);
from paradedb import MatchAny, ParadeDB, Snippets
MockItem.objects.filter(
description=ParadeDB(MatchAny('artistic vase'))
).annotate(
snippets=Snippets('description', max_num_chars=15)
).values('id', 'snippets')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(MockItem.id, pdb.snippets(MockItem.description, max_num_chars=15).label("snippets"))
.where(search.match_any(MockItem.description, "artistic vase"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("artistic vase")
.with_snippets(:description, max_chars: 15)
.select(:id)
.limit(5)
var snippetsOptions = new SnippetsOptions { MaxNumChars = 15 };
await dbContext
.MockItems.Where(item => EF.Functions.MatchAny(item.Description, "artistic vase"))
.Select(item => new
{
item.Id,
Snippets = EF.Functions.Snippets(item.Description, snippetsOptions)
})
.Take(5)
.ToListAsync();
id | snippets
----+-----------------------------------------
19 | {<b>Artistic</b>,"ceramic <b>vase</b>"}
(1 row)
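You can control the number and order of snippets returned using the `limit`, `offset`, and `sort_by` parameters. In SQL, `"limit"` and `"offset"` must be double-quoted because they are reserved keywords in PostgreSQL.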
For example, to get only the first snippet:
<CodeGroup>

```sql SQL
SELECT id, pdb.snippets(description, max_num_chars => 15, "limit" => 1)
FROM mock_items
WHERE description ||| 'running'
LIMIT 5;
```

import { search } from "@paradedb/drizzle-paradedb";
await db
.select({
id: mockItems.id,
snippets: search.snippets(mockItems.description, {
maxNumChars: 15,
limit: 1,
}),
})
.from(mockItems)
.where(search.matchAny(mockItems.description, "running"))
.limit(5);
from paradedb import MatchAny, ParadeDB, Snippets
MockItem.objects.filter(
description=ParadeDB(MatchAny('running'))
).annotate(
snippets=Snippets('description', max_num_chars=15, limit=1)
).values('id', 'snippets')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(MockItem.id, pdb.snippets(MockItem.description, max_num_chars=15, limit=1).label("snippets"))
.where(search.match_any(MockItem.description, "running"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("running")
.with_snippets(:description, max_chars: 15, limit: 1)
.select(:id)
.limit(5)
var snippetsOptions = new SnippetsOptions { MaxNumChars = 15, Limit = 1 };
await dbContext
.MockItems.Where(item => EF.Functions.MatchAny(item.Description, "running"))
.Select(item => new
{
item.Id,
Snippets = EF.Functions.Snippets(item.Description, snippetsOptions)
})
.Take(5)
.ToListAsync();
To get the second snippet (by skipping the first one):
<CodeGroup>

```sql SQL
SELECT id, pdb.snippets(description, max_num_chars => 15, "limit" => 1, "offset" => 1)
FROM mock_items
WHERE description ||| 'running'
LIMIT 5;
```

import { search } from "@paradedb/drizzle-paradedb";
await db
.select({
id: mockItems.id,
snippets: search.snippets(mockItems.description, {
maxNumChars: 15,
limit: 1,
offset: 1,
}),
})
.from(mockItems)
.where(search.matchAny(mockItems.description, "running"))
.limit(5);
from paradedb import MatchAny, ParadeDB, Snippets
MockItem.objects.filter(
description=ParadeDB(MatchAny('running'))
).annotate(
snippets=Snippets('description', max_num_chars=15, limit=1, offset=1)
).values('id', 'snippets')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(
MockItem.id,
pdb.snippets(MockItem.description, max_num_chars=15, limit=1, offset=1).label("snippets"),
)
.where(search.match_any(MockItem.description, "running"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("running")
.with_snippets(:description, max_chars: 15, limit: 1, offset: 1)
.select(:id)
.limit(5)
var snippetsOptions = new SnippetsOptions { MaxNumChars = 15, Limit = 1, Offset = 1 };
await dbContext
.MockItems.Where(item => EF.Functions.MatchAny(item.Description, "running"))
.Select(item => new
{
item.Id,
Snippets = EF.Functions.Snippets(item.Description, snippetsOptions)
})
.Take(5)
.ToListAsync();
Snippets can be sorted either by their relevance score ('score') or their position within the document ('position').
To sort snippets by their appearance in the document:
<CodeGroup>

```sql SQL
SELECT id, pdb.snippets(description, max_num_chars => 15, sort_by => 'position')
FROM mock_items
WHERE description ||| 'artistic vase'
LIMIT 5;
```

import { search } from "@paradedb/drizzle-paradedb";
await db
.select({
id: mockItems.id,
snippets: search.snippets(mockItems.description, {
maxNumChars: 15,
sortBy: "position",
}),
})
.from(mockItems)
.where(search.matchAny(mockItems.description, "artistic vase"))
.limit(5);
from paradedb import MatchAny, ParadeDB, Snippets
MockItem.objects.filter(
description=ParadeDB(MatchAny('artistic vase'))
).annotate(
snippets=Snippets('description', max_num_chars=15, sort_by='position')
).values('id', 'snippets')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(
MockItem.id,
pdb.snippets(MockItem.description, max_num_chars=15, sort_by="position").label("snippets"),
)
.where(search.match_any(MockItem.description, "artistic vase"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("artistic vase")
.with_snippets(:description, max_chars: 15, sort_by: :position)
.select(:id)
.limit(5)
var snippetsOptions = new SnippetsOptions { MaxNumChars = 15, SortBy = "position" };
await dbContext
.MockItems.Where(item => EF.Functions.MatchAny(item.Description, "artistic vase"))
.Select(item => new
{
item.Id,
Snippets = EF.Functions.Snippets(item.Description, snippetsOptions)
})
.Take(5)
.ToListAsync();
`pdb.snippet_positions(<column>)` returns the byte offsets in the original text where the snippets would appear. It returns a two-dimensional integer array where each nested pair is a half-open range `[start, end)`: the first value is the byte index of the first highlighted byte, and the second value is the byte index immediately after the last highlighted byte.
import { search } from "@paradedb/drizzle-paradedb";
await db
.select({
id: mockItems.id,
snippet: search.snippet(mockItems.description),
snippetPositions: search.snippetPositions(mockItems.description),
})
.from(mockItems)
.where(search.matchAny(mockItems.description, "shoes"))
.limit(5);
from paradedb import MatchAny, ParadeDB, Snippet, SnippetPositions
MockItem.objects.filter(
description=ParadeDB(MatchAny('shoes'))
).annotate(
snippet=Snippet('description'),
snippet_positions=SnippetPositions('description')
).values('id', 'snippet', 'snippet_positions')[:5]
from sqlalchemy import select
from sqlalchemy.orm import Session
from paradedb.sqlalchemy import pdb, search
stmt = (
select(
MockItem.id,
pdb.snippet(MockItem.description).label("snippet"),
pdb.snippet_positions(MockItem.description).label("snippet_positions"),
)
.where(search.match_any(MockItem.description, "shoes"))
.limit(5)
)
with Session(engine) as session:
session.execute(stmt).all()
MockItem.search(:description)
.matching_any("shoes")
.with_snippet(:description)
.with_snippet_positions(:description)
.select(:id)
.limit(5)
await dbContext
.MockItems.Where(item => EF.Functions.MatchAny(item.Description, "shoes"))
.Select(item => new
{
item.Id,
Snippet = EF.Functions.Snippet(item.Description),
SnippetPositions = EF.Functions.SnippetPositions(item.Description)
})
.Take(5)
.ToListAsync();
id | snippet | snippet_positions
----+----------------------------+-------------------
4 | White jogging <b>shoes</b> | {{14,19}}
3 | Sleek running <b>shoes</b> | {{14,19}}
5 | Generic <b>shoes</b> | {{8,13}}
(3 rows)