officefileapi-401136-pdf-document-api-examples-extract-content-from-a-pdf-document-how-to-get-coordinates-of-all-words-in-a-document.md
The following code snippet uses the PdfDocumentProcessor.NextWord method to iterate over all words in a document and retrieve their coordinates.
The PdfDocumentProcessor.NextWord method returns a PdfPageWord object. The Rectangles property returns the collection of rectangles that encompass the current word.
Tip
The Rectangles property returns more than one PdfOrientedRectangle object when a part of a word is carried over to the next line. Use the Segments property to obtain information about each part of the word.
using DevExpress.Pdf;
// Declare a list that stores one entry per word segment: the segment's text
// paired with the oriented rectangle that encloses it. A word that is carried
// over to the next line consists of several segments and therefore
// contributes several entries.
List<Tuple<string, PdfOrientedRectangle>> WordCoordinates = new List<Tuple<string, PdfOrientedRectangle>>();
using (PdfDocumentProcessor processor = new PdfDocumentProcessor())
{
    processor.LoadDocument("Document.pdf");

    // Walk the document word by word; the loop ends when NextWord returns null.
    PdfPageWord currentWord = processor.NextWord();
    while (currentWord != null)
    {
        // Retrieve the number of the page on which the word is located.
        // The value is the same for every segment of the word, so it is read
        // once per word. It is not stored in the list in this example.
        int pageNumber = currentWord.PageNumber;

        // Iterate the segments directly so that each text fragment is always
        // paired with its own rectangle, instead of indexing two separate
        // lists (Rectangles and Segments) with a shared counter.
        foreach (PdfWordSegment segment in currentWord.Segments)
        {
            // Add the segment's content and its coordinates to the list
            WordCoordinates.Add(new Tuple<string, PdfOrientedRectangle>(segment.Text, segment.Rectangle));
        }

        // Switch to the next word
        currentWord = processor.NextWord();
    }
}
Imports DevExpress.Pdf
' Declare a list that stores one entry per word segment: the segment's text
' paired with the oriented rectangle that encloses it. A word that is carried
' over to the next line consists of several segments and therefore
' contributes several entries.
Dim WordCoordinates As New List(Of Tuple(Of String, PdfOrientedRectangle))()
Using processor As New PdfDocumentProcessor()
    processor.LoadDocument("Document.pdf")

    ' Walk the document word by word; the loop ends when NextWord returns Nothing.
    Dim currentWord As PdfPageWord = processor.NextWord()
    Do While currentWord IsNot Nothing
        ' Retrieve the number of the page on which the word is located.
        ' The value is the same for every segment of the word, so it is read
        ' once per word. It is not stored in the list in this example.
        Dim pageNumber As Integer = currentWord.PageNumber

        ' Iterate the segments directly so that each text fragment is always
        ' paired with its own rectangle, instead of indexing two separate
        ' lists (Rectangles and Segments) with a shared counter.
        For Each segment As PdfWordSegment In currentWord.Segments
            ' Add the segment's content and its coordinates to the list
            WordCoordinates.Add(New Tuple(Of String, PdfOrientedRectangle)(segment.Text, segment.Rectangle))
        Next segment

        ' Switch to the next word
        currentWord = processor.NextWord()
    Loop
End Using