Some test text!

Search
Hamburger Icon

UWP / Guides / Text search

Search for text in a PDF in UWP

The following example searches for text in a PDF using a regular expression and then applies a link annotation to the highlighted result:

In this example, we add a link annotation, but any other type of annotation can be applied here, such as a redaction annotation in a search-and-redact workflow.
// Searches a PDF for text matching a regular expression and places a link
// annotation over every quad of the first match that is found.

// Open the document and create the holders that TextSearch.Run() fills in.
PDFDoc doc = new PDFDoc(filename);
Int32Ref pageNumber = new Int32Ref(0);
StringRef resultString = new StringRef();
StringRef ambientString = new StringRef();
Highlights highlights = new Highlights();
TextSearch textSearch = new TextSearch();

// Base search flags: whole-word matching, stop at page boundaries, and
// collect highlight information for each hit.
Int32 mode = (Int32)(TextSearchSearchMode.e_whole_word | TextSearchSearchMode.e_page_stop | TextSearchSearchMode.e_highlight);

// Use a regular expression to find a credit card number.
// e_highlight is already part of the base flags, so only the regex flag is added.
mode |= (Int32)TextSearchSearchMode.e_reg_expression;
textSearch.SetMode(mode);
String pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
textSearch.SetPattern(pattern);

// Call Begin() to initialize the text search.
// NOTE(review): -1, -1 are taken to mean "first page" to "last page" - confirm against the TextSearch.Begin reference.
textSearch.Begin(doc, pattern, mode, -1, -1);

// Because e_page_stop is set, Run() may return e_page at the end of a page
// that contains no match. Keep running until a match is found or the search
// ends, so that a match on a later page is not silently missed.
TextSearchResultCode code;
do
{
  code = textSearch.Run(pageNumber, resultString, ambientString, highlights);
} while (code == TextSearchResultCode.e_page);

if (code == TextSearchResultCode.e_found)
{
  // Add a link annotation based on the location of the found instance.
  highlights.Begin(doc);
  while (highlights.HasNext())
  {
    Page currentPage = doc.GetPage(highlights.GetCurrentPageNumber());

    // Each quad is stored as 8 consecutive doubles: four (x, y) corner points.
    double[] quads = highlights.GetCurrentQuads();
    int quadCount = quads.Length / 8;
    for (int i = 0; i < quadCount; ++i)
    {
      // Assume each quad is an axis-aligned rectangle: its bounding box is
      // the min/max of the four corner coordinates.
      int offset = 8 * i;
      double x1 = Math.Min(Math.Min(Math.Min(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
      double x2 = Math.Max(Math.Max(Math.Max(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
      double y1 = Math.Min(Math.Min(Math.Min(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);
      double y2 = Math.Max(Math.Max(Math.Max(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);

      Annots.Link hyperLink = Annots.Link.Create(doc.GetSDFDoc(), new Rect(x1, y1, x2, y2), Action.CreateURI(doc.GetSDFDoc(), "http://www.pdftron.com"));
      hyperLink.RefreshAppearance();
      currentPage.AnnotPushBack(hyperLink);
    }
    highlights.Next();
  }
}

Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.

Get the answers you need: Chat with us