Some test text!

Search
Hamburger Icon

Windows / Guides / Text search

Search for text in a PDF on Windows

The following sample searches for text in a PDF using a regular expression and then applies a link annotation to the highlighted result.

In this example, we add a link annotation, but any other type of annotation can be applied here, such as a redaction annotation in a search-and-redact workflow.
// Searches a PDF for a credit-card-style number using a regular expression and
// places a link annotation over each quad of the highlighted match.
PDFDoc doc = new PDFDoc(filename);
Int32 page_num = 0;
String result_str = "", ambient_string = "";
Highlights hlts = new Highlights();
TextSearch txt_search = new TextSearch();

// All search flags are combined once; e_reg_expression makes the pattern below
// be treated as a regular expression, e_highlight fills `hlts` on a match.
Int32 mode = (Int32)(TextSearch.SearchMode.e_whole_word
                   | TextSearch.SearchMode.e_page_stop
                   | TextSearch.SearchMode.e_highlight
                   | TextSearch.SearchMode.e_reg_expression);
txt_search.SetMode(mode);

//use regular expression to find credit card number
// (declared exactly once -- a second `String pattern` in this scope does not compile)
String pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
txt_search.SetPattern(pattern);

//call Begin() method to initialize the text search.
txt_search.Begin( doc, pattern, mode, -1, -1 );

// Because e_page_stop is set, Run() can return e_page at a page boundary
// before any match is seen. Keep running until the search reports something
// other than a page stop so a match beyond the first page is not missed.
TextSearch.ResultCode code;
do
{
  code = txt_search.Run(ref page_num, ref result_str, ref ambient_string, hlts );
}
while ( code == TextSearch.ResultCode.e_page );

if ( code == TextSearch.ResultCode.e_found )
{
  //add a link annotation based on the location of the found instance
  hlts.Begin(doc);
  while (hlts.HasNext())
  {
    Page cur_page = doc.GetPage(hlts.GetCurrentPageNumber());
    // Quads arrive as a flat array: 8 doubles (four x,y pairs) per quad.
    double[] quads = hlts.GetCurrentQuads();
    int quad_count = quads.Length / 8;
    for (int i = 0; i < quad_count; ++i)
    {
      //assume each quad is an axis-aligned rectangle
      // The bounding box is the min/max over the four x values (even offsets)
      // and the four y values (odd offsets).
      int offset = 8 * i;
      double x1 = Math.Min(Math.Min(Math.Min(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
      double x2 = Math.Max(Math.Max(Math.Max(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
      double y1 = Math.Min(Math.Min(Math.Min(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);
      double y2 = Math.Max(Math.Max(Math.Max(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);

      Annots.Link hyper_link = Annots.Link.Create(doc, new Rect(x1, y1, x2, y2), Action.CreateURI(doc, "http://www.pdftron.com"));
      hyper_link.RefreshAppearance();
      cur_page.AnnotPushBack(hyper_link);
    }
    hlts.Next();
  }
}

Search PDF files for text
Full code sample that shows how to use TextSearch to search for text on PDF pages using regular expressions.

Get the answers you need: Chat with us