Some test text!

Search
Hamburger Icon

Php / Guides / Text search

Search for text in a PDF in PHP

The following example searches for text in a PDF using a regular expression and then applies a link annotation to the highlighted result.

In this example, we add a link annotation, but any other type of annotation can be applied here, such as a redaction annotation in a search-and-redact workflow.
// Searches $filename for a credit card number using a regular expression and
// places a link annotation over every highlighted region of the first match.
$doc = new PDFDoc($filename);
$txt_search = new TextSearch();

// Whole-word regular-expression search. e_highlight is included because the
// match location is read through GetHighlights() below; e_page_stop makes
// Run() hand control back at the end of every page.
$mode = TextSearch::e_whole_word
      | TextSearch::e_page_stop
      | TextSearch::e_reg_expression
      | TextSearch::e_highlight;

//use regular expression to find credit card number
$pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"

//call Begin() method to initialize the text search.
//Begin() receives the pattern and the mode directly, so no separate
//SetPattern()/SetMode() calls are needed beforehand.
$txt_search->Begin( $doc, $pattern, $mode );

// With e_page_stop a single Run() can come back with a "page end" result
// before any match is reached. Keep running until the result is either a
// match or the end of the document, so that a match located after the first
// page is not silently missed.
do
{
  $searchResult = $txt_search->Run();
}
while ( $searchResult->IsPageEnd() );

if ( $searchResult->IsFound() )
{
  //add a link annotation based on the location of the found instance
  $sdf_doc = $doc->GetSDFDoc();
  $hlts = $searchResult->GetHighlights();
  $hlts->Begin($doc);
  while ( $hlts->HasNext() )
  {
    $cur_page = $doc->GetPage($hlts->GetCurrentPageNumber());
    $quadsInfo = $hlts->GetCurrentQuads();
    $quadCount = $quadsInfo->size();

    for ( $i = 0; $i < $quadCount; ++$i )
    {
      //assume each quad is an axis-aligned rectangle; its bounding box is
      //the min/max of the four corner coordinates
      $q = $quadsInfo->get($i);
      $x1 = min($q->p1->x, $q->p2->x, $q->p3->x, $q->p4->x);
      $x2 = max($q->p1->x, $q->p2->x, $q->p3->x, $q->p4->x);
      $y1 = min($q->p1->y, $q->p2->y, $q->p3->y, $q->p4->y);
      $y2 = max($q->p1->y, $q->p2->y, $q->p3->y, $q->p4->y);

      $hyper_link = Link::Create(
        $sdf_doc,
        new Rect($x1, $y1, $x2, $y2),
        Action::CreateURI($sdf_doc, "http://www.pdftron.com")
      );
      $cur_page->AnnotPushBack($hyper_link);
    }
    $hlts->Next();
  }
}

Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.

Get the answers you need: Chat with us