Some test text!

Search
Hamburger Icon

Nodejs / Guides / Text search

Search for text in a PDF in Node.js

This sample searches for text in a PDF using a regular expression and then applies a link annotation to the highlighted result.

In this example we add a link annotation, but any other type of annotation can be applied here instead, such as a redaction annotation in a search-and-redact workflow.
/**
 * Computes the axis-aligned bounding box that encloses a quad.
 *
 * @param {{p1x: number, p1y: number, p2x: number, p2y: number,
 *          p3x: number, p3y: number, p4x: number, p4y: number}} quad
 *   The four corner points of the quad.
 * @returns {{x1: number, y1: number, x2: number, y2: number}}
 *   Minimum (x1, y1) and maximum (x2, y2) coordinates over the four corners.
 */
function getQuadBounds(quad) {
  const xs = [quad.p1x, quad.p2x, quad.p3x, quad.p4x];
  const ys = [quad.p1y, quad.p2y, quad.p3y, quad.p4y];
  return {
    x1: Math.min(...xs),
    y1: Math.min(...ys),
    x2: Math.max(...xs),
    y2: Math.max(...ys),
  };
}

/**
 * Searches a PDF for the first credit-card-like number (four groups of four
 * digits separated by dashes) and places a link annotation over every quad of
 * the highlighted match.
 *
 * `filename` is not defined in this snippet; it must be supplied by the
 * surrounding scope.
 *
 * @returns {Promise<void>} Resolves once the annotations have been added, or
 *   immediately after the search if nothing was found.
 */
async function main() {
  const doc = await PDFNet.PDFDoc.createFromURL(filename);
  const txtSearch = await PDFNet.TextSearch.create();
  const { Mode, ResultCode } = PDFNet.TextSearch;

  // Whole-word regular-expression search that stops at each page end and
  // collects highlight information. The modes are flags, so combine with OR.
  const mode = Mode.e_whole_word | Mode.e_page_stop | Mode.e_reg_expression | Mode.e_highlight;

  // Regular expression for a credit card number; "(\\d{4}-){3}\\d{4}" is an
  // equivalent spelling.
  const pattern = '\\d{4}-\\d{4}-\\d{4}-\\d{4}';
  await txtSearch.setMode(mode);
  await txtSearch.setPattern(pattern);

  // begin() initializes the text search; it also receives the pattern and mode.
  await txtSearch.begin(doc, pattern, mode);

  // With e_page_stop set, run() reports e_page at the end of each page, so
  // keep going until there is a match (e_found) or any other result code.
  let result = await txtSearch.run();
  while (result.code === ResultCode.e_page) {
    result = await txtSearch.run();
  }
  if (result.code !== ResultCode.e_found) {
    return;
  }

  // Add a link annotation based on the location of the found instance.
  const hlts = result.highlights;
  await hlts.begin(doc);
  while (await hlts.hasNext()) {
    const curPage = await doc.getPage(await hlts.getCurrentPageNumber());
    const quadArr = await hlts.getCurrentQuads();
    for (const currQuad of quadArr) {
      const { x1, y1, x2, y2 } = getQuadBounds(currQuad);
      const hyperLink = await PDFNet.LinkAnnot.create(doc, await PDFNet.Rect.init(x1, y1, x2, y2));
      await hyperLink.setAction(await PDFNet.Action.createURI(doc, 'http://www.pdftron.com'));
      await curPage.annotPushBack(hyperLink);
    }
    // Await the advance so a failure surfaces here instead of as an
    // unhandled rejection.
    await hlts.next();
  }
}
// Entry point: pass `main` to PDFNet.runWithCleanup instead of calling it directly.
// NOTE(review): presumably runWithCleanup releases PDFNet objects created inside
// `main` once it settles — confirm against the PDFNet documentation.
PDFNet.runWithCleanup(main);

Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.

Get the answers you need: Chat with us