Sample C# code for using the Apryse SDK to search text on PDF pages using regular expressions. The TextSearch utility class builds on functionality available in TextExtractor to simplify the most common search operations. Learn more about our Server SDK and PDF Indexed Search Library.
1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using pdftron;
7using pdftron.Common;
8using pdftron.Filters;
9using pdftron.SDF;
10using pdftron.PDF;
11
12
namespace TextSearchTestCS
{
    /// <summary>
    /// Illustrates the text search capabilities of PDFNet by running four
    /// consecutive searches over "credit card numbers.pdf":
    /// a plain-text name, a regular expression for a 16-digit card number,
    /// a regular expression for an AMEX card number, and finally an upward
    /// search for the name that precedes the AMEX number.
    /// </summary>
    class Class1
    {
        private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();
        static Class1() {}

        /// <summary>
        /// Runs the sample. Prints each match to the console and saves a copy of
        /// the document with a link annotation over the last match.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            PDFNet.Initialize(PDFTronLicense.Key);

            // Relative path to the folder containing test files.
            string input_path = "../../../../TestFiles/";

            try
            {
                using (PDFDoc doc = new PDFDoc(input_path + "credit card numbers.pdf"))
                {
                    doc.InitSecurityHandler();

                    Int32 page_num = 0;
                    String result_str = "", ambient_string = "";
                    Highlights hlts = new Highlights();

                    TextSearch txt_search = new TextSearch();
                    Int32 mode = (Int32)(TextSearch.SearchMode.e_whole_word | TextSearch.SearchMode.e_page_stop | TextSearch.SearchMode.e_highlight);
                    String pattern = "joHn sMiTh";

                    // Call Begin() to initialize the text search.
                    txt_search.Begin(doc, pattern, mode, -1, -1);

                    // 'step' tracks which of the four searches the next match belongs to.
                    int step = 0;

                    // Call Run() iteratively to find all matching instances.
                    while (true)
                    {
                        TextSearch.ResultCode code = txt_search.Run(ref page_num, ref result_str, ref ambient_string, hlts);

                        if (code == TextSearch.ResultCode.e_found)
                        {
                            if (step == 0)
                            {
                                // Step 0: found "John Smith".
                                // 'ambient_string' is not written to here, because
                                // 'e_ambient_string' is not set in the search mode.
                                Console.WriteLine(result_str + "'s credit card number is: ");

                                // Switch to regular expressions to find John's credit card number.
                                mode = txt_search.GetMode();
                                mode |= (Int32)(TextSearch.SearchMode.e_reg_expression | TextSearch.SearchMode.e_highlight);
                                txt_search.SetMode(mode);
                                pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
                                txt_search.SetPattern(pattern);

                                ++step;
                            }
                            else if (step == 1)
                            {
                                // Step 1: found John's credit card number.
                                Console.WriteLine(" " + result_str);

                                // 'hlts' is written to because 'e_highlight' is set;
                                // output the highlight info of the credit card number.
                                hlts.Begin(doc);
                                while (hlts.HasNext())
                                {
                                    Console.WriteLine("The current highlight is from page: " + hlts.GetCurrentPageNumber());
                                    hlts.Next();
                                }

                                // See if there is an AMEX card number.
                                pattern = "\\d{4}-\\d{6}-\\d{5}";
                                txt_search.SetPattern(pattern);

                                ++step;
                            }
                            else if (step == 2)
                            {
                                // Step 2: found an AMEX card number.
                                Console.WriteLine("\nThere is an AMEX card number:\n " + result_str);

                                // Search upwards for the owner of the card; supposedly the
                                // owner's name precedes the number.
                                mode = txt_search.GetMode();
                                mode |= (Int32)(TextSearch.SearchMode.e_search_up);
                                txt_search.SetMode(mode);
                                // [A-Za-z] rather than [A-z]: the latter also matches the
                                // punctuation characters that sit between 'Z' and 'a'.
                                pattern = "[A-Za-z]++ [A-Za-z]++";
                                txt_search.SetPattern(pattern);

                                ++step;
                            }
                            else if (step == 3)
                            {
                                // Step 3: found the name of the AMEX card owner.
                                Console.WriteLine("Is the owner's name:\n " + result_str + "?");

                                // Add a link annotation over every quad of the found instance.
                                hlts.Begin(doc);
                                while (hlts.HasNext())
                                {
                                    Page cur_page = doc.GetPage(hlts.GetCurrentPageNumber());
                                    double[] quads = hlts.GetCurrentQuads();
                                    int quad_count = quads.Length / 8; // 4 points * (x, y) per quad
                                    for (int i = 0; i < quad_count; ++i)
                                    {
                                        // Assume each quad is an axis-aligned rectangle and
                                        // take the bounding box of its four corners.
                                        int offset = 8 * i;
                                        double x1 = Math.Min(Math.Min(Math.Min(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
                                        double x2 = Math.Max(Math.Max(Math.Max(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
                                        double y1 = Math.Min(Math.Min(Math.Min(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);
                                        double y2 = Math.Max(Math.Max(Math.Max(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);

                                        pdftron.PDF.Annots.Link hyper_link = pdftron.PDF.Annots.Link.Create(doc, new Rect(x1, y1, x2, y2), pdftron.PDF.Action.CreateURI(doc, "http://www.pdftron.com"));
                                        hyper_link.RefreshAppearance();
                                        cur_page.AnnotPushBack(hyper_link);
                                    }
                                    hlts.Next();
                                }
                                string output_path = "../../../../TestFiles/Output/";
                                doc.Save(output_path + "credit card numbers_linked.pdf", SDFDoc.SaveOptions.e_linearized);

                                break;
                            }
                        }
                        else if (code == TextSearch.ResultCode.e_page)
                        {
                            // End of a page reached ('e_page_stop' is set);
                            // you can update your UI here, if needed.
                        }
                        else
                        {
                            // Any other result code ends the search.
                            break;
                        }
                    }
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
            finally
            {
                // Release PDFNet resources even if an unexpected exception escapes.
                PDFNet.Terminate();
            }
        }
    }
}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6// This sample shows how to use pdftron.PDF.TextSearch to search text on PDF pages
7// using regular expressions. TextSearch utility class builds on functionality
8// available in TextExtractor to simplify most common search operations.
9
#include <PDF/PDFNet.h>
#include <PDF/PDFDoc.h>
#include <PDF/TextSearch.h>
#include <PDF/Annot.h>
#include <cassert>
#include <iostream>
#include "../../LicenseKey/CPP/LicenseKey.h"
16
17using namespace std;
18using namespace pdftron;
19using namespace PDF;
20using namespace SDF;
21using namespace Common;
22
23#undef max
24#undef min
25#include <algorithm>
26
27int main(int argc, char *argv[])
28{
29 int ret = 0;
30 PDFNet::Initialize(LicenseKey);
31 std::string input_path = "../../TestFiles/credit card numbers.pdf";
32 const char* filein = argc>1 ? argv[1] : input_path.c_str();
33
34 try
35 {
36 PDFDoc doc(filein);
37 doc.InitSecurityHandler();
38
39 TextSearch txt_search;
40 TextSearch::Mode mode = TextSearch::e_whole_word | TextSearch::e_page_stop;
41 UString pattern( "joHn sMiTh" );
42
43 //call Begin() method to initialize the text search.
44 txt_search.Begin( doc, pattern, mode );
45
46 int step = 0;
47
48 //call Run() method iteratively to find all matching instances.
49 while ( true )
50 {
51 SearchResult result = txt_search.Run();
52
53 if ( result )
54 {
55 if ( step == 0 )
56 { // Step 0: found "John Smith"
57 // note that, here, 'ambient_string' and 'hlts' are not written to,
58 // as 'e_ambient_string' and 'e_highlight' are not set.
59
60 cout << result.GetMatch() << "'s credit card number is: " << endl;
61
62 //now switch to using regular expressions to find John's credit card number
63 mode = txt_search.GetMode();
64 mode |= TextSearch::e_reg_expression | TextSearch::e_highlight;
65 txt_search.SetMode(mode);
66 pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
67 txt_search.SetPattern(pattern);
68
69 ++step;
70 }
71 else if ( step == 1 )
72 {
73 //step 1: found John's credit card number
74 cout << " " << result.GetMatch() << endl;
75
76 //note that, here, 'hlts' is written to, as 'e_highlight' has been set.
77 //output the highlight info of the credit card number.
78 Highlights hlts = result.GetHighlights();
79 hlts.Begin(doc);
80 while ( hlts.HasNext() )
81 {
82 cout << "The current highlight is from page: " << hlts.GetCurrentPageNumber() << endl;
83 hlts.Next();
84 }
85
86 //see if there is an AMEX card number
87 pattern = "\\d{4}-\\d{6}-\\d{5}";
88 txt_search.SetPattern(pattern);
89
90 ++step;
91 }
92 else if ( step == 2 )
93 {
94 //found an AMEX card number
95 cout << "\nThere is an AMEX card number:\n " << result.GetMatch() << endl;
96
97 //change mode to find the owner of the credit card; supposedly, the owner's
98 //name proceeds the number
99 mode = txt_search.GetMode();
100 mode |= TextSearch::e_search_up;
101 txt_search.SetMode(mode);
102 pattern = "[A-z]++ [A-z]++";
103 txt_search.SetPattern(pattern);
104
105 ++step;
106 }
107 else if ( step == 3 )
108 {
109 //found the owner's name of the AMEX card
110 cout << "Is the owner's name:\n " << result.GetMatch() << "?\n" << flush;
111
112 //add a link annotation based on the location of the found instance
113 Highlights hlts = result.GetHighlights();
114 hlts.Begin(doc);
115 while ( hlts.HasNext() )
116 {
117 Page cur_page= doc.GetPage(hlts.GetCurrentPageNumber());
118 const double *quads;
119 int quad_count = hlts.GetCurrentQuads(quads);
120 for ( int i = 0; i < quad_count; ++i )
121 {
122 //assume each quad is an axis-aligned rectangle
123 const double *q = &quads[8*i];
124 double x1 = min(min(min(q[0], q[2]), q[4]), q[6]);
125 double x2 = max(max(max(q[0], q[2]), q[4]), q[6]);
126 double y1 = min(min(min(q[1], q[3]), q[5]), q[7]);
127 double y2 = max(max(max(q[1], q[3]), q[5]), q[7]);
128 Annots::Link hyper_link = Annots::Link::Create(doc, Rect(x1, y1, x2, y2), Action::CreateURI(doc, "http://www.pdftron.com"));
129 cur_page.AnnotPushBack(hyper_link);
130 }
131 hlts.Next();
132 }
133 std::string output_path = "../../TestFiles/Output/";
134 doc.Save((output_path + "credit card numbers_linked.pdf").c_str(), SDFDoc::e_linearized, 0);
135 break;
136 }
137 }
138 else if ( result.IsPageEnd() )
139 {
140 //you can update your UI here, if needed
141 }
142 else
143 {
144 assert (result.IsDocEnd());
145 break;
146 }
147 }
148 }
149 catch(Exception& e)
150 {
151 cout << e << endl;
152 ret = 1;
153 }
154 catch(...)
155 {
156 cout << "Unknown Exception" << endl;
157 ret = 1;
158 }
159
160 PDFNet::Terminate();
161 return ret;
162}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8 "fmt"
9 "strconv"
10 . "math"
11 . "pdftron"
12)
13
14import "pdftron/Samples/LicenseKey/GO"
15
16// This sample illustrates the basic text search capabilities of PDFNet.
17
18// Relative path to the folder containing the test files.
19var inputPath = "../../TestFiles/"
20var outputPath = "../../TestFiles/Output/"
21
22func main(){
23 // Initialize PDFNet
24 PDFNetInitialize(PDFTronLicense.Key)
25 doc := NewPDFDoc(inputPath + "credit card numbers.pdf")
26 doc.InitSecurityHandler()
27
28 txtSearch := NewTextSearch()
29 mode := TextSearchE_whole_word | TextSearchE_page_stop
30
31 pattern := "joHn sMiTh"
32
33 // call Begin() method to initialize the text search.
34 txtSearch.Begin(doc, pattern, uint(mode))
35
36 step := 0
37
38 // call Run() method iteratively to find all matching instances.
39 for true{
40 searchResult := txtSearch.Run()
41 if searchResult.IsFound(){
42 if step == 0{
43 // step 0: found "John Smith"
44 // note that, here, 'ambient_string' and 'hlts' are not written to,
45 // as 'e_ambient_string' and 'e_highlight' are not set.
46
47 fmt.Println(searchResult.GetMatch() + "'s credit card number is: ")
48 // now switch to using regular expressions to find John's credit card number
49 mode := PdftronPDFTextSearchTextSearchModes(txtSearch.GetMode())
50 mode = mode | TextSearchE_reg_expression | TextSearchE_highlight
51 txtSearch.SetMode(uint(mode))
52 pattern := "\\d{4}-\\d{4}-\\d{4}-\\d{4}" //or "(\\d{4}-){3}\\d{4}"
53 txtSearch.SetPattern(pattern)
54 step = step + 1
55 }else if step == 1{
56 // step 1: found John's credit card number
57 fmt.Println(" " + searchResult.GetMatch())
58
59 // note that, here, 'hlts' is written to, as 'e_highligh' has been set.
60 // output the highlight info of the credit card number
61 hlts := searchResult.GetHighlights()
62 hlts.Begin(doc)
63 for hlts.HasNext(){
64 fmt.Println("The current highlight is from page: " + strconv.Itoa(hlts.GetCurrentPageNumber()))
65 hlts.Next()
66 }
67 // see if there is an AMEX card number
68 pattern := "\\d{4}-\\d{6}-\\d{5}"
69 txtSearch.SetPattern(pattern)
70
71 step = step + 1
72 }else if step == 2{
73 // found an AMEX card number
74 fmt.Println("\nThere is an AMEX card number:\n " + searchResult.GetMatch())
75
76 // change mode to find the owner of the credit card; supposedly, the owner's
77 // name proceeds the number
78 mode := PdftronPDFTextSearchTextSearchModes(txtSearch.GetMode())
79 mode = mode | TextSearchE_search_up
80 txtSearch.SetMode(uint(mode))
81 pattern := "[A-z]++ [A-z]++"
82 txtSearch.SetPattern(pattern)
83 step = step + 1
84 }else if step == 3{
85 // found the owner's name of the AMEX card
86 fmt.Println("Is the owner's name:\n " + searchResult.GetMatch() + "?")
87
88 // add a link annotation based on the location of the found instance
89 hlts := searchResult.GetHighlights()
90 hlts.Begin(doc)
91
92 for hlts.HasNext(){
93 curPage := doc.GetPage(uint(hlts.GetCurrentPageNumber()))
94 quadsInfo := hlts.GetCurrentQuads()
95
96 i := 0
97 for i < int(quadsInfo.Size()){
98 q := quadsInfo.Get(i)
99 // assume each quad is an axis-aligned rectangle
100 x1 := Min(Min(Min(q.GetP1().GetX(), q.GetP2().GetX()), q.GetP3().GetX()), q.GetP4().GetX())
101 x2 := Max(Max(Max(q.GetP1().GetX(), q.GetP2().GetX()), q.GetP3().GetX()), q.GetP4().GetX())
102 y1 := Min(Min(Min(q.GetP1().GetY(), q.GetP2().GetY()), q.GetP3().GetY()), q.GetP4().GetY())
103 y2 := Max(Max(Max(q.GetP1().GetY(), q.GetP2().GetY()), q.GetP3().GetY()), q.GetP4().GetY())
104 hyperLink := LinkCreate(doc.GetSDFDoc(), NewRect(x1, y1, x2, y2), ActionCreateURI(doc.GetSDFDoc(), "http://www.pdftron.com"))
105 curPage.AnnotPushBack(hyperLink)
106 i = i + 1
107 }
108 hlts.Next()
109 }
110 doc.Save(outputPath + "credit card numbers_linked.pdf", uint(SDFDocE_linearized))
111 break
112 }
113 }else if searchResult.IsPageEnd(){
114 //you can update your UI here, if needed
115 }else{
116 break
117 }
118 }
119 doc.Close()
120 PDFNetTerminate()
121}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.PDFNetException;
7import com.pdftron.pdf.*;
8import com.pdftron.sdf.SDFDoc;
9
10// This sample illustrates the basic text search capabilities of PDFNet.
11public class TextSearchTest {
12
13 public static void main(String[] args) {
14 PDFNet.initialize(PDFTronLicense.Key());
15 String input_path = "../../TestFiles/";
16
17 try (PDFDoc doc = new PDFDoc(input_path + "credit card numbers.pdf")) {
18 doc.initSecurityHandler();
19
20 TextSearch txt_search = new TextSearch();
21 int mode = TextSearch.e_whole_word | TextSearch.e_page_stop;
22
23 String pattern = "joHn sMiTh";
24
25 //PDFDoc doesn't allow simultaneous access from different threads. If this
26 //document could be used from other threads (e.g., the rendering thread inside
27 //PDFView/PDFViewCtrl, if used), it is good practice to lock it.
28 //Notice: don't forget to call doc.Unlock() to avoid deadlock.
29 doc.lock();
30
31 //call Begin() method to initialize the text search.
32 txt_search.begin(doc, pattern, mode, -1, -1);
33
34 int step = 0;
35
36 //call Run() method iteratively to find all matching instances.
37 while (true) {
38 TextSearchResult result = txt_search.run();
39
40 if (result.getCode() == TextSearchResult.e_found) {
41 if (step == 0) {
42 //step 0: found "John Smith"
43 //note that, here, 'ambient_string' and 'hlts' are not written to,
44 //as 'e_ambient_string' and 'e_highlight' are not set.
45 System.out.println(result.getResultStr() + "'s credit card number is: ");
46
47 //now switch to using regular expressions to find John's credit card number
48 mode = txt_search.getMode();
49 mode |= TextSearch.e_reg_expression | TextSearch.e_highlight;
50 txt_search.setMode(mode);
51 String new_pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
52 txt_search.setPattern(new_pattern);
53
54 step = step + 1;
55 } else if (step == 1) {
56 //step 1: found John's credit card number
57 System.out.println(" " + result.getResultStr());
58
59 //note that, here, 'hlts' is written to, as 'e_highlight' has been set.
60 //output the highlight info of the credit card number
61 Highlights hlts = result.getHighlights();
62 hlts.begin(doc);
63 while (hlts.hasNext()) {
64 System.out.println("The current highlight is from page: " + hlts.getCurrentPageNumber());
65 hlts.next();
66 }
67
68 //see if there is an AMEX card number
69 String new_pattern = "\\d{4}-\\d{6}-\\d{5}";
70 txt_search.setPattern(new_pattern);
71
72 step = step + 1;
73 } else if (step == 2) {
74 //found an AMEX card number
75 System.out.println("\nThere is an AMEX card number:");
76 System.out.println(" " + result.getResultStr());
77
78 //change mode to find the owner of the credit card; supposedly, the owner's
79 //name proceeds the number
80 mode = txt_search.getMode();
81 mode |= TextSearch.e_search_up;
82 txt_search.setMode(mode);
83 String new_pattern = "[A-z]++ [A-z]++";
84 txt_search.setPattern(new_pattern);
85
86 step = step + 1;
87 } else if (step == 3) {
88 //found the owner's name of the AMEX card
89 System.out.println("Is the owner's name:");
90 System.out.println(" " + result.getResultStr() + "?");
91
92 //add a link annotation based on the location of the found instance
93 Highlights hlts = result.getHighlights();
94 hlts.begin(doc);
95 while (hlts.hasNext()) {
96 Page cur_page = doc.getPage(hlts.getCurrentPageNumber());
97 double[] q = hlts.getCurrentQuads();
98 int quad_count = q.length / 8;
99 for (int i = 0; i < quad_count; ++i) {
100 //assume each quad is an axis-aligned rectangle
101 int offset = 8 * i;
102 double x1 = Math.min(Math.min(Math.min(q[offset + 0], q[offset + 2]), q[offset + 4]), q[offset + 6]);
103 double x2 = Math.max(Math.max(Math.max(q[offset + 0], q[offset + 2]), q[offset + 4]), q[offset + 6]);
104 double y1 = Math.min(Math.min(Math.min(q[offset + 1], q[offset + 3]), q[offset + 5]), q[offset + 7]);
105 double y2 = Math.max(Math.max(Math.max(q[offset + 1], q[offset + 3]), q[offset + 5]), q[offset + 7]);
106 com.pdftron.pdf.annots.Link hyper_link = com.pdftron.pdf.annots.Link.create(doc, new Rect(x1, y1, x2, y2), Action.createURI(doc, "http://www.pdftron.com"));
107 cur_page.annotPushBack(hyper_link);
108 }
109 hlts.next();
110 }
111 String output_path = "../../TestFiles/Output/";
112 doc.save(output_path + "credit card numbers_linked.pdf", SDFDoc.SaveMode.LINEARIZED, null);
113 // output PDF doc
114 break;
115 }
116 } else if (result.getCode() == TextSearchResult.e_page) {
117 //you can update your UI here, if needed
118 } else {
119 break;
120 }
121 }
122
123 doc.unlock();
124 } catch (PDFNetException e) {
125 System.out.println(e);
126 }
127
128 PDFNet.terminate();
129 }
130}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6
7const { PDFNet } = require('@pdftron/pdfnet-node');
8const PDFTronLicense = require('../LicenseKey/LicenseKey');
9
10((exports) => {
11
12 exports.runTextSearchTest = () => {
13
14 const main = async() => {
15 // Relative path to the folder containing test files.
16 const inputURL = '../TestFiles/';
17 const inputFilename = 'credit card numbers.pdf'; // addimage.pdf, newsletter.pdf
18
19 try {
20 const doc = await PDFNet.PDFDoc.createFromFilePath(inputURL + inputFilename);
21 doc.initSecurityHandler();
22
23 const txtSearch = await PDFNet.TextSearch.create();
24 let mode = PDFNet.TextSearch.Mode.e_whole_word + PDFNet.TextSearch.Mode.e_page_stop; // Uses both whole word and page stop
25 let pattern = 'joHn sMiTh';
26
27 txtSearch.begin(doc, pattern, mode); // searches for the "pattern" in the document while following the inputted modes.
28
29 let step = 0;
30
31 // call Run() iteratively to find all matching instances of the word 'joHn sMiTh'
32 /* eslint-disable-next-line no-constant-condition */
33 while (true) {
34 const result = await txtSearch.run();
35 let hlts;
36 if (result.code === PDFNet.TextSearch.ResultCode.e_found) {
37 if (step === 0) { // Step 0: found "John Smith"
38 // note that, here, 'ambient_str' and 'highlights' are not written to,
39 // as 'e_ambient_string' and 'e_highlight' are not set.
40 console.log(result.out_str + "'s credit card number is: ");
41
42 // now switch to using regular expressions to find John's credit card number
43 mode = await txtSearch.getMode();
44 mode += PDFNet.TextSearch.Mode.e_reg_expression + PDFNet.TextSearch.Mode.e_highlight;
45 txtSearch.setMode(mode);
46 pattern = '\\d{4}-\\d{4}-\\d{4}-\\d{4}'; // or "(\\d{4}-){3}\\d{4}"
47 txtSearch.setPattern(pattern);
48
49 ++step;
50 } else if (step === 1) {
51 // step 1: found John's credit card number
52 console.log(' ' + result.out_str);
53 // note that, here, 'hlts' is written to, as 'e_highlight' has been set.
54 // output the highlight info of the credit card number.
55 hlts = result.highlights;
56 hlts.begin(doc);
57 while ((await hlts.hasNext())) {
58 const highlightPageNum = await hlts.getCurrentPageNumber();
59 console.log('The current highlight is from page: ' + highlightPageNum);
60 await hlts.next();
61 }
62 // see if there is an AMEX card number
63 pattern = '\\d{4}-\\d{6}-\\d{5}';
64 txtSearch.setPattern(pattern);
65
66 ++step;
67 } else if (step === 2) {
68 // found an AMEX card number
69 console.log('\nThere is an AMEX card number:\n ' + result.out_str);
70
71 // change mode to find the owner of the credit card; supposedly, the owner's
72 // name proceeds the number
73 mode = await txtSearch.getMode();
74 mode += PDFNet.TextSearch.Mode.e_search_up;
75 txtSearch.setMode(mode);
76 pattern = '[A-z]++ [A-z]++';
77 txtSearch.setPattern(pattern);
78
79 ++step;
80 } else if (step === 3) {
81 // found the owner's name of the AMEX card
82 console.log("Is the owner's name:\n " + result.out_str + '?');
83
84 // add a link annotation based on the location of the found instance
85 hlts = result.highlights;
86 await hlts.begin(doc); // is await needed?
87 while ((await hlts.hasNext())) {
88 const curPage = await doc.getPage((await hlts.getCurrentPageNumber()));
89 const quadArr = await hlts.getCurrentQuads();
90 for (let i = 0; i < quadArr.length; ++i) {
91 const currQuad = quadArr[i];
92 const x1 = Math.min(Math.min(Math.min(currQuad.p1x, currQuad.p2x), currQuad.p3x), currQuad.p4x);
93 const x2 = Math.max(Math.max(Math.max(currQuad.p1x, currQuad.p2x), currQuad.p3x), currQuad.p4x);
94 const y1 = Math.min(Math.min(Math.min(currQuad.p1y, currQuad.p2y), currQuad.p3y), currQuad.p4y);
95 const y2 = Math.max(Math.max(Math.max(currQuad.p1y, currQuad.p2y), currQuad.p3y), currQuad.p4y);
96
97 const hyperLink = await PDFNet.LinkAnnot.create(doc, (await PDFNet.Rect.init(x1, y1, x2, y2)));
98 await hyperLink.setAction((await PDFNet.Action.createURI(doc, 'http://www.pdftron.com')));
99 await curPage.annotPushBack(hyperLink);
100 }
101 hlts.next();
102 }
103 await doc.save('../TestFiles/Output/credit card numbers_linked.pdf', PDFNet.SDFDoc.SaveOptions.e_linearized);
104 break;
105 }
106 } else if (result.code === PDFNet.TextSearch.ResultCode.e_page) {
107 // you can update your UI here, if needed
108 console.log('page end');
109 } else if (result.code === PDFNet.TextSearch.ResultCode.e_done) {
110 break;
111 }
112 }
113 } catch (err) {
114 console.log(err);
115 }
116 };
117 PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error){console.log('Error: ' + JSON.stringify(error));}).then(function(){return PDFNet.shutdown();});
118 };
119 exports.runTextSearchTest();
120})(exports);
121// eslint-disable-next-line spaced-comment
122//# sourceURL=TextSearchTest.js
1<?php
2//---------------------------------------------------------------------------------------
3// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
4// Consult LICENSE.txt regarding license information.
5//---------------------------------------------------------------------------------------
6if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
7include("../../../PDFNetC/Lib/PDFNetPHP.php");
8include("../../LicenseKey/PHP/LicenseKey.php");
9
10// Relative path to the folder containing the test files.
11$input_path = getcwd()."/../../TestFiles/";
12$output_path = $input_path."Output/";
13
// Runs four consecutive text searches over "credit card numbers.pdf":
// a plain-text name, a 16-digit card number regex, an AMEX card number regex,
// and an upward search for the name preceding the AMEX number. The last match
// is covered with link annotations and the document is saved to $output_path.
PDFNet::Initialize($LicenseKey);
PDFNet::GetSystemFontList();    // Wait for fonts to be loaded if they haven't already. This is done because PHP can run into errors when shutting down if font loading is still in progress.

$doc = new PDFDoc($input_path."credit card numbers.pdf");
$doc->InitSecurityHandler();

$txt_search = new TextSearch();
$mode = TextSearch::e_whole_word | TextSearch::e_page_stop;
$pattern = "joHn sMiTh";

// Call Begin() to initialize the text search.
$txt_search->Begin( $doc, $pattern, $mode );

// $step tracks which of the four searches the next match belongs to.
$step = 0;

// Call Run() iteratively to find all matching instances.
while ( true )
{
	$searchResult = $txt_search->Run();
	if ( $searchResult->IsFound() )
	{
		if ( $step == 0 )
		{
			// Step 0: found "John Smith".
			// 'e_highlight' is not set yet, so highlights are only
			// requested from the next step onwards.
			echo nl2br($searchResult->GetMatch()."'s credit card number is: \n");

			// Switch to regular expressions to find John's credit card number.
			$mode = $txt_search->GetMode();
			$mode |= TextSearch::e_reg_expression | TextSearch::e_highlight;
			$txt_search->SetMode($mode);
			$pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
			$txt_search->SetPattern($pattern);

			++$step;
		}
		else if ( $step == 1 )
		{
			// Step 1: found John's credit card number.
			echo nl2br(" ".$searchResult->GetMatch()."\n");

			// 'e_highlight' has been set, so output the highlight info
			// of the credit card number.
			$hlts = $searchResult->GetHighlights();
			$hlts->Begin($doc);
			while ( $hlts->HasNext() )
			{
				echo nl2br("The current highlight is from page: ".$hlts->GetCurrentPageNumber()."\n");
				$hlts->Next();
			}

			// See if there is an AMEX card number.
			$pattern = "\\d{4}-\\d{6}-\\d{5}";
			$txt_search->SetPattern($pattern);

			++$step;
		}
		else if ( $step == 2 )
		{
			// Step 2: found an AMEX card number.
			echo nl2br("\nThere is an AMEX card number:\n ".$searchResult->GetMatch()."\n");

			// Search upwards for the owner of the card; supposedly the
			// owner's name precedes the number.
			$mode = $txt_search->GetMode();
			$mode |= TextSearch::e_search_up;
			$txt_search->SetMode($mode);
			// [A-Za-z] rather than [A-z]: the latter also matches the
			// punctuation characters that sit between 'Z' and 'a'.
			$pattern = "[A-Za-z]++ [A-Za-z]++";
			$txt_search->SetPattern($pattern);

			++$step;
		}
		else if ( $step == 3 )
		{
			// Step 3: found the name of the AMEX card owner.
			echo nl2br("Is the owner's name:\n ".$searchResult->GetMatch()."?\n");

			// Add a link annotation over every quad of the found instance.
			$hlts = $searchResult->GetHighlights();
			$hlts->Begin($doc);
			while ( $hlts->HasNext() )
			{
				$cur_page = $doc->GetPage($hlts->GetCurrentPageNumber());
				$quadsInfo = $hlts->GetCurrentQuads();

				for ( $i = 0; $i < $quadsInfo->size(); ++$i )
				{
					// Assume each quad is an axis-aligned rectangle and
					// take the bounding box of its four corners.
					$q = $quadsInfo->get($i);
					$x1 = min(min(min($q->p1->x, $q->p2->x), $q->p3->x), $q->p4->x);
					$x2 = max(max(max($q->p1->x, $q->p2->x), $q->p3->x), $q->p4->x);
					$y1 = min(min(min($q->p1->y, $q->p2->y), $q->p3->y), $q->p4->y);
					$y2 = max(max(max($q->p1->y, $q->p2->y), $q->p3->y), $q->p4->y);
					$hyper_link = Link::CreateAnnot($doc->GetSDFDoc(), new Rect($x1, $y1, $x2, $y2),
						Action::CreateURI($doc->GetSDFDoc(), "http://www.pdftron.com"));
					$cur_page->AnnotPushBack($hyper_link);
				}
				$hlts->Next();
			}

			$doc->Save($output_path."credit card numbers_linked.pdf", SDFDoc::e_linearized);

			break;
		}
	}
	else if ( $searchResult->IsPageEnd() )
	{
		// End of a page reached ('e_page_stop' is set). The original code tested
		// an undefined $code variable here, which sent every page boundary into
		// the else branch and aborted the search after the first page.
		// You can update your UI here, if needed.
	}
	else
	{
		// Neither a match nor a page end: stop searching.
		break;
	}
}

$doc->Close();
PDFNet::Terminate();
132?>
1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11sys.path.append("../../LicenseKey/PYTHON")
12from LicenseKey import *
13
14# This sample illustrates the basic text search capabilities of PDFNet.
15
16# Relative path to the folder containing the test files.
17input_path = "../../TestFiles/"
18output_path = "../../TestFiles/Output/"
19
def main():
    """Run four consecutive text searches over "credit card numbers.pdf".

    The searches are: a plain-text name, a 16-digit card number regex, an
    AMEX card number regex, and an upward search for the name preceding the
    AMEX number. Each match is printed; the last match is covered with link
    annotations and the document is saved to ``output_path``.
    """
    # Initialize PDFNet
    PDFNet.Initialize(LicenseKey)
    doc = PDFDoc(input_path + "credit card numbers.pdf")
    doc.InitSecurityHandler()

    txt_search = TextSearch()
    mode = TextSearch.e_whole_word | TextSearch.e_page_stop

    pattern = "joHn sMiTh"

    # Call Begin() to initialize the text search.
    txt_search.Begin(doc, pattern, mode)

    # 'step' tracks which of the four searches the next match belongs to.
    step = 0

    # Call Run() iteratively to find all matching instances.
    while True:
        searchResult = txt_search.Run()
        if searchResult.IsFound():
            if step == 0:
                # Step 0: found "John Smith".
                # 'e_highlight' is not set yet, so highlights are only
                # requested from the next step onwards.
                print(str(searchResult.GetMatch()) + "'s credit card number is: ")

                # Switch to regular expressions to find John's credit card number.
                mode = txt_search.GetMode()
                mode |= TextSearch.e_reg_expression | TextSearch.e_highlight
                txt_search.SetMode(mode)
                pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"  # or "(\\d{4}-){3}\\d{4}"
                txt_search.SetPattern(pattern)
                step = step + 1
            elif step == 1:
                # Step 1: found John's credit card number.
                print(" " + searchResult.GetMatch())

                # 'e_highlight' has been set, so output the highlight info
                # of the credit card number.
                hlts = searchResult.GetHighlights()
                hlts.Begin(doc)
                while hlts.HasNext():
                    print("The current highlight is from page: " + str(hlts.GetCurrentPageNumber()))
                    hlts.Next()

                # See if there is an AMEX card number.
                pattern = "\\d{4}-\\d{6}-\\d{5}"
                txt_search.SetPattern(pattern)

                step = step + 1
            elif step == 2:
                # Step 2: found an AMEX card number.
                print("\nThere is an AMEX card number:\n " + searchResult.GetMatch())

                # Search upwards for the owner of the card; supposedly the
                # owner's name precedes the number.
                mode = txt_search.GetMode()
                mode |= TextSearch.e_search_up
                txt_search.SetMode(mode)
                # [A-Za-z] rather than [A-z]: the latter also matches the
                # punctuation characters that sit between 'Z' and 'a'.
                pattern = "[A-Za-z]++ [A-Za-z]++"
                txt_search.SetPattern(pattern)
                step = step + 1
            elif step == 3:
                # Step 3: found the name of the AMEX card owner.
                print("Is the owner's name:\n " + searchResult.GetMatch() + "?")

                # Add a link annotation over every quad of the found instance.
                hlts = searchResult.GetHighlights()
                hlts.Begin(doc)

                while hlts.HasNext():
                    cur_page = doc.GetPage(hlts.GetCurrentPageNumber())
                    quadsInfo = hlts.GetCurrentQuads()

                    for i in range(len(quadsInfo)):
                        q = quadsInfo[i]
                        # Assume each quad is an axis-aligned rectangle and
                        # take the bounding box of its four corners.
                        x1 = min(q.p1.x, q.p2.x, q.p3.x, q.p4.x)
                        x2 = max(q.p1.x, q.p2.x, q.p3.x, q.p4.x)
                        y1 = min(q.p1.y, q.p2.y, q.p3.y, q.p4.y)
                        y2 = max(q.p1.y, q.p2.y, q.p3.y, q.p4.y)
                        hyper_link = Link.Create(doc.GetSDFDoc(), Rect(x1, y1, x2, y2), Action.CreateURI(doc.GetSDFDoc(), "http://www.pdftron.com"))
                        cur_page.AnnotPushBack(hyper_link)
                    hlts.Next()
                doc.Save(output_path + "credit card numbers_linked.pdf", SDFDoc.e_linearized)
                break
        elif searchResult.IsPageEnd():
            # End of a page reached ('e_page_stop' is set). The original code
            # compared an undefined name 'code' here, which raised NameError
            # at the first page boundary.
            pass
        else:
            # Neither a match nor a page end: stop searching.
            break

    doc.Close()
    PDFNet.Terminate()

if __name__ == '__main__':
    main()
#---------------------------------------------------------------------------------------
# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
# Consult LICENSE.txt regarding license information.
#---------------------------------------------------------------------------------------

require '../../../PDFNetC/Lib/PDFNetRuby'
include PDFNetRuby
require '../../LicenseKey/RUBY/LicenseKey'

$stdout.sync = true

# This sample illustrates the basic text search capabilities of PDFNet.
#
# The script runs one TextSearch over "credit card numbers.pdf" and changes the
# search pattern/mode after every hit, walking through four steps:
#   step 0 - plain whole-word search for "joHn sMiTh"
#   step 1 - regular expression search for a 4-4-4-4 digit card number
#   step 2 - regular expression search for a 4-6-5 digit (AMEX style) number
#   step 3 - upward search for a "<word> <word>" name, which is then turned into
#            link annotations before the document is saved to output_path

# Relative path to the folder containing the test files.
input_path = "../../TestFiles/"
output_path = "../../TestFiles/Output/"

# Initialize PDFNet
PDFNet.Initialize(PDFTronLicense.Key)
doc = PDFDoc.new(input_path + "credit card numbers.pdf")
doc.InitSecurityHandler

txt_search = TextSearch.new
mode = TextSearch::E_whole_word | TextSearch::E_page_stop

pattern = "joHn sMiTh"

# call Begin method to initialize the text search.
txt_search.Begin(doc, pattern, mode)

# Index of the scenario step the next hit belongs to (see the overview above).
step = 0

# call Run method iteratively to find all matching instances.
loop do
  searchResult = txt_search.Run
  if searchResult.IsFound
    case step
    when 0
      # step 0: found "John Smith"
      # E_highlight has not been added to the mode yet, so this step only
      # uses the matched text.
      puts searchResult.GetMatch + "'s credit card number is: "

      # now switch to using regular expressions to find John's credit card number
      mode = txt_search.GetMode
      mode |= TextSearch::E_reg_expression | TextSearch::E_highlight
      txt_search.SetMode(mode)
      pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}" # or "(\\d{4}-){3}\\d{4}"
      txt_search.SetPattern(pattern)
      step = step + 1
    when 1
      # step 1: found John's credit card number
      puts "  " + searchResult.GetMatch

      # E_highlight was set in step 0, so highlight info is read from this result.
      # output the highlight info of the credit card number
      hlts = searchResult.GetHighlights
      hlts.Begin(doc)
      while hlts.HasNext
        puts "The current highlight is from page: " + hlts.GetCurrentPageNumber.to_s
        hlts.Next
      end

      # see if there is an AMEX card number
      pattern = "\\d{4}-\\d{6}-\\d{5}"
      txt_search.SetPattern(pattern)

      step = step + 1
    when 2
      # found an AMEX card number
      puts "\nThere is an AMEX card number:\n  " + searchResult.GetMatch

      # change mode to find the owner of the credit card; supposedly, the owner's
      # name precedes the number
      mode = txt_search.GetMode
      mode |= TextSearch::E_search_up
      txt_search.SetMode(mode)
      # [A-Za-z] rather than [A-z]: the range A-z also covers the ASCII
      # punctuation between 'Z' and 'a' ([ \ ] ^ _ `), which is not part of a name.
      pattern = "[A-Za-z]++ [A-Za-z]++"
      txt_search.SetPattern(pattern)
      step = step + 1
    when 3
      # found the owner's name of the AMEX card
      puts "Is the owner's name:\n  " + searchResult.GetMatch + "?"

      # add a link annotation based on the location of the found instance
      hlts = searchResult.GetHighlights
      hlts.Begin(doc)

      while hlts.HasNext
        cur_page = doc.GetPage(hlts.GetCurrentPageNumber)
        quadsInfo = hlts.GetCurrentQuads

        i = 0
        while i < quadsInfo.size
          q = quadsInfo[i]
          # assume each quad is an axis-aligned rectangle
          xs = [q.p1.x, q.p2.x, q.p3.x, q.p4.x]
          ys = [q.p1.y, q.p2.y, q.p3.y, q.p4.y]
          hyper_link = Link.Create(doc.GetSDFDoc, Rect.new(xs.min, ys.min, xs.max, ys.max), Action.CreateURI(doc.GetSDFDoc, "http://www.pdftron.com"))
          cur_page.AnnotPushBack(hyper_link)
          i = i + 1
        end
        hlts.Next
      end
      doc.Save(output_path + "credit card numbers_linked.pdf", SDFDoc::E_linearized)
      # The scenario is complete; leave the search loop.
      break
    end
  elsif searchResult.IsPageEnd
    # Not a hit: with E_page_stop in the mode the search presumably reports each
    # page boundary; nothing to do here, just keep searching.
    # (The result object is queried directly -- there is no separate 'code'
    # variable in this script.)
  else
    # Neither a hit nor a page boundary: stop searching.
    break
  end
end

doc.Close
PDFNet.Terminate
'
' Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
'

Imports System
Imports pdftron
Imports pdftron.Common
Imports pdftron.Filters
Imports pdftron.SDF
Imports pdftron.PDF

' This sample illustrates various text search capabilities of PDFNet.
'
' One TextSearch is run over "credit card numbers.pdf"; after every hit the
' pattern/mode is changed, walking through four steps:
'   step 0 - whole-word search for "joHn sMiTh"
'   step 1 - regular expression search for a 4-4-4-4 digit card number
'   step 2 - regular expression search for a 4-6-5 digit (AMEX style) number
'   step 3 - upward search for a "<word> <word>" name, which is turned into
'            link annotations before the document is saved
Module TextSearchTestVB
    Dim pdfNetLoader As PDFNetLoader

    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub

    ' Entry point: runs the four-step search scenario described above.
    ' PDFNetException is reported on the console.
    Sub Main()
        PDFNet.Initialize(PDFTronLicense.Key)

        ' Relative path to the folder containing test files.
        Dim input_path As String = "../../../../TestFiles/"

        Try

            Using doc As PDFDoc = New PDFDoc(input_path & "credit card numbers.pdf")
                doc.InitSecurityHandler()

                ' Variables that Run() fills in on every call.
                Dim page_num As Int32 = 0
                Dim result_str As String = "", ambient_string As String = ""
                Dim hlts As Highlights = New Highlights()

                Dim txt_search As TextSearch = New TextSearch()
                Dim mode As Int32 = CInt((TextSearch.SearchMode.e_whole_word Or TextSearch.SearchMode.e_page_stop Or TextSearch.SearchMode.e_highlight))
                Dim pattern As String = "joHn sMiTh"

                ' Call Begin() to initialize the text search.
                txt_search.Begin(doc, pattern, mode, -1, -1)

                ' Index of the scenario step the next hit belongs to.
                Dim step_ As Integer = 0

                ' Call Run() iteratively to find all matching instances.
                While True
                    Dim code As TextSearch.ResultCode = txt_search.Run(page_num, result_str, ambient_string, hlts)

                    If code = TextSearch.ResultCode.e_found Then

                        If step_ = 0 Then
                            ' Step 0: found "John Smith".
                            Console.WriteLine(result_str & "'s credit card number is: ")

                            ' Switch to regular expressions to find John's credit card number.
                            mode = txt_search.GetMode()
                            mode = mode Or CInt((TextSearch.SearchMode.e_reg_expression Or TextSearch.SearchMode.e_highlight))
                            txt_search.SetMode(mode)
                            pattern = "\d{4}-\d{4}-\d{4}-\d{4}"
                            txt_search.SetPattern(pattern)
                            step_ += 1
                        ElseIf step_ = 1 Then
                            ' Step 1: found John's credit card number.
                            Console.WriteLine("  " & result_str)

                            ' Output the highlight info of the credit card number.
                            hlts.Begin(doc)

                            While hlts.HasNext()
                                Console.WriteLine("The current highlight is from page: " & hlts.GetCurrentPageNumber())
                                hlts.Next()
                            End While

                            ' See if there is an AMEX card number.
                            pattern = "\d{4}-\d{6}-\d{5}"
                            txt_search.SetPattern(pattern)
                            step_ += 1
                        ElseIf step_ = 2 Then
                            ' Step 2: found an AMEX card number.
                            Console.WriteLine(vbLf & "There is an AMEX card number:" & vbLf & "  " & result_str)

                            ' Search upwards for the owner of the card; supposedly the
                            ' owner's name precedes the number.
                            mode = txt_search.GetMode()
                            mode = mode Or CInt((TextSearch.SearchMode.e_search_up))
                            txt_search.SetMode(mode)
                            ' [A-Za-z] rather than [A-z]: the range A-z also covers the ASCII
                            ' punctuation between "Z" and "a", which is not part of a name.
                            pattern = "[A-Za-z]++ [A-Za-z]++"
                            txt_search.SetPattern(pattern)
                            step_ += 1
                        ElseIf step_ = 3 Then
                            ' Step 3: found the owner's name of the AMEX card.
                            Console.WriteLine("Is the owner's name:" & vbLf & "  " & result_str & "?")

                            ' Add a link annotation based on the location of the found instance.
                            hlts.Begin(doc)
                            While hlts.HasNext()
                                Dim cur_page As Page = doc.GetPage(hlts.GetCurrentPageNumber())
                                Dim quads As Double() = hlts.GetCurrentQuads()
                                ' The array is read below as 8 doubles per quad (four x/y pairs).
                                ' Integer division (\) keeps the count an Integer; "/" would
                                ' produce a Double that has to be narrowed back.
                                Dim quad_count As Integer = quads.Length \ 8

                                For i As Integer = 0 To quad_count - 1
                                    ' Assume each quad is an axis-aligned rectangle: x values sit at
                                    ' the even offsets, y values at the odd offsets.
                                    Dim offset As Integer = 8 * i
                                    Dim x1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                                    Dim x2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                                    Dim y1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))
                                    Dim y2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))
                                    Dim hyper_link As pdftron.PDF.Annots.Link = pdftron.PDF.Annots.Link.Create(doc, New Rect(x1, y1, x2, y2), pdftron.PDF.Action.CreateURI(doc, "http://www.pdftron.com"))
                                    hyper_link.RefreshAppearance()
                                    cur_page.AnnotPushBack(hyper_link)
                                Next

                                hlts.Next()
                            End While

                            Dim output_path As String = "../../../../TestFiles/Output/"
                            doc.Save(output_path & "credit card numbers_linked.pdf", SDFDoc.SaveOptions.e_linearized)

                            ' The scenario is complete; leave the search loop.
                            Exit While
                        End If
                    ElseIf code = TextSearch.ResultCode.e_page Then
                        ' Page result code: nothing to do here, keep searching.
                    Else
                        ' Any other result code: stop searching.
                        Exit While
                    End If
                End While
            End Using
        Catch e As PDFNetException
            Console.WriteLine(e.Message)
        End Try
        PDFNet.Terminate()
    End Sub
End Module
Did you find this helpful?
Trial setup questions? Ask experts on Discord.
Need other help? Contact Support.
Pricing or product questions? Contact Sales.