TextSearch

Sample Obj-C code for using the Apryse SDK to search text on PDF pages using regular expressions. The TextSearch utility class builds on functionality available in TextExtractor to simplify the most common search operations. Learn more about our iOS SDK and PDF Indexed Search Library.

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------
5
6#import <OBJC/PDFNetOBJC.h>
7#import <Foundation/Foundation.h>
8
// This sample illustrates the basic text search capabilities of PDFNet.
10
// Returns the smallest axis-aligned rectangle enclosing the four corners of `quad`.
// The sample assumes each highlight quad is itself an axis-aligned rectangle.
static PTPDFRect *BoundingRectForQuad(PTQuadPoint *quad)
{
    const double xs[4] = { [[quad getP1] getX], [[quad getP2] getX], [[quad getP3] getX], [[quad getP4] getX] };
    const double ys[4] = { [[quad getP1] getY], [[quad getP2] getY], [[quad getP3] getY], [[quad getP4] getY] };

    double x1 = xs[0], x2 = xs[0];
    double y1 = ys[0], y2 = ys[0];
    for ( int k = 1; k < 4; ++k )
    {
        x1 = MIN(x1, xs[k]);
        x2 = MAX(x2, xs[k]);
        y1 = MIN(y1, ys[k]);
        y2 = MAX(y2, ys[k]);
    }
    return [[PTPDFRect alloc] initWithX1: x1 y1: y1 x2: x2 y2: y2];
}

// Logs the page number of every highlight in `hlts`, iterating it against `doc`.
static void LogHighlightPages(PTHighlights *hlts, PTPDFDoc *doc)
{
    [hlts Begin: doc];
    while ( [hlts HasNext] )
    {
        NSLog(@"The current highlight is from page: %d", [hlts GetCurrentPageNumber]);
        [hlts Next];
    }
}

// Adds one URI link annotation pointing at `uri` over every quad of every highlight in `hlts`.
static void AddLinksOverHighlights(PTPDFDoc *doc, PTHighlights *hlts, NSString *uri)
{
    [hlts Begin: doc];
    while ( [hlts HasNext] )
    {
        PTPage *cur_page = [doc GetPage: [hlts GetCurrentPageNumber]];
        PTVectorQuadPoint *quads = [hlts GetCurrentQuads];

        // Read the count once and keep the index and the bound the same signed type.
        const int quad_count = (int)[quads size];
        for ( int i = 0; i < quad_count; ++i )
        {
            PTPDFRect *rect = BoundingRectForQuad([quads get: i]);
            PTAction *action = [PTAction CreateURI: [doc GetSDFDoc] uri: uri];

            PTLink *hyper_link = [PTLink CreateWithAction: [doc GetSDFDoc] pos: rect action: action];
            [cur_page AnnotPushBack: hyper_link];
        }
        [hlts Next];
    }
}

// Entry point of the TextSearch sample.
//
// Runs a four-step search over "credit card numbers.pdf":
//   step 0: find the whole-word pattern "joHn sMiTh",
//   step 1: switch to regular expressions and find a 16-digit card number,
//   step 2: look for an AMEX-style card number,
//   step 3: search upwards for the owner's name, add link annotations over it,
//           and save the document.
// Returns 0 on success, or 1 if an NSException was caught.
int main(int argc, char *argv[])
{
    @autoreleasepool {

        int ret = 0;
        [PTPDFNet Initialize: 0];
        NSString *input_path = @"../../TestFiles/credit card numbers.pdf";

        @try
        {
            PTPDFDoc *doc = [[PTPDFDoc alloc] initWithFilepath: input_path];
            [doc InitSecurityHandler];

            PTTextSearch *txt_search = [[PTTextSearch alloc] init];
            unsigned int mode = e_ptwhole_word | e_ptpage_stop;
            NSString *pattern = @"joHn sMiTh";

            //call Begin() method to initialize the text search.
            [txt_search Begin: doc pattern: pattern mode: mode start_page: -1 end_page: -1];

            int step = 0;

            //call Run() method iteratively to find all matching instances.
            while ( YES )
            {
                PTSearchResult *result = [txt_search Run];

                // Branch on the result's own status rather than on the pointer.
                // Testing only `result` left the IsPageEnd branch unreachable: it was
                // evaluated only when `result` was nil, and messaging nil returns NO.
                // A nil result still falls through to the final `break` below.
                if ( [result IsFound] )
                {
                    if ( step == 0 )
                    {
                        //step 0: found "John Smith"
                        //note that, here, 'ambient_string' and 'hlts' are not written to,
                        //as 'e_ambient_string' and 'e_highlight' are not set.

                        NSLog(@"%@'s credit card number is: ", [result GetMatch]);

                        //now switch to using regular expressions to find John's credit card number
                        mode = [txt_search GetMode];
                        mode |= e_ptreg_expression | e_pthighlight;
                        [txt_search SetMode: mode];
                        pattern = @"\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
                        [txt_search SetPattern: pattern];

                        ++step;
                    }
                    else if ( step == 1 )
                    {
                        //step 1: found John's credit card number
                        NSLog(@" %@", [result GetMatch]);

                        //note that, here, 'hlts' is written to, as 'e_highlight' has been set.
                        //output the highlight info of the credit card number.
                        LogHighlightPages([result GetHighlights], doc);

                        //see if there is an AMEX card number
                        pattern = @"\\d{4}-\\d{6}-\\d{5}";
                        [txt_search SetPattern: pattern];

                        ++step;
                    }
                    else if ( step == 2 )
                    {
                        //found an AMEX card number
                        NSLog(@"\nThere is an AMEX card number:\n %@", [result GetMatch]);

                        //change mode to find the owner of the credit card; supposedly, the owner's
                        //name precedes the number
                        mode = [txt_search GetMode];
                        mode |= e_ptsearch_up;
                        [txt_search SetMode: mode];

                        // [A-Za-z], not [A-z]: the ASCII range A..z also covers the
                        // punctuation characters [ \ ] ^ _ and backtick.
                        pattern = @"[A-Za-z]++ [A-Za-z]++";
                        [txt_search SetPattern: pattern];

                        ++step;
                    }
                    else if ( step == 3 )
                    {
                        //found the owner's name of the AMEX card
                        NSLog(@"Is the owner's name:\n %@?\n", [result GetMatch]);

                        //add a link annotation based on the location of the found instance
                        AddLinksOverHighlights(doc, [result GetHighlights], @"http://www.pdftron.com");
                        [doc SaveToFile: @"../../TestFiles/Output/credit card numbers_linked.pdf" flags: e_ptlinearized];

                        break;
                    }
                }
                else if ( [result IsPageEnd] )
                {
                    //you can update your UI here, if needed
                }
                else
                {
                    // Neither a match nor a page end: stop searching.
                    break;
                }
            }
        }
        @catch(NSException *e)
        {
            NSLog(@"%@", e.reason);
            ret = 1;
        }

        [PTPDFNet Terminate: 0];
        return ret;
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales