Sample Obj-C and Swift code that uses the Apryse SDK to search text on PDF pages with regular expressions. The TextSearch utility class builds on the functionality available in TextExtractor to simplify the most common search operations. Learn more about our iOS SDK and PDF Indexed Search Library.
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#import <OBJC/PDFNetOBJC.h>
7#import <Foundation/Foundation.h>
8
9// This sample illustrates the basic text search capabilities of PDFNet.
10
// Returns the axis-aligned rectangle that encloses the four corner points of
// `quad`. The link annotations below are positioned with a PTPDFRect, so each
// highlight quad is reduced to its bounding box.
static PTPDFRect *BoundingRectForQuad(PTQuadPoint *quad)
{
    const double xs[4] = { [[quad getP1] getX], [[quad getP2] getX], [[quad getP3] getX], [[quad getP4] getX] };
    const double ys[4] = { [[quad getP1] getY], [[quad getP2] getY], [[quad getP3] getY], [[quad getP4] getY] };

    double minX = xs[0], maxX = xs[0];
    double minY = ys[0], maxY = ys[0];
    for ( int k = 1; k < 4; ++k )
    {
        minX = MIN(minX, xs[k]);
        maxX = MAX(maxX, xs[k]);
        minY = MIN(minY, ys[k]);
        maxY = MAX(maxY, ys[k]);
    }
    return [[PTPDFRect alloc] initWithX1: minX y1: minY x2: maxX y2: maxY];
}

// Entry point of the text search sample.
//
// Opens "credit card numbers.pdf" and walks through four search steps on the
// same PTTextSearch instance, changing the pattern/mode after each hit:
//   step 0: plain whole-word search for "joHn sMiTh"
//   step 1: regular expression search for a 16-digit card number
//   step 2: regular expression search for an AMEX-style card number
//   step 3: upward regular expression search for the card owner's name; every
//           highlight quad of that hit gets a URI link annotation and the
//           document is saved.
//
// Returns 0 on success, 1 if an NSException was raised along the way.
int main(int argc, char *argv[])
{
    @autoreleasepool {

        int ret = 0;
        [PTPDFNet Initialize: 0];
        NSString *input_path = @"../../TestFiles/credit card numbers.pdf";

        @try
        {
            PTPDFDoc *doc = [[PTPDFDoc alloc] initWithFilepath: input_path];
            [doc InitSecurityHandler];

            PTTextSearch *txt_search = [[PTTextSearch alloc] init];
            unsigned int mode = e_ptwhole_word | e_ptpage_stop;
            NSString *pattern = @"joHn sMiTh";

            //call Begin() method to initialize the text search.
            [txt_search Begin: doc pattern: pattern mode: mode start_page: -1 end_page: -1];

            int step = 0;

            //call Run() method iteratively to find all matching instances.
            while ( YES )
            {
                PTSearchResult *result = [txt_search Run];

                // Test the result code, not the pointer: a non-nil result can
                // also mean "page end" (e_ptpage_stop is set) or "done", and
                // those must fall through to the branches below instead of
                // being handled as a match.
                if ( [result IsFound] )
                {
                    if ( step == 0 )
                    {
                        //step 0: found "John Smith"
                        //note that, here, 'ambient_string' and 'hlts' are not written to,
                        //as 'e_ambient_string' and 'e_highlight' are not set.

                        NSLog(@"%@'s credit card number is: ", [result GetMatch]);
                        //now switch to using regular expressions to find John's credit card number
                        mode = [txt_search GetMode];
                        mode |= e_ptreg_expression | e_pthighlight;
                        [txt_search SetMode: mode];
                        pattern = @"\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
                        [txt_search SetPattern: pattern];

                        ++step;
                    }
                    else if ( step == 1 )
                    {
                        //step 1: found John's credit card number
                        NSLog(@" %@", [result GetMatch]);

                        //note that, here, 'hlts' is written to, as 'e_highlight' has been set.
                        //output the highlight info of the credit card number.
                        PTHighlights *hlts = [result GetHighlights];
                        [hlts Begin: doc];
                        while ( [hlts HasNext] )
                        {
                            NSLog(@"The current highlight is from page: %d", [hlts GetCurrentPageNumber]);
                            [hlts Next];
                        }

                        //see if there is an AMEX card number
                        pattern = @"\\d{4}-\\d{6}-\\d{5}";
                        [txt_search SetPattern: pattern];

                        ++step;
                    }
                    else if ( step == 2 )
                    {
                        //found an AMEX card number
                        NSLog(@"\nThere is an AMEX card number:\n %@", [result GetMatch]);

                        //change mode to find the owner of the credit card; supposedly, the owner's
                        //name precedes the number
                        mode = [txt_search GetMode];
                        mode |= e_ptsearch_up;
                        [txt_search SetMode: mode];
                        //[A-Za-z] rather than [A-z]: the range A-z also covers
                        //the punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
                        pattern = @"[A-Za-z]++ [A-Za-z]++";
                        [txt_search SetPattern: pattern];

                        ++step;
                    }
                    else if ( step == 3 )
                    {
                        //found the owner's name of the AMEX card
                        NSLog(@"Is the owner's name:\n %@?\n", [result GetMatch]);

                        //add a link annotation based on the location of the found instance
                        PTHighlights *hlts = [result GetHighlights];
                        [hlts Begin: doc];
                        while ( [hlts HasNext] )
                        {
                            PTPage *cur_page = [doc GetPage: [hlts GetCurrentPageNumber]];
                            PTVectorQuadPoint *quads = [hlts GetCurrentQuads];
                            const int quad_count = (int)[quads size];
                            for ( int i = 0; i < quad_count; ++i )
                            {
                                //assume each quad is an axis-aligned rectangle
                                PTPDFRect *rect = BoundingRectForQuad([quads get: i]);
                                PTAction *action = [PTAction CreateURI: [doc GetSDFDoc] uri: @"http://www.pdftron.com"];

                                PTLink *hyper_link = [PTLink CreateWithAction: [doc GetSDFDoc] pos: rect action: action];
                                [cur_page AnnotPushBack: hyper_link];
                            }
                            [hlts Next];
                        }
                        [doc SaveToFile: @"../../TestFiles/Output/credit card numbers_linked.pdf" flags: e_ptlinearized];

                        //all four steps are done; stop searching
                        break;
                    }
                }
                else if ( [result IsPageEnd] )
                {
                    //you can update your UI here, if needed
                }
                else
                {
                    //neither a match nor a page boundary: the search is finished
                    break;
                }
            }
        }
        @catch(NSException *e)
        {
            NSLog(@"%@", e.reason);
            ret = 1;
        }
        //runs on both the success and the exception path
        [PTPDFNet Terminate: 0];
        return ret;
    }
}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import PDFNet
7import Foundation
8
9// This sample illustrates the basic text search capabilities of PDFNet.
10
/// Runs the text search sample against the bundled "credit card numbers.pdf".
///
/// The same `PTTextSearch` instance is driven through four steps, with the
/// pattern/mode changed after each hit:
///   - step 0: plain whole-word search for "joHn sMiTh"
///   - step 1: regular expression search for a 16-digit card number
///   - step 2: regular expression search for an AMEX-style card number
///   - step 3: upward regular expression search for the card owner's name;
///     every highlight quad of that hit gets a URI link annotation and the
///     document is saved into the user's documents directory.
///
/// - Returns: 0 on success, 1 if the input PDF is missing from the bundle or
///   an error was caught while running the search.
func runTextSearchTest() -> Int {
    return autoreleasepool {
        var ret: Int = 0

        // Fail early with a clear message instead of handing a nil path to PTPDFDoc.
        guard let input_path = Bundle.main.path(forResource: "credit card numbers", ofType: "pdf") else {
            print("Could not find \"credit card numbers.pdf\" in the main bundle")
            return 1
        }

        do {
            try PTPDFNet.catchException {
                let doc: PTPDFDoc! = PTPDFDoc(filepath: input_path)
                doc.initSecurityHandler()

                let txt_search: PTTextSearch! = PTTextSearch()
                var mode = e_ptwhole_word.rawValue | e_ptpage_stop.rawValue
                var pattern = "joHn sMiTh"

                //call Begin() method to initialize the text search.
                txt_search.begin(doc, pattern: pattern, mode: mode, start_page: -1, end_page: -1)

                var step: Int = 0

                //call run() method iteratively to find all matching instances.
                while true {
                    // No result object at all: nothing more to inspect, stop.
                    guard let result: PTSearchResult = txt_search.run() else {
                        break
                    }

                    // Test the result code, not mere non-nil-ness: a result can
                    // also mean "page end" (e_ptpage_stop is set) or "done".
                    if result.isFound() {
                        if step == 0 {
                            //step 0: found "John Smith"
                            //note that, here, 'ambient_string' and 'hlts' are not written to,
                            //as 'e_ambient_string' and 'e_highlight' are not set.

                            print("\(result.getMatch()!)'s credit card number is: ")
                            //now switch to using regular expressions to find John's credit card number
                            mode = txt_search.getMode()
                            mode |= e_ptreg_expression.rawValue | e_pthighlight.rawValue
                            txt_search.setMode(mode)
                            pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"
                            //or "(\\d{4}-){3}\\d{4}"
                            txt_search.setPattern(pattern)

                            step += 1
                        } else if step == 1 {
                            //step 1: found John's credit card number
                            print(" \(result.getMatch()!)")

                            //note that, here, 'hlts' is written to, as 'e_highlight' has been set.
                            //output the highlight info of the credit card number.
                            let hlts: PTHighlights = result.getHighlights()
                            hlts.begin(doc)
                            while hlts.hasNext() {
                                print("The current highlight is from page: \(hlts.getCurrentPageNumber())")
                                hlts.next()
                            }

                            //see if there is an AMEX card number
                            pattern = "\\d{4}-\\d{6}-\\d{5}"
                            txt_search.setPattern(pattern)

                            step += 1
                        } else if step == 2 {
                            //found an AMEX card number
                            print("There is an AMEX card number: \(result.getMatch()!)")

                            //change mode to find the owner of the credit card; supposedly, the owner's
                            //name precedes the number
                            mode = txt_search.getMode()
                            mode |= e_ptsearch_up.rawValue
                            txt_search.setMode(mode)
                            //[A-Za-z] rather than [A-z]: the range A-z also covers
                            //the punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
                            pattern = "[A-Za-z]++ [A-Za-z]++"
                            txt_search.setPattern(pattern)
                            step += 1
                        } else if step == 3 {
                            //found the owner's name of the AMEX card
                            print("Is the owner's name: \(result.getMatch()!)?")

                            //add a link annotation based on the location of the found instance
                            let hlts: PTHighlights = result.getHighlights()
                            hlts.begin(doc)
                            while hlts.hasNext() {
                                let cur_page: PTPage = doc.getPage(UInt32(hlts.getCurrentPageNumber()))
                                let quads: PTVectorQuadPoint = hlts.getCurrentQuads()

                                for i in 0..<Int(quads.size()) {
                                    //assume each quad is an axis-aligned rectangle
                                    let q: PTQuadPoint = quads.get(Int32(i))
                                    // Bounding box of the four corners of the quad.
                                    let xs: [Double] = [q.getP1().getX(), q.getP2().getX(), q.getP3().getX(), q.getP4().getX()]
                                    let ys: [Double] = [q.getP1().getY(), q.getP2().getY(), q.getP3().getY(), q.getP4().getY()]
                                    let rect = PTPDFRect(x1: xs.min()!, y1: ys.min()!, x2: xs.max()!, y2: ys.max()!)
                                    let action = PTAction.createURI(doc.getSDFDoc(), uri: "http://www.pdftron.com")
                                    let hyper_link = PTLink.create(withAction: doc.getSDFDoc(), pos: rect, action: action)
                                    cur_page.annotPushBack(hyper_link)
                                }
                                hlts.next()
                            }

                            let documents_dir = NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0]
                            let output_path = URL(fileURLWithPath: documents_dir).appendingPathComponent("credit card numbers_linked.pdf").path
                            doc.save(toFile: output_path, flags: e_ptlinearized.rawValue)

                            //all four steps are done; stop searching
                            break
                        }
                    } else if result.isPageEnd() {
                        //you can update your UI here if needed
                    } else {
                        //neither a match nor a page boundary: the search is finished
                        break
                    }
                }
            }
        } catch let e as NSError {
            print("\(e)")
            ret = 1
        }
        return ret
    }
}
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales