Sample Obj-C code for using Apryse SDK to search text on PDF pages using regular expressions. The TextSearch utility class builds on functionality available in TextExtractor to simplify the most common search operations. Learn more about our iOS SDK and PDF Indexed Search Library.
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#import <OBJC/PDFNetOBJC.h>
7#import <Foundation/Foundation.h>
8
9// This sample illustrates the basic text search capabilities of PDFNet.
10
// Entry point of the text search sample.
//
// Opens "credit card numbers.pdf", searches it in four steps (a plain-text name,
// a credit card number regex, an AMEX number regex, then an upward regex search
// for the AMEX owner's name), and on the last step adds a URI link annotation
// over each highlighted quad before saving the document.
//
// Returns 0 on success, or 1 if an NSException was caught.
int main(int argc, char *argv[])
{
    @autoreleasepool {

        int ret = 0;
        [PTPDFNet Initialize: 0];
        NSString *input_path = @"../../TestFiles/credit card numbers.pdf";

        @try
        {
            PTPDFDoc *doc = [[PTPDFDoc alloc] initWithFilepath: input_path];
            [doc InitSecurityHandler];

            PTTextSearch *txt_search = [[PTTextSearch alloc] init];
            unsigned int mode = e_ptwhole_word | e_ptpage_stop;
            NSString *pattern = @"joHn sMiTh";

            //call Begin() method to initialize the text search.
            [txt_search Begin: doc pattern: pattern mode: mode start_page: -1 end_page: -1];

            //index of the search stage we are currently waiting on (0..3)
            int step = 0;

            //call Run() method iteratively to find all matching instances.
            while ( YES )
            {
                PTSearchResult *result = [txt_search Run];

                //Test IsFound rather than the object pointer: because 'e_ptpage_stop'
                //is set, Run can also come back for a page end, and a non-nil result
                //must not be mistaken for a match. If 'result' is nil, both messages
                //below return NO and the loop ends through the final else.
                if ( [result IsFound] )
                {
                    if ( step == 0 )
                    {   //step 0: found "John Smith"
                        //note that, here, 'ambient_string' and 'hlts' are not written to,
                        //as 'e_ambient_string' and 'e_highlight' are not set.

                        NSLog(@"%@'s credit card number is: ", [result GetMatch]);
                        //now switch to using regular expressions to find John's credit card number
                        mode = [txt_search GetMode];
                        mode |= e_ptreg_expression | e_pthighlight;
                        [txt_search SetMode: mode];
                        pattern = @"\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
                        [txt_search SetPattern: pattern];

                        ++step;
                    }
                    else if ( step == 1 )
                    {
                        //step 1: found John's credit card number
                        NSLog(@"  %@", [result GetMatch]);

                        //note that, here, 'hlts' is written to, as 'e_highlight' has been set.
                        //output the highlight info of the credit card number.
                        PTHighlights *hlts = [result GetHighlights];
                        [hlts Begin: doc];
                        while ( [hlts HasNext] )
                        {
                            NSLog(@"The current highlight is from page: %d", [hlts GetCurrentPageNumber]);
                            [hlts Next];
                        }

                        //see if there is an AMEX card number
                        pattern = @"\\d{4}-\\d{6}-\\d{5}";
                        [txt_search SetPattern: pattern];

                        ++step;
                    }
                    else if ( step == 2 )
                    {
                        //step 2: found an AMEX card number
                        NSLog(@"\nThere is an AMEX card number:\n  %@", [result GetMatch]);

                        //change mode to find the owner of the credit card; supposedly, the owner's
                        //name precedes the number, so search upwards from here
                        mode = [txt_search GetMode];
                        mode |= e_ptsearch_up;
                        [txt_search SetMode: mode];
                        //NOTE(review): the class [A-z] also covers the ASCII characters between
                        //'Z' and 'a' ([ \ ] ^ _ `); [A-Za-z] may be what was intended.
                        pattern = @"[A-z]++ [A-z]++";
                        [txt_search SetPattern: pattern];

                        ++step;
                    }
                    else if ( step == 3 )
                    {
                        //step 3: found the owner's name of the AMEX card
                        NSLog(@"Is the owner's name:\n  %@?\n", [result GetMatch]);

                        //add a link annotation based on the location of the found instance
                        PTHighlights *hlts = [result GetHighlights];
                        [hlts Begin: doc];
                        while ( [hlts HasNext] )
                        {
                            PTPage *cur_page = [doc GetPage: [hlts GetCurrentPageNumber]];
                            PTVectorQuadPoint *quads = [hlts GetCurrentQuads];
                            int i = 0;
                            for ( ; i < [quads size]; ++i )
                            {
                                //assume each quad is an axis-aligned rectangle, so its bounding
                                //box is given by the min/max of the four corner coordinates
                                PTQuadPoint *q = [quads get: i];
                                double x1 = MIN(MIN(MIN([[q getP1] getX], [[q getP2] getX]), [[q getP3] getX]), [[q getP4] getX]);
                                double x2 = MAX(MAX(MAX([[q getP1] getX], [[q getP2] getX]), [[q getP3] getX]), [[q getP4] getX]);
                                double y1 = MIN(MIN(MIN([[q getP1] getY], [[q getP2] getY]), [[q getP3] getY]), [[q getP4] getY]);
                                double y2 = MAX(MAX(MAX([[q getP1] getY], [[q getP2] getY]), [[q getP3] getY]), [[q getP4] getY]);
                                PTPDFRect * rect = [[PTPDFRect alloc] initWithX1: x1 y1: y1 x2: x2 y2: y2];
                                PTAction *action = [PTAction CreateURI: [doc GetSDFDoc] uri: @"http://www.pdftron.com"];

                                PTLink *hyper_link = [PTLink CreateWithAction: [doc GetSDFDoc] pos: rect action: action];
                                [cur_page AnnotPushBack: hyper_link];
                            }
                            [hlts Next];
                        }
                        [doc SaveToFile: @"../../TestFiles/Output/credit card numbers_linked.pdf" flags: e_ptlinearized];

                        //all four steps are done
                        break;
                    }
                }

                else if ( [result IsPageEnd] )
                {
                    //you can update your UI here, if needed
                }

                else
                {
                    //neither a match nor a page end: nothing more to search
                    break;
                }
            }
        }

        @catch(NSException *e)
        {
            NSLog(@"%@", e.reason);
            ret = 1;
        }
        [PTPDFNet Terminate: 0];
        return ret;
    }
}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import PDFNet
7import Foundation
8
9// This sample illustrates the basic text search capabilities of PDFNet.
10
/// Runs the text search sample.
///
/// Opens the bundled "credit card numbers.pdf", searches it in four steps (a
/// plain-text name, a credit card number regex, an AMEX number regex, then an
/// upward regex search for the AMEX owner's name), and on the last step adds a
/// URI link annotation over each highlighted quad before saving the document
/// into the user's documents directory.
///
/// - Returns: 0 on success, or 1 if an error was caught.
func runTextSearchTest() -> Int {
    return autoreleasepool {
        var ret: Int = 0
        
        let input_path: String? = Bundle.main.path(forResource: "credit card numbers", ofType: "pdf")
        
        do {
            try PTPDFNet.catchException {
                let doc: PTPDFDoc! = PTPDFDoc(filepath: input_path)
                doc.initSecurityHandler()
                
                let txt_search: PTTextSearch! = PTTextSearch()
                var mode = e_ptwhole_word.rawValue | e_ptpage_stop.rawValue
                var pattern = "joHn sMiTh"
                
                //call Begin() method to initialize the text search.
                txt_search.begin(doc, pattern: pattern, mode: mode, start_page: -1, end_page: -1)
                
                //index of the search stage we are currently waiting on (0...3)
                var step: Int = 0
                
                //call run() method iteratively to find all matching instances.
                while true {
                    //Bind the result safely: calling a method on a nil result would
                    //crash, so a missing result simply ends the search.
                    let maybe_result: PTSearchResult? = txt_search.run()
                    guard let result = maybe_result else {
                        break
                    }
                    
                    //Test isFound() rather than non-nil: because 'e_ptpage_stop' is set,
                    //run() can also come back for a page end, which is not a match.
                    if result.isFound() {
                        if step == 0 {
                            //step 0: found "John Smith"
                            //note that, here, 'ambient_string' and 'hlts' are not written to,
                            //as 'e_ambient_string' and 'e_highlight' are not set.
                            
                            print("\(result.getMatch()!)'s credit card number is: ")
                            //now switch to using regular expressions to find John's credit card number
                            mode = txt_search.getMode()
                            mode |= e_ptreg_expression.rawValue | e_pthighlight.rawValue
                            txt_search.setMode(mode)
                            pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"
                            //or "(\\d{4}-){3}\\d{4}"
                            txt_search.setPattern(pattern)
                            
                            step += 1
                        } else if step == 1 {
                            //step 1: found John's credit card number
                            print("  \(result.getMatch()!)")
                            
                            //note that, here, 'hlts' is written to, as 'e_highlight' has been set.
                            //output the highlight info of the credit card number.
                            let hlts: PTHighlights = result.getHighlights()
                            hlts.begin(doc)
                            while hlts.hasNext() {
                                print("The current highlight is from page: \(hlts.getCurrentPageNumber())")
                                hlts.next()
                            }
                            
                            //see if there is an AMEX card number
                            pattern = "\\d{4}-\\d{6}-\\d{5}"
                            txt_search.setPattern(pattern)
                            
                            step += 1
                        } else if step == 2 {
                            //step 2: found an AMEX card number
                            print("There is an AMEX card number: \(result.getMatch()!)")
                            
                            //change mode to find the owner of the credit card; supposedly, the owner's
                            //name precedes the number, so search upwards from here
                            mode = txt_search.getMode()
                            mode |= e_ptsearch_up.rawValue
                            txt_search.setMode(mode)
                            //NOTE(review): the class [A-z] also covers the ASCII characters between
                            //'Z' and 'a' ([ \ ] ^ _ `); [A-Za-z] may be what was intended.
                            pattern = "[A-z]++ [A-z]++"
                            txt_search.setPattern(pattern)
                            step += 1
                        } else if step == 3 {
                            //step 3: found the owner's name of the AMEX card
                            print("Is the owner's name: \(result.getMatch()!)?")
                            
                            //add a link annotation based on the location of the found instance
                            let hlts: PTHighlights = result.getHighlights()
                            hlts.begin(doc)
                            while hlts.hasNext() {
                                let cur_page: PTPage = doc.getPage(UInt32(hlts.getCurrentPageNumber()))
                                let quads: PTVectorQuadPoint = hlts.getCurrentQuads()
                                var i: Int = 0
                                
                                while i < quads.size() {
                                    //assume each quad is an axis-aligned rectangle, so its bounding
                                    //box is given by the min/max of the four corner coordinates
                                    let q: PTQuadPoint = quads.get(Int32(i))
                                    let x1: Double = min(min(min(q.getP1().getX(), q.getP2().getX()), q.getP3().getX()), q.getP4().getX())
                                    let x2: Double = max(max(max(q.getP1().getX(), q.getP2().getX()), q.getP3().getX()), q.getP4().getX())
                                    let y1: Double = min(min(min(q.getP1().getY(), q.getP2().getY()), q.getP3().getY()), q.getP4().getY())
                                    let y2: Double = max(max(max(q.getP1().getY(), q.getP2().getY()), q.getP3().getY()), q.getP4().getY())
                                    let rect = PTPDFRect(x1: x1, y1: y1, x2: x2, y2: y2)
                                    let action = PTAction.createURI(doc.getSDFDoc(), uri: "http://www.pdftron.com")
                                    let hyper_link = PTLink.create(withAction: doc.getSDFDoc(), pos: rect, action: action)
                                    cur_page.annotPushBack(hyper_link)
                                    i += 1
                                }
                                hlts.next()
                            }
                            doc.save(toFile: URL(fileURLWithPath: NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0]).appendingPathComponent("credit card numbers_linked.pdf").path, flags: e_ptlinearized.rawValue)
                            
                            //all four steps are done
                            break
                        }
                    } else if result.isPageEnd() {
                        //you can update your UI here if needed
                    } else {
                        //neither a match nor a page end: nothing more to search
                        break
                    }
                }
            }
        } catch let e as NSError {
            print("\(e)")
            ret = 1
        }
        return ret
    }
}
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales