Search PDF for Text / String - TextSearch - Go Sample Code

Sample code for using the Apryse SDK to search text on PDF pages using regular expressions; provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. The TextSearch utility class builds on the functionality available in the TextExtractor sample to simplify the most common search operations. Learn more about our Server SDK and PDF Indexed Search Library.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8 "fmt"
9 "strconv"
10 . "math"
11 . "pdftron"
12)
13
14import "pdftron/Samples/LicenseKey/GO"
15
16// This sample illustrates the basic text search capabilities of PDFNet.
17
// Relative locations of the sample data: inputPath is prefixed to the name of
// the PDF that main opens, outputPath to the name of the PDF that main saves.
var (
	inputPath  = "../../TestFiles/"
	outputPath = "../../TestFiles/Output/"
)
21
22func main(){
23 // Initialize PDFNet
24 PDFNetInitialize(PDFTronLicense.Key)
25 doc := NewPDFDoc(inputPath + "credit card numbers.pdf")
26 doc.InitSecurityHandler()
27
28 txtSearch := NewTextSearch()
29 mode := TextSearchE_whole_word | TextSearchE_page_stop
30
31 pattern := "joHn sMiTh"
32
33 // call Begin() method to initialize the text search.
34 txtSearch.Begin(doc, pattern, uint(mode))
35
36 step := 0
37
38 // call Run() method iteratively to find all matching instances.
39 for true{
40 searchResult := txtSearch.Run()
41 if searchResult.IsFound(){
42 if step == 0{
43 // step 0: found "John Smith"
44 // note that, here, 'ambient_string' and 'hlts' are not written to,
45 // as 'e_ambient_string' and 'e_highlight' are not set.
46
47 fmt.Println(searchResult.GetMatch() + "'s credit card number is: ")
48 // now switch to using regular expressions to find John's credit card number
49 mode := PdftronPDFTextSearchTextSearchModes(txtSearch.GetMode())
50 mode = mode | TextSearchE_reg_expression | TextSearchE_highlight
51 txtSearch.SetMode(uint(mode))
52 pattern := "\\d{4}-\\d{4}-\\d{4}-\\d{4}" //or "(\\d{4}-){3}\\d{4}"
53 txtSearch.SetPattern(pattern)
54 step = step + 1
55 }else if step == 1{
56 // step 1: found John's credit card number
57 fmt.Println(" " + searchResult.GetMatch())
58
59 // note that, here, 'hlts' is written to, as 'e_highligh' has been set.
60 // output the highlight info of the credit card number
61 hlts := searchResult.GetHighlights()
62 hlts.Begin(doc)
63 for hlts.HasNext(){
64 fmt.Println("The current highlight is from page: " + strconv.Itoa(hlts.GetCurrentPageNumber()))
65 hlts.Next()
66 }
67 // see if there is an AMEX card number
68 pattern := "\\d{4}-\\d{6}-\\d{5}"
69 txtSearch.SetPattern(pattern)
70
71 step = step + 1
72 }else if step == 2{
73 // found an AMEX card number
74 fmt.Println("\nThere is an AMEX card number:\n " + searchResult.GetMatch())
75
76 // change mode to find the owner of the credit card; supposedly, the owner's
77 // name proceeds the number
78 mode := PdftronPDFTextSearchTextSearchModes(txtSearch.GetMode())
79 mode = mode | TextSearchE_search_up
80 txtSearch.SetMode(uint(mode))
81 pattern := "[A-z]++ [A-z]++"
82 txtSearch.SetPattern(pattern)
83 step = step + 1
84 }else if step == 3{
85 // found the owner's name of the AMEX card
86 fmt.Println("Is the owner's name:\n " + searchResult.GetMatch() + "?")
87
88 // add a link annotation based on the location of the found instance
89 hlts := searchResult.GetHighlights()
90 hlts.Begin(doc)
91
92 for hlts.HasNext(){
93 curPage := doc.GetPage(uint(hlts.GetCurrentPageNumber()))
94 quadsInfo := hlts.GetCurrentQuads()
95
96 i := 0
97 for i < int(quadsInfo.Size()){
98 q := quadsInfo.Get(i)
99 // assume each quad is an axis-aligned rectangle
100 x1 := Min(Min(Min(q.GetP1().GetX(), q.GetP2().GetX()), q.GetP3().GetX()), q.GetP4().GetX())
101 x2 := Max(Max(Max(q.GetP1().GetX(), q.GetP2().GetX()), q.GetP3().GetX()), q.GetP4().GetX())
102 y1 := Min(Min(Min(q.GetP1().GetY(), q.GetP2().GetY()), q.GetP3().GetY()), q.GetP4().GetY())
103 y2 := Max(Max(Max(q.GetP1().GetY(), q.GetP2().GetY()), q.GetP3().GetY()), q.GetP4().GetY())
104 hyperLink := LinkCreate(doc.GetSDFDoc(), NewRect(x1, y1, x2, y2), ActionCreateURI(doc.GetSDFDoc(), "http://www.pdftron.com"))
105 curPage.AnnotPushBack(hyperLink)
106 i = i + 1
107 }
108 hlts.Next()
109 }
110 doc.Save(outputPath + "credit card numbers_linked.pdf", uint(SDFDocE_linearized))
111 break
112 }
113 }else if searchResult.IsPageEnd(){
114 //you can update your UI here, if needed
115 }else{
116 break
117 }
118 }
119 doc.Close()
120 PDFNetTerminate()
121}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales