The following samples search for text in a PDF using a regular expression and then apply a link annotation to each highlighted result.
In this example we add a link annotation, but any other type of annotation can be applied here, such as redaction annotations in a search-and-redact workflow.
// Search a PDF with a regular expression and place a link annotation over
// every highlighted quad of the first result returned by Run().
PDFDoc doc = new PDFDoc(filename);
Int32 page_num = 0;
String result_str = "", ambient_string = "";
Highlights hlts = new Highlights();
TextSearch txt_search = new TextSearch();
Int32 mode = (Int32)(TextSearch.SearchMode.e_whole_word | TextSearch.SearchMode.e_page_stop | TextSearch.SearchMode.e_highlight);

//use regular expression to find credit card number
mode |= (Int32)(TextSearch.SearchMode.e_reg_expression | TextSearch.SearchMode.e_highlight);
txt_search.SetMode(mode);
// Single declaration of the pattern; it is reused by SetPattern() and Begin().
String pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
txt_search.SetPattern(pattern);

//call Begin() method to initialize the text search.
txt_search.Begin(doc, pattern, mode, -1, -1);
// NOTE(review): Run() is called once here, so only its first result is handled;
// presumably it must be called repeatedly to visit further matches - confirm.
TextSearch.ResultCode code = txt_search.Run(ref page_num, ref result_str, ref ambient_string, hlts);

if (code == TextSearch.ResultCode.e_found)
{
    //add a link annotation based on the location of the found instance
    hlts.Begin(doc);
    while (hlts.HasNext())
    {
        Page cur_page = doc.GetPage(hlts.GetCurrentPageNumber());
        // The array is read as groups of 8 doubles per quad:
        // even offsets are used as x values, odd offsets as y values.
        double[] quads = hlts.GetCurrentQuads();
        int quad_count = quads.Length / 8;
        for (int i = 0; i < quad_count; ++i)
        {
            //assume each quad is an axis-aligned rectangle
            int offset = 8 * i;
            double x1 = Math.Min(Math.Min(Math.Min(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
            double x2 = Math.Max(Math.Max(Math.Max(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
            double y1 = Math.Min(Math.Min(Math.Min(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);
            double y2 = Math.Max(Math.Max(Math.Max(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);

            Annots.Link hyper_link = Annots.Link.Create(doc, new Rect(x1, y1, x2, y2), Action.CreateURI(doc, "http://www.apryse.com"));
            hyper_link.RefreshAppearance();
            cur_page.AnnotPushBack(hyper_link);
        }
        hlts.Next();
    }
}
1PDFDoc doc(filename);
2TextSearch txt_search;
3TextSearch::Mode mode = TextSearch::e_whole_word | TextSearch::e_page_stop;
4UString pattern("");
5
6//use regular expression to find credit card number
7mode |= TextSearch::e_reg_expression | TextSearch::e_highlight;
8txt_search.SetMode(mode);
9pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
10txt_search.SetPattern(pattern);
11
12//call Begin() method to initialize the text search.
13txt_search.Begin( doc, pattern, mode );
14SearchResult result = txt_search.Run();
15
16if ( result )
17{
18 //add a link annotation based on the location of the found instance
19 Highlights hlts = result.GetHighlights();
20 hlts.Begin(doc);
21 while ( hlts.HasNext() )
22 {
23 Page cur_page= doc.GetPage(hlts.GetCurrentPageNumber());
24 const double *quads;
25 int quad_count = hlts.GetCurrentQuads(quads);
26 for ( int i = 0; i < quad_count; ++i )
27 {
28 //assume each quad is an axis-aligned rectangle
29 const double *q = &quads[8*i];
30 double x1 = min(min(min(q[0], q[2]), q[4]), q[6]);
31 double x2 = max(max(max(q[0], q[2]), q[4]), q[6]);
32 double y1 = min(min(min(q[1], q[3]), q[5]), q[7]);
33 double y2 = max(max(max(q[1], q[3]), q[5]), q[7]);
34 Annots::Link hyper_link = Annots::Link::Create(doc, Rect(x1, y1, x2, y2), Action::CreateURI(doc, "http://www.apryse.com"));
35 cur_page.AnnotPushBack(hyper_link);
36 }
37 hlts.Next();
38 }
39}
1doc := NewPDFDoc(filename)
2txtSearch := NewTextSearch()
3mode := TextSearchE_whole_word | TextSearchE_page_stop
4pattern := ""
5
6mode = mode | TextSearchE_reg_expression | TextSearchE_highlight
7txtSearch.SetMode(uint(mode))
8pattern := "\\d{4}-\\d{4}-\\d{4}-\\d{4}" //or "(\\d{4}-){3}\\d{4}"
9txtSearch.SetPattern(pattern)
10
11// call Begin() method to initialize the text search.
12txtSearch.Begin(doc, pattern, uint(mode))
13searchResult := txtSearch.Run()
14
15if searchResult.IsFound(){
16 // add a link annotation based on the location of the found instance
17 hlts := searchResult.GetHighlights()
18 hlts.Begin(doc)
19
20 for hlts.HasNext(){
21 curPage := doc.GetPage(uint(hlts.GetCurrentPageNumber()))
22 quadsInfo := hlts.GetCurrentQuads()
23
24 i := 0
25 for i < int(quadsInfo.Size()){
26 q := quadsInfo.Get(i)
27 // assume each quad is an axis-aligned rectangle
28 x1 := Min(Min(Min(q.GetP1().GetX(), q.GetP2().GetX()), q.GetP3().GetX()), q.GetP4().GetX())
29 x2 := Max(Max(Max(q.GetP1().GetX(), q.GetP2().GetX()), q.GetP3().GetX()), q.GetP4().GetX())
30 y1 := Min(Min(Min(q.GetP1().GetY(), q.GetP2().GetY()), q.GetP3().GetY()), q.GetP4().GetY())
31 y2 := Max(Max(Max(q.GetP1().GetY(), q.GetP2().GetY()), q.GetP3().GetY()), q.GetP4().GetY())
32 hyperLink := LinkCreate(doc.GetSDFDoc(), NewRect(x1, y1, x2, y2), ActionCreateURI(doc.GetSDFDoc(), "http://www.apryse.com"))
33 curPage.AnnotPushBack(hyperLink)
34 i = i + 1
35 }
36 hlts.Next()
37 }
38}
1PDFDoc doc = new PDFDoc(filename);
2TextSearch txt_search = new TextSearch();
3int mode = TextSearch.e_whole_word | TextSearch.e_page_stop;
4String pattern = "";
5
6//use regular expression to find credit card number
7mode |= TextSearch.e_reg_expression | TextSearch.e_highlight;
8txt_search.setMode(mode);
9String new_pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
10txt_search.setPattern(new_pattern);
11
12//call Begin() method to initialize the text search.
13txt_search.begin(doc, pattern, mode, -1, -1);
14TextSearchResult result = txt_search.run();
15
16if (result.getCode() == TextSearchResult.e_found) {
17 //add a link annotation based on the location of the found instance
18 Highlights hlts = result.getHighlights();
19 hlts.begin(doc);
20 while (hlts.hasNext()) {
21 Page cur_page = doc.getPage(hlts.getCurrentPageNumber());
22 double[] q = hlts.getCurrentQuads();
23 int quad_count = q.length / 8;
24 for (int i = 0; i < quad_count; ++i) {
25 //assume each quad is an axis-aligned rectangle
26 int offset = 8 * i;
27 double x1 = Math.min(Math.min(Math.min(q[offset + 0], q[offset + 2]), q[offset + 4]), q[offset + 6]);
28 double x2 = Math.max(Math.max(Math.max(q[offset + 0], q[offset + 2]), q[offset + 4]), q[offset + 6]);
29 double y1 = Math.min(Math.min(Math.min(q[offset + 1], q[offset + 3]), q[offset + 5]), q[offset + 7]);
30 double y2 = Math.max(Math.max(Math.max(q[offset + 1], q[offset + 3]), q[offset + 5]), q[offset + 7]);
31 annots.Link hyper_link = annots.Link.create(doc, new Rect(x1, y1, x2, y2), Action.createURI(doc, "http://www.apryse.com"));
32 cur_page.annotPushBack(hyper_link);
33 }
34 hlts.next();
35 }
36}
/**
 * Searches the document at `filename` with a regular expression and places a
 * link annotation over every highlighted quad of the first result returned by run().
 */
async function main() {
  const doc = await PDFNet.PDFDoc.createFromURL(filename);
  const txtSearch = await PDFNet.TextSearch.create();
  // Uses both whole word and page stop
  let mode = PDFNet.TextSearch.Mode.e_whole_word | PDFNet.TextSearch.Mode.e_page_stop;

  // use regular expression to find credit card number
  mode |= PDFNet.TextSearch.Mode.e_reg_expression | PDFNet.TextSearch.Mode.e_highlight;
  await txtSearch.setMode(mode);
  const pattern = '\\d{4}-\\d{4}-\\d{4}-\\d{4}'; // or "(\\d{4}-){3}\\d{4}"
  await txtSearch.setPattern(pattern);

  // call begin() method to initialize the text search.
  await txtSearch.begin(doc, pattern, mode);
  const result = await txtSearch.run();

  if (result.code === PDFNet.TextSearch.ResultCode.e_found) {
    // add a link annotation based on the location of the found instance
    // Declared with const: assigning an undeclared name would create an
    // implicit global (and throws in strict/module code).
    const hlts = result.highlights;
    await hlts.begin(doc);
    while (await hlts.hasNext()) {
      const curPage = await doc.getPage(await hlts.getCurrentPageNumber());
      const quadArr = await hlts.getCurrentQuads();
      for (let i = 0; i < quadArr.length; ++i) {
        // assume each quad is an axis-aligned rectangle
        const currQuad = quadArr[i];
        const x1 = Math.min(currQuad.p1x, currQuad.p2x, currQuad.p3x, currQuad.p4x);
        const x2 = Math.max(currQuad.p1x, currQuad.p2x, currQuad.p3x, currQuad.p4x);
        const y1 = Math.min(currQuad.p1y, currQuad.p2y, currQuad.p3y, currQuad.p4y);
        const y2 = Math.max(currQuad.p1y, currQuad.p2y, currQuad.p3y, currQuad.p4y);

        const hyperLink = await PDFNet.LinkAnnot.create(doc, await PDFNet.Rect.init(x1, y1, x2, y2));
        await hyperLink.setAction(await PDFNet.Action.createURI(doc, 'http://www.apryse.com'));
        await curPage.annotPushBack(hyperLink);
      }
      // Await the advance so the next hasNext() check runs after it completes.
      await hlts.next();
    }
  }
}
PDFNet.runWithCleanup(main);
// Search a PDF with a regular expression and place a link annotation over
// every highlighted quad of the first result returned by Run.
PTPDFDoc *doc = [[PTPDFDoc alloc] initWithFilepath: filename];
PTTextSearch *txt_search = [[PTTextSearch alloc] init];
unsigned int mode = e_ptwhole_word | e_ptpage_stop;
NSString *pattern = @"";

//use regular expression to find credit card number
mode |= e_ptreg_expression | e_pthighlight;
[txt_search SetMode: mode];
pattern = @"\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
[txt_search SetPattern: pattern];

//call Begin() method to initialize the text search.
[txt_search Begin: doc pattern: pattern mode: mode start_page: -1 end_page: -1];
PTSearchResult *result = [txt_search Run];

// Test the result's found flag; a bare `if (result)` would only check that
// the returned object pointer is non-nil.
if ( [result IsFound] )
{
    //add a link annotation based on the location of the found instance
    PTHighlights *hlts = [result GetHighlights];
    [hlts Begin: doc];
    while ( [hlts HasNext] )
    {
        PTPage *cur_page = [doc GetPage: [hlts GetCurrentPageNumber]];
        PTVectorQuadPoint *quads = [hlts GetCurrentQuads];
        int i = 0;
        for ( ; i < [quads size]; ++i )
        {
            //assume each quad is an axis-aligned rectangle
            PTQuadPoint *q = [quads get: i];
            double x1 = MIN(MIN(MIN([[q getP1] getX], [[q getP2] getX]), [[q getP3] getX]), [[q getP4] getX]);
            double x2 = MAX(MAX(MAX([[q getP1] getX], [[q getP2] getX]), [[q getP3] getX]), [[q getP4] getX]);
            double y1 = MIN(MIN(MIN([[q getP1] getY], [[q getP2] getY]), [[q getP3] getY]), [[q getP4] getY]);
            double y2 = MAX(MAX(MAX([[q getP1] getY], [[q getP2] getY]), [[q getP3] getY]), [[q getP4] getY]);
            PTPDFRect *rect = [[PTPDFRect alloc] initWithX1: x1 y1: y1 x2: x2 y2: y2];
            PTAction *action = [PTAction CreateURI: [doc GetSDFDoc] uri: @"http://www.apryse.com"];

            PTLink *hyper_link = [PTLink CreateWithAction: [doc GetSDFDoc] pos: rect action: action];
            [cur_page AnnotPushBack: hyper_link];
        }
        [hlts Next];
    }
}
// Search a PDF with a regular expression and place a link annotation over
// every highlighted quad of the first result returned by Run().
$doc = new PDFDoc($filename);
$txt_search = new TextSearch();
$mode = TextSearch::e_whole_word | TextSearch::e_page_stop;
$pattern = "";

//use regular expression to find credit card number
$mode |= TextSearch::e_reg_expression | TextSearch::e_highlight;
$txt_search->SetMode($mode);
$pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"; //or "(\\d{4}-){3}\\d{4}"
$txt_search->SetPattern($pattern);

//call Begin() method to initialize the text search.
$txt_search->Begin($doc, $pattern, $mode);
$searchResult = $txt_search->Run();

if ($searchResult->IsFound())
{
    //add a link annotation based on the location of the found instance
    $hlts = $searchResult->GetHighlights();
    $hlts->Begin($doc);
    while ($hlts->HasNext())
    {
        $cur_page = $doc->GetPage($hlts->GetCurrentPageNumber());
        $quadsInfo = $hlts->GetCurrentQuads();

        for ($i = 0; $i < $quadsInfo->size(); ++$i)
        {
            //assume each quad is an axis-aligned rectangle
            $q = $quadsInfo->get($i);
            $x1 = min($q->p1->x, $q->p2->x, $q->p3->x, $q->p4->x);
            $x2 = max($q->p1->x, $q->p2->x, $q->p3->x, $q->p4->x);
            $y1 = min($q->p1->y, $q->p2->y, $q->p3->y, $q->p4->y);
            $y2 = max($q->p1->y, $q->p2->y, $q->p3->y, $q->p4->y);
            $hyper_link = Link::Create($doc->GetSDFDoc(), new Rect($x1, $y1, $x2, $y2), Action::CreateURI($doc->GetSDFDoc(), "http://www.apryse.com"));
            $cur_page->AnnotPushBack($hyper_link);
        }
        $hlts->Next();
    }
}
# Search a PDF with a regular expression and place a link annotation over
# every highlighted quad of the first result returned by Run().
doc = PDFDoc(filename)
txt_search = TextSearch()
mode = TextSearch.e_whole_word | TextSearch.e_page_stop
pattern = ""

# use regular expression to find credit card number
mode |= TextSearch.e_reg_expression | TextSearch.e_highlight
txt_search.SetMode(mode)
pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"  # or "(\\d{4}-){3}\\d{4}"
txt_search.SetPattern(pattern)

# call Begin() method to initialize the text search.
txt_search.Begin(doc, pattern, mode)
searchResult = txt_search.Run()

if searchResult.IsFound():
    # add a link annotation based on the location of the found instance
    hlts = searchResult.GetHighlights()
    hlts.Begin(doc)

    while hlts.HasNext():
        cur_page = doc.GetPage(hlts.GetCurrentPageNumber())
        quadsInfo = hlts.GetCurrentQuads()

        for i in range(len(quadsInfo)):
            q = quadsInfo[i]
            # assume each quad is an axis-aligned rectangle
            x1 = min(q.p1.x, q.p2.x, q.p3.x, q.p4.x)
            x2 = max(q.p1.x, q.p2.x, q.p3.x, q.p4.x)
            y1 = min(q.p1.y, q.p2.y, q.p3.y, q.p4.y)
            y2 = max(q.p1.y, q.p2.y, q.p3.y, q.p4.y)
            hyper_link = Link.Create(doc.GetSDFDoc(), Rect(x1, y1, x2, y2), Action.CreateURI(doc.GetSDFDoc(), "http://www.apryse.com"))
            cur_page.AnnotPushBack(hyper_link)
        # advance to the next highlight once all quads of the current one are linked
        hlts.Next()
# Search a PDF with a regular expression and place a link annotation over
# every highlighted quad of the first result returned by Run.
doc = PDFDoc.new(filename)
txt_search = TextSearch.new
mode = TextSearch::E_whole_word | TextSearch::E_page_stop
pattern = ""

# use regular expression to find credit card number
mode |= TextSearch::E_reg_expression | TextSearch::E_highlight
txt_search.SetMode(mode)
pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}" # or "(\\d{4}-){3}\\d{4}"
txt_search.SetPattern(pattern)

# call Begin method to initialize the text search.
txt_search.Begin(doc, pattern, mode)
searchResult = txt_search.Run

if searchResult.IsFound
  # add a link annotation based on the location of the found instance
  hlts = searchResult.GetHighlights
  hlts.Begin(doc)

  while hlts.HasNext do
    cur_page = doc.GetPage(hlts.GetCurrentPageNumber)
    quadsInfo = hlts.GetCurrentQuads

    (0...quadsInfo.size).each do |i|
      q = quadsInfo[i]
      # assume each quad is an axis-aligned rectangle
      xs = [q.p1.x, q.p2.x, q.p3.x, q.p4.x]
      ys = [q.p1.y, q.p2.y, q.p3.y, q.p4.y]
      x1 = xs.min
      x2 = xs.max
      y1 = ys.min
      y2 = ys.max
      hyper_link = Link.Create(doc.GetSDFDoc, Rect.new(x1, y1, x2, y2), Action.CreateURI(doc.GetSDFDoc, "http://www.apryse.com"))
      cur_page.AnnotPushBack(hyper_link)
    end
    hlts.Next
  end
end
' Search a PDF with a regular expression and place a link annotation over
' every highlighted quad of the first result returned by Run().
Dim doc As PDFDoc = New PDFDoc(filename)
Dim page_num As Int32 = 0
Dim result_str As String = "", ambient_string As String = ""
Dim hlts As Highlights = New Highlights()
Dim txt_search As TextSearch = New TextSearch()
Dim mode As Int32 = CInt((TextSearch.SearchMode.e_whole_word Or TextSearch.SearchMode.e_page_stop Or TextSearch.SearchMode.e_highlight))
Dim pattern As String = ""

' use regular expression to find credit card number
mode = mode Or CInt((TextSearch.SearchMode.e_reg_expression Or TextSearch.SearchMode.e_highlight))
txt_search.SetMode(mode)
pattern = "\d{4}-\d{4}-\d{4}-\d{4}"
txt_search.SetPattern(pattern)

' call Begin method to initialize the text search.
txt_search.Begin(doc, pattern, mode, -1, -1)
Dim code As TextSearch.ResultCode = txt_search.Run(page_num, result_str, ambient_string, hlts)

If code = TextSearch.ResultCode.e_found Then
    ' add a link annotation based on the location of the found instance
    hlts.Begin(doc)
    While hlts.HasNext()
        Dim cur_page As Page = doc.GetPage(hlts.GetCurrentPageNumber())
        ' The array is read as groups of 8 doubles per quad:
        ' even offsets are used as x values, odd offsets as y values.
        Dim quads As Double() = hlts.GetCurrentQuads()
        ' Integer division (\) keeps the count an Integer; "/" would produce a Double.
        Dim quad_count As Integer = quads.Length \ 8

        For i As Integer = 0 To quad_count - 1
            ' assume each quad is an axis-aligned rectangle
            Dim offset As Integer = 8 * i
            Dim x1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
            Dim x2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
            Dim y1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))
            Dim y2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))
            Dim hyper_link As pdftron.PDF.Annots.Link = pdftron.PDF.Annots.Link.Create(doc, New Rect(x1, y1, x2, y2), pdftron.PDF.Action.CreateURI(doc, "http://www.apryse.com"))
            hyper_link.RefreshAppearance()
            cur_page.AnnotPushBack(hyper_link)
        Next

        hlts.Next()
    End While
End If
Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales