More languages
Some test text!
More languages
Sample VB code for using PDFTron SDK to search text on PDF pages using regular expressions. The TextSearch utility class builds on functionality available in TextExtractor to simplify most common search operations. Learn more about our VB PDF Library and PDF Indexed Search Library.
Get Started | Samples | Download
To run this sample, get started with a free trial of Apryse SDK.
'
' Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
'
Imports System
Imports pdftron
Imports pdftron.Common
Imports pdftron.Filters
Imports pdftron.SDF
Imports pdftron.PDF
' Sample module: runs a multi-step TextSearch over "credit card numbers.pdf",
' prints each match to the console, and finally saves a copy of the document
' in which the highlights of the last match are covered by link annotations.
Module TextSearchTestVB
    Dim pdfNetLoader As PDFNetLoader

    ' Obtains the PDFNetLoader instance before Main runs.
    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub

    ' Entry point. The search is driven as a small state machine (step_):
    '   0 - literal search for "joHn sMiTh"; on a hit, switch to regex mode
    '       and look for a dddd-dddd-dddd-dddd number.
    '   1 - print that number and its highlight pages; then look for a
    '       dddd-dddddd-ddddd number.
    '   2 - print that number; add e_search_up to the mode and look for two
    '       space-separated words.
    '   3 - print the words, add links over their highlights, save, and stop.
    ' PDFNetException messages are written to the console.
    Sub Main()
        PDFNet.Initialize(PDFTronLicense.Key)
        Dim input_path As String = "../../../../TestFiles/"

        Try
            Using doc As PDFDoc = New PDFDoc(input_path & "credit card numbers.pdf")
                doc.InitSecurityHandler()

                ' Filled in by every txt_search.Run call below.
                Dim page_num As Int32 = 0
                Dim result_str As String = "", ambient_string As String = ""
                Dim hlts As Highlights = New Highlights()

                Dim txt_search As TextSearch = New TextSearch()
                Dim mode As Int32 = CInt((TextSearch.SearchMode.e_whole_word Or TextSearch.SearchMode.e_page_stop Or TextSearch.SearchMode.e_highlight))
                Dim pattern As String = "joHn sMiTh"

                txt_search.Begin(doc, pattern, mode, -1, -1)

                Dim step_ As Integer = 0

                While True
                    Dim code As TextSearch.ResultCode = txt_search.Run(page_num, result_str, ambient_string, hlts)

                    If code = TextSearch.ResultCode.e_found Then
                        Select Case step_
                            Case 0
                                ' Name found: continue with a regular-expression search.
                                Console.WriteLine(result_str & "'s credit card number is: ")
                                mode = txt_search.GetMode()
                                mode = mode Or CInt((TextSearch.SearchMode.e_reg_expression Or TextSearch.SearchMode.e_highlight))
                                txt_search.SetMode(mode)
                                pattern = "\d{4}-\d{4}-\d{4}-\d{4}"
                                txt_search.SetPattern(pattern)
                                step_ += 1

                            Case 1
                                ' First number found: report it and where its highlights are.
                                Console.WriteLine(" " & result_str)
                                hlts.Begin(doc)
                                While hlts.HasNext()
                                    Console.WriteLine("The current highlight is from page: " & hlts.GetCurrentPageNumber())
                                    hlts.Next()
                                End While
                                pattern = "\d{4}-\d{6}-\d{5}"
                                txt_search.SetPattern(pattern)
                                step_ += 1

                            Case 2
                                ' Second number found: add e_search_up to the mode
                                ' (presumably reverses the search direction - confirm
                                ' against the TextSearch documentation).
                                Console.WriteLine(vbLf & "There is an AMEX card number:" & vbLf & " " & result_str)
                                mode = txt_search.GetMode()
                                mode = mode Or CInt((TextSearch.SearchMode.e_search_up))
                                txt_search.SetMode(mode)
                                pattern = "[A-z]++ [A-z]++"
                                txt_search.SetPattern(pattern)
                                step_ += 1

                            Case 3
                                ' Last match: link its highlights, save the result, finish.
                                Console.WriteLine("Is the owner's name:" & vbLf & " " & result_str & "?")
                                AddLinksOverHighlights(doc, hlts)
                                Dim output_path As String = "../../../../TestFiles/Output/"
                                doc.Save(output_path & "credit card numbers_linked.pdf", SDFDoc.SaveOptions.e_linearized)
                                Exit While
                        End Select
                    ElseIf code = TextSearch.ResultCode.e_page Then
                        ' Intentionally empty: Run returned e_page rather than a match
                        ' (the mode includes e_page_stop), so just call Run again.
                    Else
                        ' Any other result code ends the search.
                        Exit While
                    End If
                End While
            End Using
        Catch e As PDFNetException
            Console.WriteLine(e.Message)
        Finally
            ' Runs after the Using block has disposed the document, and also
            ' when an exception other than PDFNetException propagates.
            PDFNet.Terminate()
        End Try
    End Sub

    ' Walks every highlight in hlts and, for each quad of that highlight, adds
    ' a URI link annotation covering the quad's bounding box to the highlight's page.
    Private Sub AddLinksOverHighlights(ByVal doc As PDFDoc, ByVal hlts As Highlights)
        hlts.Begin(doc)
        While hlts.HasNext()
            Dim cur_page As Page = doc.GetPage(hlts.GetCurrentPageNumber())
            Dim quads As Double() = hlts.GetCurrentQuads()

            ' Each quad is read as 8 consecutive doubles (x at even offsets, y at
            ' odd offsets). Use integer division (\): the / operator yields a
            ' Double, which does not compile under Option Strict On.
            Dim quad_count As Integer = quads.Length \ 8

            For i As Integer = 0 To quad_count - 1
                Dim offset As Integer = 8 * i

                ' Axis-aligned bounding box of the quad's four corners.
                Dim x1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                Dim x2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                Dim y1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))
                Dim y2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))

                Dim hyper_link As pdftron.PDF.Annots.Link = pdftron.PDF.Annots.Link.Create(doc, New Rect(x1, y1, x2, y2), pdftron.PDF.Action.CreateURI(doc, "http://www.pdftron.com"))
                hyper_link.RefreshAppearance()
                cur_page.AnnotPushBack(hyper_link)
            Next

            hlts.Next()
        End While
    End Sub
End Module