Did you find this guide helpful?
Some test text!
Python / Guides / Text search
Platform
Documentation
The following sample searches a PDF for text using a regular expression and then applies a link annotation to the highlighted result.
# Search a PDF for text matching a credit-card-number pattern and place a
# link annotation over every highlighted quad of the search result.
doc = PDFDoc(filename)
txt_search = TextSearch()

# Use a regular expression to find credit card numbers. e_highlight is
# combined in because the result's highlights are consumed below.
mode = (
    TextSearch.e_whole_word
    | TextSearch.e_page_stop
    | TextSearch.e_reg_expression
    | TextSearch.e_highlight
)
txt_search.SetMode(mode)

pattern = "\\d{4}-\\d{4}-\\d{4}-\\d{4}"  # or "(\\d{4}-){3}\\d{4}"
txt_search.SetPattern(pattern)

# Call Begin() to initialize the text search.
txt_search.Begin(doc, pattern, mode)

# NOTE(review): only a single Run() result is examined here. With
# e_page_stop set, Run() presumably can return a page-end result before any
# match is found -- confirm, and loop on Run() if every match in the
# document must be processed.
searchResult = txt_search.Run()

if searchResult.IsFound():
    # Add a link annotation based on the location of the found instance.
    hlts = searchResult.GetHighlights()
    hlts.Begin(doc)
    while hlts.HasNext():
        cur_page = doc.GetPage(hlts.GetCurrentPageNumber())
        quads = hlts.GetCurrentQuads()
        # Index explicitly: len() and indexing are the only operations this
        # snippet relies on for the returned quad container.
        for i in range(len(quads)):
            q = quads[i]
            # Assume each quad is an axis-aligned rectangle, so its bounding
            # box is given by the extreme x/y values of its four corners.
            x1 = min(q.p1.x, q.p2.x, q.p3.x, q.p4.x)
            x2 = max(q.p1.x, q.p2.x, q.p3.x, q.p4.x)
            y1 = min(q.p1.y, q.p2.y, q.p3.y, q.p4.y)
            y2 = max(q.p1.y, q.p2.y, q.p3.y, q.p4.y)
            hyper_link = Link.Create(
                doc.GetSDFDoc(),
                Rect(x1, y1, x2, y2),
                Action.CreateURI(doc.GetSDFDoc(), "http://www.pdftron.com"),
            )
            cur_page.AnnotPushBack(hyper_link)
        hlts.Next()
Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.
Get the answers you need: Support