Some test text!

Search
Hamburger Icon

Go / Guides / Text search

Search for text in a PDF in Go

This guide shows how to search for text in a PDF using a regular expression and then apply a link annotation to each highlighted result.

In this example, we add a link annotation, but any other type of annotation can be applied here, such as a redaction annotation in a search-and-redact workflow.
// Open the document and run a regular-expression text search over it, then
// cover every quad of the found match with a link annotation.
doc := NewPDFDoc(filename)
txtSearch := NewTextSearch()

// Combine all search flags up front. The highlight flag is required so that
// GetHighlights() below has quad information to iterate over.
mode := TextSearchE_whole_word | TextSearchE_page_stop |
	TextSearchE_reg_expression | TextSearchE_highlight

// Declare the pattern exactly once: redeclaring it with ":=" in the same
// scope is a compile error in Go ("no new variables on left side of :=").
pattern := "\\d{4}-\\d{4}-\\d{4}-\\d{4}" // or "(\\d{4}-){3}\\d{4}"

txtSearch.SetMode(uint(mode))
txtSearch.SetPattern(pattern)

// call Begin() method to initialize the text search.
txtSearch.Begin(doc, pattern, uint(mode))
searchResult := txtSearch.Run()

if searchResult.IsFound() {
	// add a link annotation based on the location of the found instance
	hlts := searchResult.GetHighlights()
	hlts.Begin(doc)

	for hlts.HasNext() {
		curPage := doc.GetPage(uint(hlts.GetCurrentPageNumber()))
		quadsInfo := hlts.GetCurrentQuads()

		for i := 0; i < int(quadsInfo.Size()); i++ {
			q := quadsInfo.Get(i)
			p1, p2, p3, p4 := q.GetP1(), q.GetP2(), q.GetP3(), q.GetP4()

			// assume each quad is an axis-aligned rectangle, so its bounding
			// box is given by the extreme x and y values of its four corners
			x1 := Min(Min(Min(p1.GetX(), p2.GetX()), p3.GetX()), p4.GetX())
			x2 := Max(Max(Max(p1.GetX(), p2.GetX()), p3.GetX()), p4.GetX())
			y1 := Min(Min(Min(p1.GetY(), p2.GetY()), p3.GetY()), p4.GetY())
			y2 := Max(Max(Max(p1.GetY(), p2.GetY()), p3.GetY()), p4.GetY())

			sdfDoc := doc.GetSDFDoc()
			hyperLink := LinkCreate(sdfDoc, NewRect(x1, y1, x2, y2), ActionCreateURI(sdfDoc, "http://www.pdftron.com"))
			curPage.AnnotPushBack(hyperLink)
		}
		hlts.Next()
	}
}

Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.

Get the answers you need: Chat with us