Handwriting ICR to search PDFs and Extract Text - Go Sample Code

Requirements
View Demo

This sample code shows how to use the Apryse Server OCR module on scanned documents in multiple languages. It is provided in Go, Python, C++, C# (.Net), Java, Node.js (JavaScript), PHP, Ruby and VB. The OCR module can make searchable PDFs and extract scanned text for further indexing.

Looking for OCR + WebViewer? Check out our OCR - Showcase Sample Code

Learn more about our Server SDK and OCR capabilities.

Implementation steps

To run this sample, follow these steps:

  1. Get started with Server SDK in your language/framework.
  2. Download ICR Module.
  3. Add the sample code provided below.

To use this feature in production, your license key will need the ICR Package. Trial keys already include this package.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2026 by Apryse Software Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8 "fmt"
9 "testing"
10 "os"
11 "flag"
12 . "github.com/pdftron/pdftron-go/v2"
13)
14
// Command-line settings for the sample. Both default to the empty string
// and are bound to flags in init.
var (
	licenseKey string
	modulePath string
)

// init registers the -license and -modulePath flags on the default
// command-line flag set so they can be supplied when the test binary runs.
func init() {
	flag.StringVar(&modulePath, "modulePath", "", "Path for downloaded modules")
	flag.StringVar(&licenseKey, "license", "", "License key for Apryse SDK")
}
22
// Relative locations of the sample's test data: inputPath is the folder the
// input PDFs are read from, outputPath is the folder results are written to.
var (
	inputPath  = "../TestFiles/HandwritingICR/"
	outputPath = "../TestFiles/Output/"
)
26
// WriteTextToFile creates (or truncates) outputFile and writes text to it.
//
// Errors are reported on standard output rather than returned, so the
// sample keeps running if a file cannot be written. If the file cannot be
// created, the function returns immediately instead of attempting to write
// through an invalid file handle.
func WriteTextToFile(outputFile string, text string) {
	f, err := os.Create(outputFile)
	if err != nil {
		fmt.Println(err)
		return
	}

	if _, err := f.WriteString(text); err != nil {
		fmt.Println(err)
	}

	// Close is checked explicitly: for a file opened for writing, a failed
	// Close can mean the data never reached the disk.
	if err := f.Close(); err != nil {
		fmt.Println(err)
	}
}
40
41// ---------------------------------------------------------------------------------------
42// The Handwriting ICR Module is an optional PDFNet add-on that can be used to extract
43// handwriting from image-based pages and apply it as hidden text.
44//
45// The Apryse SDK Handwriting ICR Module can be downloaded from https://dev.apryse.com/
46// --------------------------------------------------------------------------------------
47
48func TestHandwritingICR(t *testing.T) {
49
50 // The first step in every application using PDFNet is to initialize the
51 // library and set the path to common PDF resources. The library is usually
52 // initialized only once, but calling Initialize() multiple times is also fine.
53 PDFNetInitialize(licenseKey)
54
55 // The location of the Handwriting ICR Module
56 PDFNetAddResourceSearchPath(modulePath)
57
58 // Test if the add-on is installed
59 if !HandwritingICRModuleIsModuleAvailable() {
60
61 fmt.Println("Unable to run HandwritingICRTest: Apryse SDK Handwriting ICR Module\n" +
62 "not available.\n" +
63 "---------------------------------------------------------------\n" +
64 "The Handwriting ICR Module is an optional add-on, available for download\n" +
65 "at https://dev.apryse.com/. If you have already downloaded this\n" +
66 "module, ensure that the SDK is able to find the required files\n" +
67 "using the PDFNetAddResourceSearchPath() function.")
68
69 } else {
70
71 // --------------------------------------------------------------------------------
72 // Example 1) Process a PDF without specifying options
73 fmt.Println("Example 1: processing icr.pdf")
74
75 // Open the .pdf document
76 doc := NewPDFDoc(inputPath + "icr.pdf")
77
78 // Run ICR on the .pdf with the default options
79 HandwritingICRModuleProcessPDF(doc)
80
81 // Save the result with hidden text applied
82 doc.Save(outputPath + "icr-simple.pdf", uint(SDFDocE_linearized))
83 doc.Close()
84
85 // --------------------------------------------------------------------------------
86 // Example 2) Process a subset of PDF pages
87 fmt.Println("Example 2: processing pages from icr.pdf")
88
89 // Open the .pdf document
90 doc = NewPDFDoc(inputPath + "icr.pdf")
91
92 // Process handwriting with custom options
93 options := NewHandwritingICROptions()
94
95 // Optionally, process a subset of pages
96 options.SetPages("2-3")
97
98 // Run ICR on the .pdf
99 HandwritingICRModuleProcessPDF(doc, options)
100
101 // Save the result with hidden text applied
102 doc.Save(outputPath + "icr-pages.pdf", uint(SDFDocE_linearized))
103 doc.Close()
104
105 // --------------------------------------------------------------------------------
106 // Example 3) Ignore zones specified for each page
107 fmt.Println("Example 3: processing & ignoring zones")
108
109 // Open the .pdf document
110 doc = NewPDFDoc(inputPath + "icr.pdf")
111
112 // Process handwriting with custom options
113 options = NewHandwritingICROptions()
114
115 // Process page 2 by ignoring the signature area on the bottom
116 options.SetPages("2")
117 ignoreZonesPage2 := NewRectCollection()
118 // These coordinates are in PDF user space, with the origin at the bottom left corner of the page.
119 // Coordinates rotate with the page, if it has rotation applied.
120 ignoreZonesPage2.AddRect(NewRect(78.0, 850.1 - 770.0, 340.0, 850.1 - 676.0))
121 options.AddIgnoreZonesForPage(ignoreZonesPage2, 2)
122
123 // Run ICR on the .pdf
124 HandwritingICRModuleProcessPDF(doc, options)
125
126 // Save the result with hidden text applied
127 doc.Save(outputPath + "icr-ignore.pdf", uint(SDFDocE_linearized))
128 doc.Close()
129
130 // --------------------------------------------------------------------------------
131 // Example 4) The postprocessing workflow has also an option of extracting ICR results
132 // in JSON format, similar to the one used by the OCR Module
133 fmt.Println("Example 4: extract & apply")
134
135 // Open the .pdf document
136 doc = NewPDFDoc(inputPath + "icr.pdf")
137
138 // Extract ICR results in JSON format
139 json := HandwritingICRModuleGetICRJsonFromPDF(doc)
140 WriteTextToFile(outputPath + "icr-get.json", json)
141
142 // Insert your post-processing step (whatever it might be)
143 // ...
144
145 // Apply potentially modified ICR JSON to the PDF
146 HandwritingICRModuleApplyICRJsonToPDF(doc, json)
147
148 // Save the result with hidden text applied
149 doc.Save(outputPath + "icr-get-apply.pdf", uint(SDFDocE_linearized))
150 doc.Close()
151
152 fmt.Println("Done.")
153
154 PDFNetTerminate()
155 }
156}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales