Handwriting ICR to search PDFs and Extract Text - Java Sample Code

Requirements
View Demo

This sample code shows how to use the Apryse Server OCR module on scanned documents written in multiple languages. It is provided in Python, C++, C# (.NET), Java, Node.js (JavaScript), PHP, Ruby and VB. The OCR module can make searchable PDFs and extract scanned text for further indexing.

Looking for OCR + WebViewer? Check out our OCR - Showcase Sample Code

Learn more about our Server SDK and OCR capabilities.

Implementation steps

To run this sample, complete the following steps:

  1. Get started with Server SDK in your language/framework.
  2. Download ICR Module.
  3. Add the sample code provided below.

To use this feature in production, your license key will need the ICR Package. Trial keys already include this package.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2026 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import java.io.FileWriter;
7import java.io.BufferedWriter;
8import java.io.IOException;
9
10import com.pdftron.common.PDFNetException;
11import com.pdftron.pdf.*;
12import com.pdftron.sdf.SDFDoc;
13
14//---------------------------------------------------------------------------------------
15// The Handwriting ICR Module is an optional PDFNet add-on that can be used to extract
16// handwriting from image-based pages and apply it as hidden text.
17//
18// The Apryse SDK Handwriting ICR Module can be downloaded from https://dev.apryse.com/
19//---------------------------------------------------------------------------------------
20public class HandwritingICRTest {
21
22 static void writeTextToFile(String filename, String text) throws IOException
23 {
24 BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
25 writer.write(text);
26 writer.close();
27 }
28
29 public static void main(String[] args) {
30 try {
31 // The first step in every application using PDFNet is to initialize the
32 // library and set the path to common PDF resources. The library is usually
33 // initialized only once, but calling Initialize() multiple times is also fine.
34 PDFNet.initialize(PDFTronLicense.Key());
35
36 // The location of the Handwriting ICR Module
37 PDFNet.addResourceSearchPath("../../../Lib/");
38
39 // Test if the add-on is installed
40 if (!HandwritingICRModule.isModuleAvailable())
41 {
42 System.out.println("");
43 System.out.println("Unable to run HandwritingICRTest: Apryse SDK Handwriting ICR Module");
44 System.out.println("not available.");
45 System.out.println("---------------------------------------------------------------");
46 System.out.println("The Handwriting ICR Module is an optional add-on, available for download");
47 System.out.println("at https://dev.apryse.com/. If you have already downloaded this");
48 System.out.println("module, ensure that the SDK is able to find the required files");
49 System.out.println("using the PDFNet.addResourceSearchPath() function.");
50 System.out.println("");
51 return;
52 }
53
54 // Relative path to the folder containing test files.
55 String input_path = "../../TestFiles/HandwritingICR/";
56 String output_path = "../../TestFiles/Output/";
57
58 //--------------------------------------------------------------------------------
59 // Example 1) Process a PDF without specifying options
60 System.out.println("Example 1: processing icr.pdf");
61
62 // Open the .pdf document
63 try (PDFDoc doc = new PDFDoc(input_path + "icr.pdf"))
64 {
65 // Run ICR on the .pdf with the default options
66 HandwritingICRModule.processPDF(doc);
67
68 // Save the result with hidden text applied
69 doc.save(output_path + "icr-simple.pdf", SDFDoc.SaveMode.LINEARIZED, null);
70 doc.close();
71 } catch (PDFNetException e) {
72 e.printStackTrace();
73 }
74
75 //--------------------------------------------------------------------------------
76 // Example 2) Process a subset of PDF pages
77 System.out.println("Example 2: processing pages from icr.pdf");
78
79 // Open the .pdf document
80 try (PDFDoc doc = new PDFDoc(input_path + "icr.pdf"))
81 {
82 // Process handwriting with custom options
83 HandwritingICROptions options = new HandwritingICROptions();
84
85 // Optionally, process a subset of pages
86 options.setPages("2-3");
87
88 // Run ICR on the .pdf
89 HandwritingICRModule.processPDF(doc, options);
90
91 // Save the result with hidden text applied
92 doc.save(output_path + "icr-pages.pdf", SDFDoc.SaveMode.LINEARIZED, null);
93 doc.close();
94 } catch (PDFNetException e) {
95 e.printStackTrace();
96 }
97
98 //--------------------------------------------------------------------------------
99 // Example 3) Ignore zones specified for each page
100 System.out.println("Example 3: processing & ignoring zones");
101
102 // Open the .pdf document
103 try (PDFDoc doc = new PDFDoc(input_path + "icr.pdf"))
104 {
105 // Process handwriting with custom options
106 HandwritingICROptions options = new HandwritingICROptions();
107
108 // Process page 2 by ignoring the signature area on the bottom
109 options.setPages("2");
110 RectCollection ignore_zones_page2 = new RectCollection();
111 // These coordinates are in PDF user space, with the origin at the bottom left corner of the page.
112 // Coordinates rotate with the page, if it has rotation applied.
113 ignore_zones_page2.addRect(78, 850.1 - 770, 340, 850.1 - 676);
114 options.addIgnoreZonesForPage(ignore_zones_page2, 2);
115
116 // Run ICR on the .pdf
117 HandwritingICRModule.processPDF(doc, options);
118
119 // Save the result with hidden text applied
120 doc.save(output_path + "icr-ignore.pdf", SDFDoc.SaveMode.LINEARIZED, null);
121 doc.close();
122 } catch (PDFNetException e) {
123 e.printStackTrace();
124 }
125
126 //--------------------------------------------------------------------------------
127 // Example 4) The postprocessing workflow has also an option of extracting ICR results
128 // in JSON format, similar to the one used by the OCR Module
129 System.out.println("Example 4: extract & apply");
130
131 // Open the .pdf document
132 try (PDFDoc doc = new PDFDoc(input_path + "icr.pdf"))
133 {
134 // Extract ICR results in JSON format
135 String json = HandwritingICRModule.getICRJsonFromPDF(doc);
136 writeTextToFile(output_path + "icr-get.json", json);
137
138 // Insert your post-processing step (whatever it might be)
139 // ...
140
141 // Apply potentially modified ICR JSON to the PDF
142 HandwritingICRModule.applyICRJsonToPDF(doc, json);
143
144 // Save the result with hidden text applied
145 doc.save(output_path + "icr-get-apply.pdf", SDFDoc.SaveMode.LINEARIZED, null);
146 doc.close();
147 } catch (PDFNetException e) {
148 e.printStackTrace();
149 }
150 catch (IOException e) {
151 System.out.println(e);
152 }
153 System.out.println("Done.");
154 PDFNet.terminate();
155 } catch (PDFNetException e) {
156 e.printStackTrace();
157 }
158 }
159}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales