PDF2Html - Convert PDF to HTML - Java Sample Code

Sample code for using the Apryse SDK to programmatically convert generic PDF documents to HTML, provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our PDF to HTML conversion.

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.PDFNetException;
7import com.pdftron.pdf.*;
8
9//---------------------------------------------------------------------------------------
10// The following sample illustrates how to use the PDF::Convert utility class to convert
11// documents and files to HTML.
12//
13// There are two HTML modules and one of them is an optional PDFNet Add-on.
14// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
15// documents.
16// 2. The optional add-on module is used to convert PDF documents to HTML documents with
17// text flowing across the browser window.
18//
19// The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
20//
21// Please contact us if you have any questions.
22//---------------------------------------------------------------------------------------
23
24public class PDF2HtmlTest
25{
26 // Relative path to the folder containing test files.
27 static String inputPath = "../../TestFiles/";
28 static String outputPath = "../../TestFiles/Output/";
29
30 /// <summary>
31 /// The main entry point for the application.
32 /// </summary>
33 public static void main(String[] args)
34 {
35 // The first step in every application using PDFNet is to initialize the
36 // library. The library is usually initialized only once, but calling
37 // Initialize() multiple times is also fine.
38 PDFNet.initialize(PDFTronLicense.Key());
39
40 boolean err = false;
41
42 //////////////////////////////////////////////////////////////////////////
43
44 try {
45 // Convert PDF document to HTML with fixed positioning option turned on (default)
46 System.out.println("Converting PDF to HTML with fixed positioning option turned on (default)");
47
48 String outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";
49
50 Convert.toHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);
51
52 System.out.println("Result saved in " + outputFile);
53 } catch (PDFNetException e) {
54 System.out.println("Unable to convert PDF document to HTML, error: ");
55 System.out.println(e);
56 err = true;
57 } catch (Exception e) {
58 System.out.println("Unknown Exception, error: ");
59 System.out.println(e);
60 err = true;
61 }
62
63 //////////////////////////////////////////////////////////////////////////
64
65 PDFNet.addResourceSearchPath("../../../Lib/");
66
67 try {
68 if (!StructuredOutputModule.isModuleAvailable()) {
69 System.out.println();
70 System.out.println("Unable to run part of the sample: Apryse SDK Structured Output module not available.");
71 System.out.println("-------------------------------------------------------------------------------------");
72 System.out.println("The Structured Output module is an optional add-on, available for download");
73 System.out.println("at https://docs.apryse.com/core/info/modules/. If you have already");
74 System.out.println("downloaded this module, ensure that the SDK is able to find the required files");
75 System.out.println("using the PDFNet::AddResourceSearchPath() function.");
76 System.out.println();
77 return;
78 }
79 } catch (PDFNetException e) {
80 System.out.println(e);
81 return;
82 } catch (Exception e) {
83 System.out.println(e);
84 return;
85 }
86
87 //////////////////////////////////////////////////////////////////////////
88
89 try {
90 // Convert PDF document to HTML with reflow full option turned on (1)
91 System.out.println("Converting PDF to HTML with reflow full option turned on (1)");
92
93 String outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";
94
95 Convert.HTMLOutputOptions htmlOutputOptions = new Convert.HTMLOutputOptions();
96
97 // Set e_reflow_full content reflow setting
98 htmlOutputOptions.setContentReflowSetting(Convert.HTMLOutputOptions.e_reflow_full);
99
100 Convert.toHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
101
102 System.out.println("Result saved in " + outputFile);
103 } catch (PDFNetException e) {
104 System.out.println("Unable to convert PDF document to HTML, error: ");
105 System.out.println(e);
106 err = true;
107 } catch (Exception e) {
108 System.out.println("Unknown Exception, error: ");
109 System.out.println(e);
110 err = true;
111 }
112
113 //////////////////////////////////////////////////////////////////////////
114
115 try {
116 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
117 System.out.println("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)");
118
119 String outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";
120
121 Convert.HTMLOutputOptions htmlOutputOptions = new Convert.HTMLOutputOptions();
122
123 // Set e_reflow_full content reflow setting
124 htmlOutputOptions.setContentReflowSetting(Convert.HTMLOutputOptions.e_reflow_full);
125
126 // Convert only the first page
127 htmlOutputOptions.setPages(1, 1);
128
129 Convert.toHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
130
131 System.out.println("Result saved in " + outputFile);
132 } catch (PDFNetException e) {
133 System.out.println("Unable to convert PDF document to HTML, error: ");
134 System.out.println(e);
135 err = true;
136 } catch (Exception e) {
137 System.out.println("Unknown Exception, error: ");
138 System.out.println(e);
139 err = true;
140 }
141
142 //////////////////////////////////////////////////////////////////////////
143
144 PDFNet.terminate();
145 System.out.println("Done.");
146 }
147}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales