PDF2Office - Convert PDF to DOCX, XLSX - Java Sample Code

Requirements

Sample code for using Apryse SDK to programmatically convert generic PDF documents to Word, Excel, PowerPoint; provided in Python, C++, C#, Go, Java, Node.js (JavaScript), PHP, Ruby and VB.

To convert files to Office with this Apryse Server SDK sample code:

  1. Complete the Get started with Server SDK process in your language/framework.
  2. After completing Step 1, download the Structured Output Module.
  3. Add the sample code provided in this guide.

To use this feature in production, your license key will need the Office Conversion Package. Trial keys already include all packages.

Learn more about our Server SDK and PDF to Office Conversion.

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.PDFNetException;
7import com.pdftron.pdf.*;
8
9//---------------------------------------------------------------------------------------
10// The following sample illustrates how to use the PDF::Convert utility class to convert
11// documents and files to Office.
12//
13// The Structured Output module is an optional PDFNet Add-on that can be used to convert PDF
14// and other documents into Word, Excel, PowerPoint and HTML format.
15//
16// The Apryse SDK Structured Output module can be downloaded from
17// https://docs.apryse.com/core/info/modules/
18//
19// Please contact us if you have any questions.
20//---------------------------------------------------------------------------------------
21
22public class PDF2OfficeTest
23{
24 // Relative path to the folder containing test files.
25 static String inputPath = "../../TestFiles/";
26 static String outputPath = "../../TestFiles/Output/";
27
28 /// <summary>
29 /// The main entry point for the application.
30 /// </summary>
31 public static void main(String[] args)
32 {
33 // The first step in every application using PDFNet is to initialize the
34 // library. The library is usually initialized only once, but calling
35 // Initialize() multiple times is also fine.
36 PDFNet.initialize(PDFTronLicense.Key());
37
38 PDFNet.addResourceSearchPath("../../../Lib/");
39
40 try {
41 if (!StructuredOutputModule.isModuleAvailable()) {
42 System.out.println();
43 System.out.println("Unable to run the sample: Apryse SDK Structured Output module not available.");
44 System.out.println("-----------------------------------------------------------------------------");
45 System.out.println("The Structured Output module is an optional add-on, available for download");
46 System.out.println("at https://docs.apryse.com/core/info/modules/. If you have already");
47 System.out.println("downloaded this module, ensure that the SDK is able to find the required files");
48 System.out.println("using the PDFNet::AddResourceSearchPath() function.");
49 System.out.println();
50 return;
51 }
52 } catch (PDFNetException e) {
53 System.out.println(e);
54 return;
55 } catch (Exception e) {
56 System.out.println(e);
57 return;
58 }
59
60 boolean err = false;
61
62 //////////////////////////////////////////////////////////////////////////
63 // Word
64 //////////////////////////////////////////////////////////////////////////
65
66 try {
67 // Convert PDF document to Word
68 System.out.println("Converting PDF to Word");
69
70 String outputFile = outputPath + "paragraphs_and_tables.docx";
71
72 Convert.toWord(inputPath + "paragraphs_and_tables.pdf", outputFile);
73
74 System.out.println("Result saved in " + outputFile);
75 } catch (PDFNetException e) {
76 System.out.println("Unable to convert PDF document to Word, error: ");
77 System.out.println(e);
78 err = true;
79 } catch (Exception e) {
80 System.out.println("Unknown Exception, error: ");
81 System.out.println(e);
82 err = true;
83 }
84
85 //////////////////////////////////////////////////////////////////////////
86
87 try {
88 // Convert PDF document to Word with options
89 System.out.println("Converting PDF to Word with options");
90
91 String outputFile = outputPath + "paragraphs_and_tables_first_page.docx";
92
93 Convert.WordOutputOptions wordOutputOptions = new Convert.WordOutputOptions();
94
95 // Convert only the first page
96 wordOutputOptions.setPages(1, 1);
97
98 Convert.toWord(inputPath + "paragraphs_and_tables.pdf", outputFile, wordOutputOptions);
99
100 System.out.println("Result saved in " + outputFile);
101 } catch (PDFNetException e) {
102 System.out.println("Unable to convert PDF document to Word, error: ");
103 System.out.println(e);
104 err = true;
105 } catch (Exception e) {
106 System.out.println("Unknown Exception, error: ");
107 System.out.println(e);
108 err = true;
109 }
110
111 //////////////////////////////////////////////////////////////////////////
112 // Excel
113 //////////////////////////////////////////////////////////////////////////
114
115 try {
116 // Convert PDF document to Excel
117 System.out.println("Converting PDF to Excel");
118
119 String outputFile = outputPath + "paragraphs_and_tables.xlsx";
120
121 Convert.toExcel(inputPath + "paragraphs_and_tables.pdf", outputFile);
122
123 System.out.println("Result saved in " + outputFile);
124 } catch (PDFNetException e) {
125 System.out.println("Unable to convert PDF document to Excel, error: ");
126 System.out.println(e);
127 err = true;
128 } catch (Exception e) {
129 System.out.println("Unknown Exception, error: ");
130 System.out.println(e);
131 err = true;
132 }
133
134 //////////////////////////////////////////////////////////////////////////
135
136 try (PDFDoc doc = new PDFDoc()) {
137 // Convert PDF document to Excel with options
138 System.out.println("Converting PDF to Excel with options");
139
140 String outputFile = outputPath + "paragraphs_and_tables_second_page.xlsx";
141
142 Convert.ExcelOutputOptions excelOutputOptions = new Convert.ExcelOutputOptions();
143
144 // Convert only the second page
145 excelOutputOptions.setPages(2, 2);
146
147 Convert.toExcel(inputPath + "paragraphs_and_tables.pdf", outputFile, excelOutputOptions);
148
149 System.out.println("Result saved in " + outputFile);
150 } catch (PDFNetException e) {
151 System.out.println("Unable to convert PDF document to Excel, error: ");
152 System.out.println(e);
153 err = true;
154 } catch (Exception e) {
155 System.out.println("Unknown Exception, error: ");
156 System.out.println(e);
157 err = true;
158 }
159
160 //////////////////////////////////////////////////////////////////////////
161 // PowerPoint
162 //////////////////////////////////////////////////////////////////////////
163
164 try {
165 // Convert PDF document to PowerPoint
166 System.out.println("Converting PDF to PowerPoint");
167
168 String outputFile = outputPath + "paragraphs_and_tables.pptx";
169
170 Convert.toPowerPoint(inputPath + "paragraphs_and_tables.pdf", outputFile);
171
172 System.out.println("Result saved in " + outputFile);
173 } catch (PDFNetException e) {
174 System.out.println("Unable to convert PDF document to PowerPoint, error: ");
175 System.out.println(e);
176 err = true;
177 } catch (Exception e) {
178 System.out.println("Unknown Exception, error: ");
179 System.out.println(e);
180 err = true;
181 }
182
183 //////////////////////////////////////////////////////////////////////////
184
185 try {
186 // Convert PDF document to PowerPoint with options
187 System.out.println("Converting PDF to PowerPoint with options");
188
189 String outputFile = outputPath + "paragraphs_and_tables_first_page.pptx";
190
191 Convert.PowerPointOutputOptions powerPointOutputOptions = new Convert.PowerPointOutputOptions();
192
193 // Convert only the first page
194 powerPointOutputOptions.setPages(1, 1);
195
196 Convert.toPowerPoint(inputPath + "paragraphs_and_tables.pdf", outputFile, powerPointOutputOptions);
197
198 System.out.println("Result saved in " + outputFile);
199 } catch (PDFNetException e) {
200 System.out.println("Unable to convert PDF document to PowerPoint, error: ");
201 System.out.println(e);
202 err = true;
203 } catch (Exception e) {
204 System.out.println("Unknown Exception, error: ");
205 System.out.println(e);
206 err = true;
207 }
208
209 //////////////////////////////////////////////////////////////////////////
210
211 PDFNet.terminate();
212 System.out.println("Done.");
213 }
214}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales