Convert PDF to DOCX, XLSX - PDF2Office

Requirements

Sample code for using the Apryse SDK to programmatically convert generic PDF documents to Word, Excel, and PowerPoint; provided in Python, C++, C#, Go, Java, Node.js (JavaScript), PHP, Ruby and VB.

To convert files to Office with this Apryse Server SDK sample code:

  1. Complete the Get started with Server SDK process in your language/framework.
  2. After completing the Get started with Server SDK process from Step 1, download the Structured Output Module.
  3. Add the sample code provided in this guide.

To use this feature in production, your license key will need the Office Conversion Package. Trial keys already include all packages.

Learn more about our Server SDK and PDF to Office Conversion.

//
// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
//

using System;
using pdftron;
using pdftron.Common;
using pdftron.PDF;

namespace PDF2OfficeTestCS
{
    /// <summary>
    /// The following sample illustrates how to use the PDF::Convert utility class to convert
    /// documents and files to Office.
    ///
    /// The Structured Output module is an optional PDFNet Add-on that can be used to convert PDF
    /// and other documents into Word, Excel, PowerPoint and HTML format.
    ///
    /// The Apryse SDK Structured Output add-on module can be downloaded from
    /// https://docs.apryse.com/core/info/modules/
    ///
    /// Please contact us if you have any questions.
    /// </summary>
    class Class1
    {
        // Obtained during type initialization. NOTE(review): presumably this is what loads the
        // native PDFNet library -- confirm against the SDK documentation.
        private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();

        // Intentionally empty. Declaring an explicit static constructor guarantees that the
        // static field initializer above has run before Main starts executing.
        static Class1() {}

        // Relative path to the folder containing test files.
        const string inputPath = "../../../../TestFiles/";
        const string outputPath = "../../../../TestFiles/Output/";

        /// <summary>
        /// Runs a single conversion step and reports its outcome on the console.
        /// </summary>
        /// <param name="announcement">Line printed before the conversion starts.</param>
        /// <param name="format">Target format name used in the PDFNetException failure message.</param>
        /// <param name="outputFile">Output path reported when the conversion succeeds.</param>
        /// <param name="convert">Delegate that performs the actual conversion.</param>
        /// <returns>
        /// <c>true</c> if <paramref name="convert"/> completed without throwing; otherwise <c>false</c>.
        /// </returns>
        private static bool TryConvert(string announcement, string format, string outputFile, Action convert)
        {
            try
            {
                Console.WriteLine(announcement);

                convert();

                Console.WriteLine("Result saved in " + outputFile);
                return true;
            }
            catch (PDFNetException e)
            {
                Console.WriteLine("Unable to convert PDF document to " + format + ", error: " + e.Message);
            }
            catch (Exception e)
            {
                Console.WriteLine("Unknown Exception, error: ");
                Console.WriteLine(e);
            }

            return false;
        }

        /// <summary>
        /// Prints the notice shown when the Structured Output module cannot be found.
        /// </summary>
        private static void ReportModuleUnavailable()
        {
            Console.WriteLine();
            Console.WriteLine("Unable to run the sample: Apryse SDK Structured Output module not available.");
            Console.WriteLine("-----------------------------------------------------------------------------");
            Console.WriteLine("The Structured Output module is an optional add-on, available for download");
            Console.WriteLine("at https://docs.apryse.com/core/info/modules/. If you have already");
            Console.WriteLine("downloaded this module, ensure that the SDK is able to find the required files");
            Console.WriteLine("using the PDFNet::AddResourceSearchPath() function.");
            Console.WriteLine();
        }

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <returns>
        /// 0 when every conversion succeeded or when the Structured Output module is not
        /// available (the sample is skipped); 1 when at least one conversion failed.
        /// </returns>
        [STAThread]
        static int Main(string[] args)
        {
            // The first step in every application using PDFNet is to initialize the
            // library. The library is usually initialized only once, but calling
            // Initialize() multiple times is also fine.
            PDFNet.Initialize(PDFTronLicense.Key);

            bool err = false;

            // try/finally so that Terminate() is paired with Initialize() on every exit path,
            // including the early return taken when the module is missing.
            try
            {
                PDFNet.AddResourceSearchPath("../../../../../Lib/");

                if (!StructuredOutputModule.IsModuleAvailable())
                {
                    ReportModuleUnavailable();
                    return 0;
                }

                const string inputFile = inputPath + "paragraphs_and_tables.pdf";

                // Note: '|=' on bool does not short-circuit, so every conversion below is
                // attempted even after an earlier one has failed.

                //////////////////////////////////////////////////////////////////////////
                // Word
                //////////////////////////////////////////////////////////////////////////

                // Convert PDF document to Word
                string wordFile = outputPath + "paragraphs_and_tables.docx";
                err |= !TryConvert("Converting PDF to Word", "Word", wordFile, () =>
                {
                    pdftron.PDF.Convert.ToWord(inputFile, wordFile);
                });

                // Convert PDF document to Word with options
                string wordFirstPageFile = outputPath + "paragraphs_and_tables_first_page.docx";
                err |= !TryConvert("Converting PDF to Word with options", "Word", wordFirstPageFile, () =>
                {
                    pdftron.PDF.Convert.WordOutputOptions wordOutputOptions = new pdftron.PDF.Convert.WordOutputOptions();

                    // Convert only the first page
                    wordOutputOptions.SetPages(1, 1);

                    pdftron.PDF.Convert.ToWord(inputFile, wordFirstPageFile, wordOutputOptions);
                });

                //////////////////////////////////////////////////////////////////////////
                // Excel
                //////////////////////////////////////////////////////////////////////////

                // Convert PDF document to Excel
                string excelFile = outputPath + "paragraphs_and_tables.xlsx";
                err |= !TryConvert("Converting PDF to Excel", "Excel", excelFile, () =>
                {
                    pdftron.PDF.Convert.ToExcel(inputFile, excelFile);
                });

                // Convert PDF document to Excel with options
                string excelSecondPageFile = outputPath + "paragraphs_and_tables_second_page.xlsx";
                err |= !TryConvert("Converting PDF to Excel with options", "Excel", excelSecondPageFile, () =>
                {
                    pdftron.PDF.Convert.ExcelOutputOptions excelOutputOptions = new pdftron.PDF.Convert.ExcelOutputOptions();

                    // Convert only the second page
                    excelOutputOptions.SetPages(2, 2);

                    pdftron.PDF.Convert.ToExcel(inputFile, excelSecondPageFile, excelOutputOptions);
                });

                //////////////////////////////////////////////////////////////////////////
                // PowerPoint
                //////////////////////////////////////////////////////////////////////////

                // Convert PDF document to PowerPoint
                string powerPointFile = outputPath + "paragraphs_and_tables.pptx";
                err |= !TryConvert("Converting PDF to PowerPoint", "PowerPoint", powerPointFile, () =>
                {
                    pdftron.PDF.Convert.ToPowerPoint(inputFile, powerPointFile);
                });

                // Convert PDF document to PowerPoint with options
                string powerPointFirstPageFile = outputPath + "paragraphs_and_tables_first_page.pptx";
                err |= !TryConvert("Converting PDF to PowerPoint with options", "PowerPoint", powerPointFirstPageFile, () =>
                {
                    pdftron.PDF.Convert.PowerPointOutputOptions powerPointOutputOptions = new pdftron.PDF.Convert.PowerPointOutputOptions();

                    // Convert only the first page
                    powerPointOutputOptions.SetPages(1, 1);

                    pdftron.PDF.Convert.ToPowerPoint(inputFile, powerPointFirstPageFile, powerPointOutputOptions);
                });

                //////////////////////////////////////////////////////////////////////////
            }
            finally
            {
                PDFNet.Terminate();
            }

            Console.WriteLine("Done.");
            return err ? 1 : 0;
        }
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales