PDF2Office - Convert PDF to DOCX, XLSX - Python Sample Code

Requirements

Sample code for using Apryse SDK to programmatically convert generic PDF documents to Word, Excel, PowerPoint; provided in Python, C++, C#, Go, Java, Node.js (JavaScript), PHP, Ruby and VB.

To convert files to Office with this Apryse Server SDK sample code:

  1. Complete the Get started with Server SDK process in your language/framework.
  2. After completing Step 1, download the Structured Output Module.
  3. Add the sample code provided in this guide.

To use this feature in production, your license key will need the Office Conversion Package. Trial keys already include all packages.

Learn more about our Server SDK and PDF to Office Conversion.

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11import platform
12
13sys.path.append("../../LicenseKey/PYTHON")
14from LicenseKey import *
15
16#---------------------------------------------------------------------------------------
17# The following sample illustrates how to use the PDF.Convert utility class to convert
18# documents and files to Word, Excel and PowerPoint.
19#
20# The Structured Output module is an optional PDFNet Add-on that can be used to convert PDF
21# and other documents into Word, Excel, PowerPoint and HTML format.
22#
23# The PDFTron SDK Structured Output module can be downloaded from
24# https://docs.apryse.com/core/info/modules/
25#
26# Please contact us if you have any questions.
27#---------------------------------------------------------------------------------------
28
# Relative paths to the folder containing the test files and to the folder
# where the converted documents are written.
inputPath = "../../TestFiles/"
outputPath = "../../TestFiles/Output/"
32
def _run_conversions():
    """Convert the sample PDF to Word, Excel and PowerPoint.

    Each format is converted twice: once with default settings and once
    with an options object that restricts the page range. Every conversion
    sits in its own try/except so that a failure is reported and the
    remaining conversions still run.
    """
    #-----------------------------------------------------------------------------------

    try:
        # Convert PDF document to Word
        print("Converting PDF to Word")

        outputFile = outputPath + "paragraphs_and_tables.docx"

        Convert.ToWord(inputPath + "paragraphs_and_tables.pdf", outputFile)

        print("Result saved in " + outputFile)
    except Exception as e:
        print("Unable to convert PDF document to Word, error: " + str(e))

    #-----------------------------------------------------------------------------------

    try:
        # Convert PDF document to Word with options
        print("Converting PDF to Word with options")

        outputFile = outputPath + "paragraphs_and_tables_first_page.docx"

        wordOutputOptions = WordOutputOptions()

        # Convert only the first page
        wordOutputOptions.SetPages(1, 1)

        Convert.ToWord(inputPath + "paragraphs_and_tables.pdf", outputFile, wordOutputOptions)

        print("Result saved in " + outputFile)
    except Exception as e:
        print("Unable to convert PDF document to Word, error: " + str(e))

    #-----------------------------------------------------------------------------------

    try:
        # Convert PDF document to Excel
        print("Converting PDF to Excel")

        outputFile = outputPath + "paragraphs_and_tables.xlsx"

        Convert.ToExcel(inputPath + "paragraphs_and_tables.pdf", outputFile)

        print("Result saved in " + outputFile)
    except Exception as e:
        print("Unable to convert PDF document to Excel, error: " + str(e))

    #-----------------------------------------------------------------------------------

    try:
        # Convert PDF document to Excel with options
        print("Converting PDF to Excel with options")

        outputFile = outputPath + "paragraphs_and_tables_second_page.xlsx"

        excelOutputOptions = ExcelOutputOptions()

        # Convert only the second page
        excelOutputOptions.SetPages(2, 2)

        Convert.ToExcel(inputPath + "paragraphs_and_tables.pdf", outputFile, excelOutputOptions)

        print("Result saved in " + outputFile)
    except Exception as e:
        print("Unable to convert PDF document to Excel, error: " + str(e))

    #-----------------------------------------------------------------------------------

    try:
        # Convert PDF document to PowerPoint
        print("Converting PDF to PowerPoint")

        outputFile = outputPath + "paragraphs_and_tables.pptx"

        Convert.ToPowerPoint(inputPath + "paragraphs_and_tables.pdf", outputFile)

        print("Result saved in " + outputFile)
    except Exception as e:
        print("Unable to convert PDF document to PowerPoint, error: " + str(e))

    #-----------------------------------------------------------------------------------

    try:
        # Convert PDF document to PowerPoint with options
        print("Converting PDF to PowerPoint with options")

        outputFile = outputPath + "paragraphs_and_tables_first_page.pptx"

        powerPointOutputOptions = PowerPointOutputOptions()

        # Convert only the first page
        powerPointOutputOptions.SetPages(1, 1)

        Convert.ToPowerPoint(inputPath + "paragraphs_and_tables.pdf", outputFile, powerPointOutputOptions)

        print("Result saved in " + outputFile)
    except Exception as e:
        print("Unable to convert PDF document to PowerPoint, error: " + str(e))

    #-----------------------------------------------------------------------------------


def main():
    """Run the PDF-to-Office conversion samples.

    Initializes PDFNet with the license key, registers the resource search
    path, and checks that the Structured Output module is available. If it
    is not, an explanatory message is printed and the function returns
    without converting anything. Otherwise the conversions run and
    "Done." is printed. PDFNet.Terminate() is called on every exit path.
    """
    # The first step in every application using PDFNet is to initialize the
    # library. The library is usually initialized only once, but calling
    # Initialize() multiple times is also fine.
    PDFNet.Initialize(LicenseKey)

    # try/finally guarantees that Terminate() runs on every exit path,
    # including the early return taken when the module is missing.
    try:
        PDFNet.AddResourceSearchPath("../../../PDFNetC/Lib/")

        if not StructuredOutputModule.IsModuleAvailable():
            print("")
            print("Unable to run the sample: PDFTron SDK Structured Output module not available.")
            print("-----------------------------------------------------------------------------")
            print("The Structured Output module is an optional add-on, available for download")
            print("at https://docs.apryse.com/core/info/modules/. If you have already")
            print("downloaded this module, ensure that the SDK is able to find the required files")
            print("using the PDFNet::AddResourceSearchPath() function.")
            print("")
            return

        _run_conversions()
    finally:
        PDFNet.Terminate()

    print("Done.")
155
# Run the sample only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales