PDF2Office - Convert PDF to DOCX, XLSX - Python Sample Code

Sample code for using Apryse SDK to programmatically convert generic PDF documents to Word, Excel, PowerPoint; provided in Python, C++, C#, Go, Java, Node.js (JavaScript), PHP, Ruby and VB. Learn more about our PDF to Office

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11import platform
12
13sys.path.append("../../LicenseKey/PYTHON")
14from LicenseKey import *
15
16#---------------------------------------------------------------------------------------
17# The following sample illustrates how to use the PDF.Convert utility class to convert
18# documents and files to Word, Excel and PowerPoint.
19#
20# The Structured Output module is an optional PDFNet Add-on that can be used to convert PDF
21# and other documents into Word, Excel, PowerPoint and HTML format.
22#
23# The PDFTron SDK Structured Output module can be downloaded from
24# https://docs.apryse.com/core/info/modules/
25#
26# Please contact us if you have any questions.
27#---------------------------------------------------------------------------------------
28
# Relative path to the folder containing the test files (conversion inputs).
inputPath = "../../TestFiles/"
# Relative path to the folder where main() writes the converted documents.
outputPath = "../../TestFiles/Output/"
32
def main():
    """Convert the sample PDF to Word, Excel and PowerPoint documents.

    Initializes PDFNet, verifies that the Structured Output module is
    available, and then runs six conversions of ``paragraphs_and_tables.pdf``:
    each target format once for the whole document and once restricted to a
    single page through the matching ``*OutputOptions`` object.

    Every conversion is attempted independently; a failure is reported on
    stdout and the remaining conversions still run.  The library is
    terminated on every exit path, including the early return taken when the
    Structured Output module is missing.
    """
    # The first step in every application using PDFNet is to initialize the
    # library. The library is usually initialized only once, but calling
    # Initialize() multiple times is also fine.
    PDFNet.Initialize(LicenseKey)

    # Everything after Initialize() lives in try/finally so that Terminate()
    # is always paired with it, even on the early return below or on an
    # unexpected error.
    try:
        PDFNet.AddResourceSearchPath("../../../PDFNetC/Lib/")

        if not StructuredOutputModule.IsModuleAvailable():
            print("")
            print("Unable to run the sample: PDFTron SDK Structured Output module not available.")
            print("-----------------------------------------------------------------------------")
            print("The Structured Output module is an optional add-on, available for download")
            print("at https://docs.apryse.com/core/info/modules/. If you have already")
            print("downloaded this module, ensure that the SDK is able to find the required files")
            print("using the PDFNet::AddResourceSearchPath() function.")
            print("")
            return

        #-----------------------------------------------------------------------------------

        try:
            # Convert PDF document to Word
            print("Converting PDF to Word")

            outputFile = outputPath + "paragraphs_and_tables.docx"

            Convert.ToWord(inputPath + "paragraphs_and_tables.pdf", outputFile)

            print("Result saved in " + outputFile)
        except Exception as e:
            print("Unable to convert PDF document to Word, error: " + str(e))

        #-----------------------------------------------------------------------------------

        try:
            # Convert PDF document to Word with options
            print("Converting PDF to Word with options")

            outputFile = outputPath + "paragraphs_and_tables_first_page.docx"

            wordOutputOptions = WordOutputOptions()

            # Convert only the first page
            wordOutputOptions.SetPages(1, 1)

            Convert.ToWord(inputPath + "paragraphs_and_tables.pdf", outputFile, wordOutputOptions)

            print("Result saved in " + outputFile)
        except Exception as e:
            print("Unable to convert PDF document to Word, error: " + str(e))

        #-----------------------------------------------------------------------------------

        try:
            # Convert PDF document to Excel
            print("Converting PDF to Excel")

            outputFile = outputPath + "paragraphs_and_tables.xlsx"

            Convert.ToExcel(inputPath + "paragraphs_and_tables.pdf", outputFile)

            print("Result saved in " + outputFile)
        except Exception as e:
            print("Unable to convert PDF document to Excel, error: " + str(e))

        #-----------------------------------------------------------------------------------

        try:
            # Convert PDF document to Excel with options
            print("Converting PDF to Excel with options")

            outputFile = outputPath + "paragraphs_and_tables_second_page.xlsx"

            excelOutputOptions = ExcelOutputOptions()

            # Convert only the second page
            excelOutputOptions.SetPages(2, 2)

            Convert.ToExcel(inputPath + "paragraphs_and_tables.pdf", outputFile, excelOutputOptions)

            print("Result saved in " + outputFile)
        except Exception as e:
            print("Unable to convert PDF document to Excel, error: " + str(e))

        #-----------------------------------------------------------------------------------

        try:
            # Convert PDF document to PowerPoint
            print("Converting PDF to PowerPoint")

            outputFile = outputPath + "paragraphs_and_tables.pptx"

            Convert.ToPowerPoint(inputPath + "paragraphs_and_tables.pdf", outputFile)

            print("Result saved in " + outputFile)
        except Exception as e:
            print("Unable to convert PDF document to PowerPoint, error: " + str(e))

        #-----------------------------------------------------------------------------------

        try:
            # Convert PDF document to PowerPoint with options
            print("Converting PDF to PowerPoint with options")

            outputFile = outputPath + "paragraphs_and_tables_first_page.pptx"

            powerPointOutputOptions = PowerPointOutputOptions()

            # Convert only the first page
            powerPointOutputOptions.SetPages(1, 1)

            Convert.ToPowerPoint(inputPath + "paragraphs_and_tables.pdf", outputFile, powerPointOutputOptions)

            print("Result saved in " + outputFile)
        except Exception as e:
            print("Unable to convert PDF document to PowerPoint, error: " + str(e))

        #-----------------------------------------------------------------------------------
    finally:
        # Release the library's resources no matter how the sample exits.
        PDFNet.Terminate()

    # Reached only when the conversions were attempted (not on early return).
    print("Done.")
155
# Run the sample only when this file is executed directly as a script.
if __name__ == "__main__":
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales