DOCX, XLSX to PDF Conversion - OfficeToPDF - Python Sample Code

Sample code for using Apryse Server SDK to convert Office documents to PDF (including Word, Excel, PowerPoint and Publisher) without needing any external dependencies or MS Office licenses. Office to PDF conversion can be performed on a Linux or Windows server to automate Office-centric workflows, or entirely in the user's client (web browser, mobile device). The conversion functionality can be combined with our Viewer to display or annotate Office files (docx, xlsx, pptx) on all major platforms, including Web, Android, iOS, Xamarin, UWP, and Windows. Samples provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB.

Learn more about our Server SDK and Office Document Conversion Library.

#---------------------------------------------------------------------------------------
# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
# Consult LICENSE.txt regarding license information.
#---------------------------------------------------------------------------------------

import site
import sys

# Register the PDFNetC library folder as a site directory before importing
# the PDFNetPython bindings (presumably where the module is resolved from).
site.addsitedir("../../../PDFNetC/Lib")
from PDFNetPython import *

# Make the sample license-key module importable, then pull in LicenseKey.
sys.path.append("../../LicenseKey/PYTHON")
from LicenseKey import *

# Folders, relative to the working directory, holding the test documents
# that are read and the converted PDFs that are written.
input_path = "../../TestFiles/"
output_path = "../../TestFiles/Output/"
#------------------------------------------------------------------------------
# The following sample illustrates how to use the PDF.Convert utility class
# to convert MS Office files to PDF.
#
# This conversion is performed entirely within PDFNet and has *no*
# external or system dependencies -- conversion results will be
# the same whether on Windows, Linux or Android.
#
# Please contact us if you have any questions.
#------------------------------------------------------------------------------

def SimpleDocxConvert(input_filename, output_filename):
    """Convert one Office document to PDF with a single conversion call.

    input_filename is appended to the module-level input_path to locate the
    source document; output_filename is appended to output_path to name the
    saved PDF. Prints a confirmation line once the PDF has been saved.
    """
    # The PDFDoc is the destination that receives the converted content.
    document = PDFDoc()

    # One-shot conversion; None means no optional parameters are supplied.
    source_file = input_path + input_filename
    Convert.OfficeToPDF(document, source_file, None)

    # Write the result using the e_linearized save flag.
    target_file = output_path + output_filename
    document.Save(target_file, SDFDoc.e_linearized)

    print("Saved " + output_filename )
def FlexibleDocxConvert(input_filename, output_filename):
    """Convert an Office document to PDF one page at a time.

    Uses the streaming conversion API with explicit options, so the
    conversion can be advanced step by step. On success every conversion
    warning is printed and the PDF is saved; on failure the conversion's
    error string is printed and nothing is saved.

    Args:
        input_filename: File name of the source document, appended to the
            module-level input_path.
        output_filename: File name of the resulting PDF, appended to the
            module-level output_path.
    """
    # Start with a PDFDoc (the conversion destination)
    pdfdoc = PDFDoc()

    options = OfficeToPDFOptions()

    # set up smart font substitutions to improve conversion results
    # in situations where the original fonts are not available
    options.SetSmartSubstitutionPluginPath(input_path)

    # create a conversion object -- this sets things up but does not yet
    # perform any conversion logic.
    # in a multithreaded environment, this object can be used to monitor
    # the conversion progress and potentially cancel it as well
    conversion = Convert.StreamingPDFConversion(pdfdoc, input_path + input_filename, options)

    # Print the progress of the conversion.
    # print("Status: " + str(conversion.GetProgress()*100) + "%, " +
    #       conversion.GetProgressLabel())

    # actually perform the conversion
    # this particular method will not throw on conversion failure, but will
    # return an error status instead
    while conversion.GetConversionStatus() == DocumentConversion.eIncomplete:
        conversion.ConvertNextPage()
        # print out the progress status as we go
        # print("Status: " + str(conversion.GetProgress()*100) + "%, " +
        #       conversion.GetProgressLabel())

    if conversion.GetConversionStatus() == DocumentConversion.eSuccess:
        # print information about the conversion; range() drives the index,
        # so no manual counter initialisation or increment is needed
        for i in range(conversion.GetNumWarnings()):
            print("Conversion Warning: " + conversion.GetWarningString(i))

        # save the result
        pdfdoc.Save(output_path + output_filename, SDFDoc.e_linearized)
        # done
        print("Saved " + output_filename)
    else:
        print("Encountered an error during conversion: " + conversion.GetErrorString())
def main():
    """Run both Office-to-PDF conversion samples, then shut the library down."""
    # PDFNet has to be initialized before any other library call. It is
    # normally done once, although repeated Initialize() calls are also fine.
    PDFNet.Initialize(LicenseKey)
    PDFNet.SetResourcesPath("../../../Resources")

    # Each job is (converter, source document, resulting PDF): the one-line
    # conversion runs first, followed by the more flexible page-by-page API.
    jobs = (
        (SimpleDocxConvert,
         "simple-word_2007.docx",
         "simple-word_2007.pdf"),
        (FlexibleDocxConvert,
         "the_rime_of_the_ancient_mariner.docx",
         "the_rime_of_the_ancient_mariner.pdf"),
    )
    for convert, source_name, target_name in jobs:
        convert(source_name, target_name)

    PDFNet.Terminate()

    print("Done.")
# Run the sample only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales