HTML2PDF - HTML to PDF Conversion - Python Sample Code

Sample code for using Apryse SDK to directly convert HTML pages to PDF by using 'pdftron.PDF.HTML2PDF', provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. The HTML2PDF converter supports conversion from a string or URL and offers many options to control page size and formatting.

To use this code, you'll need to:

  1. Download and get started with Server SDK
  2. Install the HTML2PDF Module

Learn more about our Server SDK and PDF Conversion Library.

#---------------------------------------------------------------------------------------
# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
# Consult LICENSE.txt regarding license information.
#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11sys.path.append("../../LicenseKey/PYTHON")
12from LicenseKey import *
13
#---------------------------------------------------------------------------------------
# The following sample illustrates how to convert HTML pages to PDF format using
# the HTML2PDF class.
#
# 'pdftron.PDF.HTML2PDF' is an optional PDFNet Add-On utility class that can be
# used to convert HTML web pages into PDF documents by using an external module (html2pdf).
#
# html2pdf modules can be downloaded from https://dev.apryse.com/.
#
# Users can convert HTML pages to PDF using the following operations:
# - Simple one-line static method to convert a single web page to PDF.
# - Convert HTML pages from URL or string, plus optional table of contents, in user-defined order.
# - Optionally configure settings for proxy, images, JavaScript, and more for each HTML page.
# - Optionally configure the PDF output, including page size, margins, orientation, and more.
# - Optionally add table of contents, including setting the depth and appearance.
#---------------------------------------------------------------------------------------
30
def main():
    """Run the HTML2PDF sample conversions.

    Initializes PDFNet with the license key, runs the five example
    conversions, and always terminates the library afterwards -- including
    when the html2pdf module is missing or a conversion raises.
    """
    # The first step in every application using PDFNet is to initialize the
    # library and set the path to common PDF resources. The library is usually
    # initialized only once, but calling Initialize() multiple times is also fine.
    PDFNet.Initialize(LicenseKey)
    try:
        _run_examples()
    finally:
        # Release library resources on every exit path, not only on success.
        PDFNet.Terminate()


def _run_examples():
    """Convert the sample web pages and HTML string to PDF files.

    Expects PDFNet to be initialized already. Returns early, after printing
    a notice, when the html2pdf module cannot be located.
    """
    output_path = "../../TestFiles/Output/html2pdf_example"
    host = "https://docs.apryse.com"
    page0 = "/"
    page1 = "/all-products/"
    page2 = "/web/faq"

    # For HTML2PDF we need to locate the html2pdf module. If placed with the
    # PDFNet library, or in the current working directory, it will be loaded
    # automatically. Otherwise, it must be set manually using HTML2PDF.SetModulePath.
    HTML2PDF.SetModulePath("../../../PDFNetC/Lib/")
    if not HTML2PDF.IsModuleAvailable():
        print("""
        Unable to run HTML2PDFTest: PDFTron SDK HTML2PDF module not available.
        ---------------------------------------------------------------
        The HTML2PDF module is an optional add-on, available for download
        at https://www.pdftron.com/. If you have already downloaded this
        module, ensure that the SDK is able to find the required files
        using the HTML2PDF.SetModulePath() function.""")
        return

    #--------------------------------------------------------------------------------
    # Example 1) Simple conversion of a web page to a PDF doc.

    doc = PDFDoc()
    # now convert a web page, sending generated PDF pages to doc
    converter = HTML2PDF()
    converter.InsertFromURL(host + page0)
    converter.Convert(doc)
    doc.Save(output_path + "_01.pdf", SDFDoc.e_linearized)

    #--------------------------------------------------------------------------------
    # Example 2) Modify the settings of the generated PDF pages and attach to an
    # existing PDF document.

    # open the existing PDF, and initialize the security handler
    doc = PDFDoc("../../TestFiles/numbered.pdf")
    doc.InitSecurityHandler()

    # create the HTML2PDF converter object and modify the output of the PDF pages
    converter = HTML2PDF()
    converter.SetPaperSize(PrinterMode.e_11x17)

    # insert the web page to convert
    converter.InsertFromURL(host + page0)

    # convert the web page, appending generated PDF pages to doc
    converter.Convert(doc)
    doc.Save(output_path + "_02.pdf", SDFDoc.e_linearized)

    #--------------------------------------------------------------------------------
    # Example 3) Convert multiple web pages

    doc = PDFDoc()
    converter = HTML2PDF()

    header = "<div style='width:15%;margin-left:0.5cm;text-align:left;font-size:10px;color:#0000FF'><span class='date'></span></div><div style='width:70%;direction:rtl;white-space:nowrap;overflow:hidden;text-overflow:clip;text-align:center;font-size:10px;color:#0000FF'><span>PDFTRON HEADER EXAMPLE</span></div><div style='width:15%;margin-right:0.5cm;text-align:right;font-size:10px;color:#0000FF'><span class='pageNumber'></span> of <span class='totalPages'></span></div>"
    footer = "<div style='width:15%;margin-left:0.5cm;text-align:left;font-size:7px;color:#FF00FF'><span class='date'></span></div><div style='width:70%;direction:rtl;white-space:nowrap;overflow:hidden;text-overflow:clip;text-align:center;font-size:7px;color:#FF00FF'><span>PDFTRON FOOTER EXAMPLE</span></div><div style='width:15%;margin-right:0.5cm;text-align:right;font-size:7px;color:#FF00FF'><span class='pageNumber'></span> of <span class='totalPages'></span></div>"
    converter.SetHeader(header)
    converter.SetFooter(footer)
    converter.SetMargins("1cm", "2cm", ".5cm", "1.5cm")

    # convert page 0 with the zoom settings; previously the settings object
    # was built but not passed here, so page 0 was converted with defaults
    settings = WebPageSettings()
    settings.SetZoom(0.5)
    converter.InsertFromURL(host + page0, settings)
    converter.Convert(doc)

    # convert page 1 with the same settings, appending generated PDF pages to doc
    converter.InsertFromURL(host + page1, settings)
    converter.Convert(doc)

    # convert page 2 with different settings, appending generated PDF pages to doc
    another_converter = HTML2PDF()
    another_converter.SetLandscape(True)
    another_settings = WebPageSettings()
    another_settings.SetPrintBackground(False)
    another_converter.InsertFromURL(host + page2, another_settings)
    another_converter.Convert(doc)

    doc.Save(output_path + "_03.pdf", SDFDoc.e_linearized)

    #--------------------------------------------------------------------------------
    # Example 4) Convert HTML string to PDF.

    doc = PDFDoc()
    converter = HTML2PDF()

    # Our HTML data
    html = "<html><body><h1>Heading</h1><p>Paragraph.</p></body></html>"

    # Add html data
    converter.InsertFromHtmlString(html)
    # Note, InsertFromHtmlString can be mixed with the other Insert methods.

    converter.Convert(doc)
    doc.Save(output_path + "_04.pdf", SDFDoc.e_linearized)

    #--------------------------------------------------------------------------------
    # Example 5) Set the location of the log file to be used during conversion.

    doc = PDFDoc()
    # now convert a web page, sending generated PDF pages to doc
    converter = HTML2PDF()
    converter.SetLogFilePath("../../TestFiles/Output/html2pdf.log")
    converter.InsertFromURL(host + page0)
    converter.Convert(doc)
    doc.Save(output_path + "_05.pdf", SDFDoc.e_linearized)


if __name__ == '__main__':
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales