Convert PDF to PNG, JPG, BMP or TIFF - Python Sample Code

Sample code to use Apryse SDK's built-in rasterizer to render PDF images on the fly and save the resulting images in various raster image formats (such as PNG, JPEG, BMP, TIFF). Samples provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our Server SDK and PDF Conversion Library.

#---------------------------------------------------------------------------------------
# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
# Consult LICENSE.txt regarding license information.
#---------------------------------------------------------------------------------------

import site
# Add the PDFNetC library folder to the module search path before importing
# the PDFNetPython bindings from it.
site.addsitedir("../../../PDFNetC/Lib")
import sys
from PDFNetPython import *

# The LicenseKey module provides the 'LicenseKey' value passed to
# PDFNet.Initialize() in main().
sys.path.append("../../LicenseKey/PYTHON")
from LicenseKey import *

# Relative paths to the folder containing the input test files and to the
# folder that receives the generated images.
input_path = "../../TestFiles/"
output_path = "../../TestFiles/Output/"

#---------------------------------------------------------------------------------------
# The following sample illustrates how to convert PDF documents to various raster image
# formats (such as PNG, JPEG, BMP, TIFF, etc), as well as how to rasterize a PDF page
# into a raw in-memory bitmap for further manipulation.
#---------------------------------------------------------------------------------------

def _open_pdf(filename):
    """Open a PDF from the test-files folder and initialize its security handler.

    filename is relative to the module-level 'input_path'. Returns the opened
    PDFDoc.
    """
    pdf = PDFDoc(input_path + filename)
    # Initialize the security handler, in case the PDF is encrypted.
    pdf.InitSecurityHandler()
    return pdf


def _run_samples():
    """Run rasterization examples 1-10, writing every result under 'output_path'.

    A single PDFDraw instance is shared by all examples, so its settings (DPI,
    rotation, image size, page box, smoothing, overprint) carry over from one
    example to the next; the order of the calls below is therefore significant.
    PDFNet must already be initialized by the caller.
    """
    # PDFDraw class is used to rasterize PDF pages.
    draw = PDFDraw()

    # A collection of rendering 'hints' (encoder/rasterizer option dictionaries).
    hint_set = ObjSet()

    #--------------------------------------------------------------------------------
    # Example 1) Convert the first page to PNG and TIFF at 92 DPI.
    # A three step tutorial to convert PDF page to an image.

    # A) Open the PDF document.
    doc = _open_pdf("tiger.pdf")

    # B) The output resolution is set to 92 DPI.
    draw.SetDPI(92)

    # C) Rasterize the first page in the document and save the result as PNG.
    itr = doc.GetPageIterator()
    first_page = itr.Current()
    draw.Export(first_page, output_path + "tiger_92dpi.png")

    print("Example 1: tiger_92dpi.png")

    # Export the same page as TIFF.
    draw.Export(first_page, output_path + "tiger_92dpi.tif", "TIFF")

    #--------------------------------------------------------------------------------
    # Example 2) Convert all pages in a given document to JPEG at 72 DPI.

    print("Example 2:")

    doc = _open_pdf("newsletter.pdf")

    # Set the output resolution to 72 DPI.
    draw.SetDPI(72)

    # Use optional encoder parameter to specify JPEG quality.
    encoder_param = hint_set.CreateDict()
    encoder_param.PutNumber("Quality", 80)

    # Traverse all pages in the document.
    itr = doc.GetPageIterator()
    while itr.HasNext():
        filename = "newsletter" + str(itr.Current().GetIndex()) + ".jpg"
        print(filename)
        draw.Export(itr.Current(), output_path + filename, "JPEG", encoder_param)
        itr.Next()
    print("Done.")

    # Examples 3-6
    # Common code for remaining samples.
    tiger_doc = _open_pdf("tiger.pdf")
    page = tiger_doc.GetPage(1)

    #--------------------------------------------------------------------------------
    # Example 3) Convert the first page to raw bitmap. Also, rotate the
    # page 90 degrees and save the result as RAW.
    draw.SetDPI(100)            # Set the output resolution to 100 DPI.
    draw.SetRotate(Page.e_90)   # Rotate all pages 90 degrees clockwise.
    bmp = draw.GetBitmap(page, PDFDraw.e_rgb)

    # Save the raw RGB data to disk. The file is opened in binary mode on every
    # Python version: writing str(buffer) to a text-mode file on Python 3 stores
    # the buffer's printable representation instead of the pixel bytes.
    # NOTE(review): assumes GetBuffer() returns a bytes-like object or a
    # sequence of ints, both of which bytearray() accepts - confirm.
    with open(output_path + "tiger_100dpi_rot90.raw", "wb") as raw_file:
        raw_file.write(bytearray(bmp.GetBuffer()))

    print("Example 3: tiger_100dpi_rot90.raw")

    draw.SetRotate(Page.e_0)    # Disable image rotation for remaining samples.

    #--------------------------------------------------------------------------------
    # Example 4) Convert PDF page to a fixed image size. Also illustrates some
    # other features in PDFDraw class such as rotation, image stretching, exporting
    # to grayscale, or monochrome.

    # Initialize render 'mono_hint' parameter, that is used to control the
    # rendering process. In this case we tell the rasterizer to export the image as
    # 1 Bit Per Component (BPC) image.
    mono_hint = hint_set.CreateDict()
    mono_hint.PutNumber("BPC", 1)

    # SetImageSize can be used instead of SetDPI() to adjust page scaling
    # dynamically so that given image fits into a buffer of given dimensions.
    draw.SetImageSize(1000, 1000)   # Set the output image to be 1000 wide and 1000 pixels tall
    draw.Export(page, output_path + "tiger_1000x1000.png", "PNG", mono_hint)
    print("Example 4: tiger_1000x1000.png")

    draw.SetImageSize(200, 400)     # Set the output image to be 200 wide and 400 pixels tall
    draw.SetRotate(Page.e_180)      # Rotate all pages 180 degrees

    # 'gray_hint' tells the rasterizer to export the image as grayscale.
    gray_hint = hint_set.CreateDict()
    gray_hint.PutName("ColorSpace", "Gray")

    draw.Export(page, output_path + "tiger_200x400_rot180.png", "PNG", gray_hint)
    print("Example 4: tiger_200x400_rot180.png")

    draw.SetImageSize(400, 200, False)  # The third parameter sets 'preserve-aspect-ratio' to False
    draw.SetRotate(Page.e_0)            # Disable image rotation
    draw.Export(page, output_path + "tiger_400x200_stretch.jpg", "JPEG")
    print("Example 4: tiger_400x200_stretch.jpg")

    #--------------------------------------------------------------------------------
    # Example 5) Zoom into a specific region of the page and rasterize the
    # area at 900 DPI.
    zoom_rect = Rect(216, 522, 330, 600)
    page.SetCropBox(zoom_rect)      # Set the page crop box.

    # Select the crop region to be used for drawing.
    draw.SetPageBox(Page.e_crop)
    draw.SetDPI(900)                # Set the output image resolution to 900 DPI.
    draw.Export(page, output_path + "tiger_zoom_900dpi.png", "PNG")
    print("Example 5: tiger_zoom_900dpi.png")

    #--------------------------------------------------------------------------------
    # Example 6) Rasterize the same region as a thumbnail (i.e. a 50x50 pixel image).
    draw.SetImageSize(50, 50)       # Set the thumbnail to be 50x50 pixel image.
    draw.Export(page, output_path + "tiger_zoom_50x50.png", "PNG")
    print("Example 6: tiger_zoom_50x50.png")

    #--------------------------------------------------------------------------------
    # Example 7) Convert the first PDF page to CMYK TIFF at 92 DPI.
    # A three step tutorial to convert PDF page to an image.
    cmyk_hint = hint_set.CreateDict()
    cmyk_hint.PutName("ColorSpace", "CMYK")

    # A) Open the PDF document.
    doc = _open_pdf("tiger.pdf")

    # B) The output resolution is set to 92 DPI.
    draw.SetDPI(92)

    # C) Rasterize the first page in the document and save the result as TIFF.
    pg = doc.GetPage(1)
    draw.Export(pg, output_path + "out1.tif", "TIFF", cmyk_hint)
    print("Example 7: out1.tif")

    doc.Close()

    #--------------------------------------------------------------------------------
    # Example 8) Rasterize one quadrant of the first page into a memory buffer
    # using PDFRasterizer directly.

    # A) Open the PDF document.
    doc = _open_pdf("tiger.pdf")

    # B) Get the page matrix.
    pg = doc.GetPage(1)
    box = Page.e_crop
    mtx = pg.GetDefaultMatrix(True, box)
    # We want to render a quadrant, so use half of width and height.
    pg_w = pg.GetPageWidth(box) / 2
    pg_h = pg.GetPageHeight(box) / 2

    # C) Scale matrix from PDF space to buffer space.
    dpi = 96.0
    scale = dpi / 72.0      # PDF space is 72 dpi
    buf_w = int(scale * pg_w)
    buf_h = int(scale * pg_h)
    bytes_per_pixel = 4     # BGRA buffer
    stride = buf_w * bytes_per_pixel    # bytes per buffer row
    mtx.Translate(0, -pg_h)     # translate by '-pg_h' since we want south-west quadrant
    mtx = Matrix2D(scale, 0, 0, scale, 0, 0).Multiply(mtx)

    # D) Rasterize page into memory buffer, according to our parameters.
    rast = PDFRasterizer()
    buf = rast.Rasterize(pg, buf_w, buf_h, stride, bytes_per_pixel, True, mtx)

    # buf now contains raw BGRA bitmap.
    print("Example 8: Successfully rasterized into memory buffer.")

    #--------------------------------------------------------------------------------
    # Example 9) Export raster content to PNG using different image smoothing settings.
    text_doc = _open_pdf("lorem_ipsum.pdf")

    draw.SetImageSmoothing(False, False)
    filename = "raster_text_no_smoothing.png"
    draw.Export(text_doc.GetPageIterator().Current(), output_path + filename)
    print("Example 9 a): " + filename + ". Done.")

    filename = "raster_text_smoothed.png"
    draw.SetImageSmoothing(True, False)     # second argument = default quality bilinear resampling
    draw.Export(text_doc.GetPageIterator().Current(), output_path + filename)
    print("Example 9 b): " + filename + ". Done.")

    filename = "raster_text_high_quality.png"
    # NOTE(review): second argument True presumably selects the higher-quality
    # resampling (as the output file name suggests) - confirm against the
    # SetImageSmoothing documentation.
    draw.SetImageSmoothing(True, True)
    draw.Export(text_doc.GetPageIterator().Current(), output_path + filename)
    print("Example 9 c): " + filename + ". Done.")

    #--------------------------------------------------------------------------------
    # Example 10) Export separations directly, without conversion to an output colorspace.
    separation_doc = _open_pdf("op_blend_test.pdf")
    separation_hint = hint_set.CreateDict()
    separation_hint.PutName("ColorSpace", "Separation")
    draw.SetDPI(96)
    draw.SetImageSmoothing(True, True)
    draw.SetOverprint(PDFRasterizer.e_op_on)

    filename = "merged_separations.png"
    draw.Export(separation_doc.GetPageIterator().Current(), output_path + filename, "PNG")
    print("Example 10 a): " + filename + ". Done.")

    filename = "separation"
    draw.Export(separation_doc.GetPageIterator().Current(), output_path + filename, "PNG", separation_hint)
    print("Example 10 b): " + filename + "_[ink].png. Done.")

    filename = "separation_NChannel.tif"
    draw.Export(separation_doc.GetPageIterator().Current(), output_path + filename, "TIFF", separation_hint)
    print("Example 10 c): " + filename + ". Done.")


def main():
    """Initialize PDFNet, run all rasterization examples, then terminate PDFNet.

    Reads the sample PDFs from 'input_path' and writes PNG, TIFF, JPEG and RAW
    output files to 'output_path'. PDFNet.Terminate() is called even when one
    of the examples raises, so the library is always shut down.
    """
    # The first step in every application using PDFNet is to initialize the
    # library and set the path to common PDF resources. The library is usually
    # initialized only once, but calling Initialize() multiple times is also fine.
    PDFNet.Initialize(LicenseKey)

    # Optional: Set ICC color profiles to fine tune color conversion
    # for PDF 'device' color spaces...

    # PDFNet.SetResourcesPath("../../../resources")
    # PDFNet.SetColorManagement()
    # PDFNet.SetDefaultDeviceCMYKProfile("D:/Misc/ICC/USWebCoatedSWOP.icc")
    # PDFNet.SetDefaultDeviceRGBProfile("AdobeRGB1998.icc") # will search in PDFNet resource folder.

    # ----------------------------------------------------
    # Optional: Set predefined font mappings to override default font
    # substitution for documents with missing fonts...

    # PDFNet.AddFontSubst("StoneSans-Semibold", "C:/WINDOWS/Fonts/comic.ttf")
    # PDFNet.AddFontSubst("StoneSans", "comic.ttf")  # search for 'comic.ttf' in PDFNet resource folder.
    # PDFNet.AddFontSubst(PDFNet.e_Identity, "C:/WINDOWS/Fonts/arialuni.ttf")
    # PDFNet.AddFontSubst(PDFNet.e_Japan1, "C:/Program Files/Adobe/Acrobat 7.0/Resource/CIDFont/KozMinProVI-Regular.otf")
    # PDFNet.AddFontSubst(PDFNet.e_Japan2, "c:/myfonts/KozMinProVI-Regular.otf")
    # PDFNet.AddFontSubst(PDFNet.e_Korea1, "AdobeMyungjoStd-Medium.otf")
    # PDFNet.AddFontSubst(PDFNet.e_CNS1, "AdobeSongStd-Light.otf")
    # PDFNet.AddFontSubst(PDFNet.e_GB1, "AdobeMingStd-Light.otf")

    try:
        _run_samples()
    finally:
        PDFNet.Terminate()

# Run the sample only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales