Compress PDFs - Optimizer - Python Sample Code

Sample code for using Apryse SDK to reduce PDF file size by removing redundant information and compressing data streams using the latest in image compression technology. Samples provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby and VB. Learn more about our Server SDK.

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11sys.path.append("../../LicenseKey/PYTHON")
12from LicenseKey import *
13
14#---------------------------------------------------------------------------------------
15# The following sample illustrates how to reduce PDF file size using 'pdftron.PDF.Optimizer'.
16# The sample also shows how to simplify and optimize PDF documents for viewing on mobile devices
17# and on the Web using 'pdftron.PDF.Flattener'.
18#
19# @note Both 'Optimizer' and 'Flattener' are separately licensable add-on options to the core PDFNet license.
20#
21# ----
22#
23# 'pdftron.PDF.Optimizer' can be used to optimize PDF documents by reducing the file size, removing
24# redundant information, and compressing data streams using the latest in image compression technology.
25#
26# PDF Optimizer can compress and shrink PDF file size with the following operations:
27# - Remove duplicated fonts, images, ICC profiles, and any other data stream.
28# - Optionally convert high-quality or print-ready PDF files to small, efficient and web-ready PDF.
29# - Optionally down-sample large images to a given resolution.
30# - Optionally compress or recompress PDF images using JBIG2 and JPEG2000 compression formats.
31# - Compress uncompressed streams and remove unused PDF objects.
32# ----
33#
34# 'pdftron.PDF.Flattener' can be used to speed-up PDF rendering on mobile devices and on the Web by
35# simplifying page content (e.g. flattening complex graphics into images) while maintaining vector text
36# whenever possible.
37#
38# Flattener can also be used to simplify the process of writing custom converters from PDF to other formats.
39# In this case, Flattener can be used as the first step in the conversion pipeline to reduce any PDF to a
40# very simple representation (e.g. vector text on top of a background image).
41#---------------------------------------------------------------------------------------
42
def main():
    """Run the Optimizer, Flattener and SaveViewerOptimized examples.

    Five independent examples are executed in sequence. Each one opens an
    input PDF from the test-files folder, applies one size-reduction or
    simplification strategy, and writes the result to the output folder:

    1. Optimizer with default settings.
    2. Optimizer with low-quality JPEG recompression of color and
       grayscale images.
    3. Optimizer with JBIG2 recompression of monochrome images.
    4. Flattener with default settings.
    5. PDFDoc.SaveViewerOptimized with a custom thumbnail size.

    Every document is closed in a ``finally`` clause and the library is
    terminated in an outer ``finally`` clause, so a failure in one example
    does not leak the open document or skip ``PDFNet.Terminate()``. The
    exception itself still propagates to the caller.
    """
    # Relative paths to the folders containing the test files.
    input_path = "../../TestFiles/"
    output_path = "../../TestFiles/Output/"
    input_filename = "newsletter"
    # Examples 1, 2, 3 and 5 all start from the same source document.
    source_pdf = input_path + input_filename + ".pdf"

    # The first step in every application using PDFNet is to initialize the
    # library and set the path to common PDF resources. The library is usually
    # initialized only once, but calling Initialize() multiple times is also fine.
    PDFNet.Initialize(LicenseKey)
    try:
        # ----------------------------------------------------------------------
        # Example 1) Simple optimization of a pdf with default settings.
        doc = PDFDoc(source_pdf)
        try:
            doc.InitSecurityHandler()
            Optimizer.Optimize(doc)
            doc.Save(output_path + input_filename + "_opt1.pdf", SDFDoc.e_linearized)
        finally:
            doc.Close()

        # ----------------------------------------------------------------------
        # Example 2) Reduce image quality and use jpeg compression for
        # non monochrome images.
        doc = PDFDoc(source_pdf)
        try:
            doc.InitSecurityHandler()
            image_settings = ImageSettings()

            # Low quality jpeg compression.
            image_settings.SetCompressionMode(ImageSettings.e_jpeg)
            image_settings.SetQuality(1)

            # Set the output dpi to be standard screen resolution: images
            # above the first value (maximum dpi) are resampled down to the
            # second value (resampling dpi).
            image_settings.SetImageDPI(144, 96)

            # This option will recompress images not compressed with
            # jpeg compression and use the result if the new image
            # is smaller.
            image_settings.ForceRecompression(True)

            # This option is not commonly used since it can
            # potentially lead to larger files. It should be enabled
            # only if the output compression specified should be applied
            # to every image of a given type regardless of the output image size.
            #image_settings.ForceChanges(True)

            # Use the same settings for both color and grayscale images.
            opt_settings = OptimizerSettings()
            opt_settings.SetColorImageSettings(image_settings)
            opt_settings.SetGrayscaleImageSettings(image_settings)

            Optimizer.Optimize(doc, opt_settings)
            doc.Save(output_path + input_filename + "_opt2.pdf", SDFDoc.e_linearized)
        finally:
            doc.Close()

        # ----------------------------------------------------------------------
        # Example 3) Use monochrome image settings and default settings
        # for color and grayscale images.
        doc = PDFDoc(source_pdf)
        try:
            doc.InitSecurityHandler()

            mono_image_settings = MonoImageSettings()
            mono_image_settings.SetCompressionMode(MonoImageSettings.e_jbig2)
            mono_image_settings.ForceRecompression(True)

            opt_settings = OptimizerSettings()
            opt_settings.SetMonoImageSettings(mono_image_settings)

            Optimizer.Optimize(doc, opt_settings)
            doc.Save(output_path + input_filename + "_opt3.pdf", SDFDoc.e_linearized)
        finally:
            doc.Close()

        # ----------------------------------------------------------------------
        # Example 4) Use Flattener to simplify content in this document
        # using default settings.
        doc = PDFDoc(input_path + "TigerText.pdf")
        try:
            doc.InitSecurityHandler()

            fl = Flattener()
            # The following lines can increase the resolution of background
            # images.
            #fl.SetDPI(300)
            #fl.SetMaximumImagePixels(5000000)

            # This line can be used to output Flate compressed background
            # images rather than DCTDecode compressed images which is the default.
            #fl.SetPreferJPG(False)

            # In order to adjust thresholds for when text is Flattened
            # the following function can be used.
            #fl.SetThreshold(Flattener.e_threshold_keep_most)

            # We use e_fast option here since it is usually preferable
            # to avoid Flattening simple pages in terms of size and
            # rendering speed. If the desire is to simplify the
            # document for processing such that it contains only text and
            # a background image e_simple should be used instead.
            fl.Process(doc, Flattener.e_fast)
            doc.Save(output_path + "TigerText_flatten.pdf", SDFDoc.e_linearized)
        finally:
            doc.Close()

        # ----------------------------------------------------------------------
        # Example 5) Optimize a PDF for viewing using SaveViewerOptimized.
        doc = PDFDoc(source_pdf)
        try:
            doc.InitSecurityHandler()

            opts = ViewerOptimizedOptions()

            # Set the maximum dimension (width or height) that thumbnails will have.
            opts.SetThumbnailSize(1500)

            # Set thumbnail rendering threshold. A number from 0 (include all
            # thumbnails) to 100 (include only the first thumbnail)
            # representing the complexity at which SaveViewerOptimized would
            # include the thumbnail.
            # By default it only produces thumbnails on the first and complex pages.
            # The following line will produce thumbnails on every page.
            # opts.SetThumbnailRenderingThreshold(0)

            doc.SaveViewerOptimized(
                output_path + input_filename + "_SaveViewerOptimized.pdf", opts
            )
        finally:
            doc.Close()
    finally:
        # Release library resources even if one of the examples failed.
        PDFNet.Terminate()


if __name__ == '__main__':
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales