Edit PDF - Python Sample Code

Sample code for using Apryse SDK to programmatically edit an existing PDF document's page display list and the graphics state attributes on existing elements. In particular, this sample strips all images from the page, changes the path fill color to red, and changes the text color to blue. Sample code is provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. You can also build a GUI with interactive PDF editor widgets. Some of Apryse SDK's other functions for programmatically editing PDFs include the Cos/SDF low-level API, page manipulation, and more. Learn more about our Server SDK and PDF Editing & Manipulation Library.

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11sys.path.append("../../LicenseKey/PYTHON")
12from LicenseKey import *
13
14#---------------------------------------------------------------------------------------
15# The sample code shows how to edit the page display list and how to modify graphics state
16# attributes on existing Elements. In particular the sample program strips all images from
17# the page, changes path fill color to red, and changes text color to blue.
18#---------------------------------------------------------------------------------------
19
def ProcessElements(reader, writer, map):
    """Copy a display list from ``reader`` to ``writer`` with edits applied.

    Elements are read until ``reader.Next()`` returns ``None``. Each one is
    handled according to its type:

    * image and inline-image elements are not written back, which removes
      them from the output;
    * path elements get a DeviceRGB fill color of red (1, 0, 0);
    * text elements get a DeviceRGB fill color of blue (0, 0, 1);
    * form XObject elements are written unchanged, and the XObject is
      recorded in ``map`` so the caller can process its content afterwards
      (this function does not recurse into it);
    * every other element is written unchanged.

    Args:
        reader: Element reader on which ``Begin`` has already been called.
        writer: Element writer on which ``Begin`` has already been called.
        map: Dict updated in place; maps the object number of each form
            XObject encountered to the XObject itself. The name shadows the
            builtin but is kept because it is part of the signature.
    """
    element = reader.Next()  # Read page contents
    while element is not None:
        kind = element.GetType()
        # Images are removed simply by never writing them to the output.
        if kind not in (Element.e_image, Element.e_inline_image):
            if kind == Element.e_path:
                # Set all paths to red color.
                gs = element.GetGState()
                gs.SetFillColorSpace(ColorSpace.CreateDeviceRGB())
                gs.SetFillColor(ColorPt(1, 0, 0))
            elif kind == Element.e_text:
                # Set all text to blue color.
                gs = element.GetGState()
                gs.SetFillColorSpace(ColorSpace.CreateDeviceRGB())
                gs.SetFillColor(ColorPt(0, 0, 1))
            elif kind == Element.e_form:
                # Queue the form XObject; the caller processes its content.
                xobj = element.GetXObject()
                map[xobj.GetObjNum()] = xobj
            writer.WriteElement(element)
        element = reader.Next()
50
def main():
    """Strip images and recolor paths and text in the sample newsletter PDF.

    Opens ``newsletter.pdf`` from the test-file folder, rewrites the display
    list of every page through ``ProcessElements``, then does the same for
    every form XObject reachable from that page (including forms nested in
    other forms). The result is saved as ``newsletter_edited.pdf`` in the
    output folder.
    """
    PDFNet.Initialize(LicenseKey)

    # Relative path to the folder containing the test files.
    input_path = "../../TestFiles/"
    output_path = "../../TestFiles/Output/"
    input_filename = "newsletter.pdf"
    output_filename = "newsletter_edited.pdf"

    # Open the test file
    print("Opening the input file...")
    doc = PDFDoc(input_path + input_filename)
    doc.InitSecurityHandler()

    writer = ElementWriter()
    reader = ElementReader()

    itr = doc.GetPageIterator()

    while itr.HasNext():
        page = itr.Current()
        reader.Begin(page)
        writer.Begin(page, ElementWriter.e_replacement, False)
        # Form XObjects referenced directly by the page, keyed by object number.
        pending = {}
        ProcessElements(reader, writer, pending)
        writer.End()
        reader.End()

        # Process form XObjects one nesting level at a time. Each pass fills
        # a fresh dict with the forms found inside the current level, so the
        # dict being iterated is never modified. (Deleting keys while
        # iterating over dict.keys() raises RuntimeError on Python 3.)
        while pending:
            nested = {}
            for obj in pending.values():
                writer.Begin(obj)
                reader.Begin(obj, page.GetResourceDict())
                ProcessElements(reader, writer, nested)
                reader.End()
                writer.End()
            pending = nested
        itr.Next()

    doc.Save(output_path + output_filename, SDFDoc.e_remove_unused)
    doc.Close()
    PDFNet.Terminate()
    print("Done. Result saved in " + output_filename +"...")
100
# Run the sample only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales