PDF Page Manipulation - Merge, Copy, Delete, Rearrange - Python Sample Code

Sample code for using the Apryse SDK to copy pages from one document to another, delete and rearrange pages, and use the ImportPages() method for very efficient copy and merge operations. Sample code is provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby and VB.

Learn more about our Server SDK and PDF Editing & Manipulation Library.

#---------------------------------------------------------------------------------------
# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
# Consult LICENSE.txt regarding license information.
#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11sys.path.append("../../LicenseKey/PYTHON")
12from LicenseKey import *
13
def main():
    """Run six page-manipulation samples against the bundled test PDFs.

    The samples, in order:
      1. Split newsletter.pdf into one output file per page.
      2. Merge the files written by Sample 1 back into a single document.
      3. Remove pages from newsletter.pdf, stepping backwards two at a time.
      4. Insert the first page of fish.pdf at intervals inside newsletter.pdf.
      5. Replicate the cover page of newsletter.pdf within the same document.
      6. Copy all pages at once with ImportPages() so shared resources are
         preserved, then add them to a new document in reverse order.

    Input files are read from ``../../TestFiles/`` and results are written to
    ``../../TestFiles/Output/``. Every document is closed explicitly as soon
    as it is no longer needed.
    """
    PDFNet.Initialize(LicenseKey)

    # Relative path to the folder containing the test files.
    input_path = "../../TestFiles/"
    output_path = "../../TestFiles/Output/"

    # Sample 1 - Split a PDF document into multiple pages
    print("_______________________________________________")
    print("Sample 1 - Split a PDF document into multiple pages...")
    print("Opening the input pdf...")
    in_doc = PDFDoc(input_path + "newsletter.pdf")
    in_doc.InitSecurityHandler()

    # Remembered so that Sample 2 merges exactly the files written here
    # instead of relying on a hard-coded page count.
    split_count = in_doc.GetPageCount()
    for i in range(1, split_count + 1):
        new_doc = PDFDoc()
        new_doc.InsertPages(0, in_doc, i, i, PDFDoc.e_none)
        new_doc.Save(
            output_path + "newsletter_split_page_" + str(i) + ".pdf",
            SDFDoc.e_remove_unused,
        )
        print("Done. Result saved in newsletter_split_page_" + str(i) + ".pdf")
        new_doc.Close()

    # Close the open document to free up document memory sooner than waiting for the
    # garbage collector
    in_doc.Close()

    # Sample 2 - Merge several PDF documents into one
    print("_______________________________________________")
    print("Sample 2 - Merge several PDF documents into one...")
    new_doc = PDFDoc()
    new_doc.InitSecurityHandler()

    for i in range(1, split_count + 1):
        print("Opening newsletter_split_page_" + str(i) + ".pdf")
        in_doc = PDFDoc(output_path + "newsletter_split_page_" + str(i) + ".pdf")
        new_doc.InsertPages(i, in_doc, 1, in_doc.GetPageCount(), PDFDoc.e_none)
        # Each single-page source is closed as soon as its pages are copied.
        in_doc.Close()

    new_doc.Save(output_path + "newsletter_merge_pages.pdf", SDFDoc.e_remove_unused)
    print("Done. Result saved in newsletter_merge_pages.pdf")

    # Close the merged document to free up document memory sooner than waiting
    # for the garbage collector. (Every in_doc was already closed in the loop.)
    new_doc.Close()

    # Sample 3 - Delete every second page
    print("_______________________________________________")
    print("Sample 3 - Delete every second page...")
    print("Opening the input pdf...")
    in_doc = PDFDoc(input_path + "newsletter.pdf")
    in_doc.InitSecurityHandler()

    # Walk backwards from the last page so that a removal never shifts the
    # page numbers that are still to be visited.
    for page_num in range(in_doc.GetPageCount(), 0, -2):
        itr = in_doc.GetPageIterator(page_num)
        in_doc.PageRemove(itr)

    in_doc.Save(output_path + "newsletter_page_remove.pdf", 0)
    print("Done. Result saved in newsletter_page_remove.pdf...")

    # Close the open document to free up document memory sooner than waiting for the
    # garbage collector
    in_doc.Close()

    # Sample 4 - Inserts a page from one document at different
    # locations within another document
    print("_______________________________________________")
    print("Sample 4 - Insert a page at different locations...")
    print("Opening the input pdf...")

    in1_doc = PDFDoc(input_path + "newsletter.pdf")
    in1_doc.InitSecurityHandler()
    in2_doc = PDFDoc(input_path + "fish.pdf")
    in2_doc.InitSecurityHandler()

    src_page = in2_doc.GetPageIterator()
    dst_page = in1_doc.GetPageIterator()
    page_num = 1
    while dst_page.HasNext():
        # On every third iteration, insert the source page at the current
        # destination position.
        if page_num % 3 == 0:
            in1_doc.PageInsert(dst_page, src_page.Current())
        page_num = page_num + 1
        dst_page.Next()
    in1_doc.Save(output_path + "newsletter_page_insert.pdf", 0)
    print("Done. Result saved in newsletter_page_insert.pdf...")

    # Close the open document to free up document memory sooner than waiting for the
    # garbage collector
    in1_doc.Close()
    in2_doc.Close()

    # Sample 5 - Replicate pages within a single document
    print("_______________________________________________")
    print("Sample 5 - Replicate pages within a single document...")
    print("Opening the input pdf...")

    doc = PDFDoc(input_path + "newsletter.pdf")
    doc.InitSecurityHandler()

    # Replicate the cover page three times (copy page #1 and place it before the
    # seventh page in the document page sequence)
    cover = doc.GetPage(1)
    p7 = doc.GetPageIterator(7)
    doc.PageInsert(p7, cover)
    doc.PageInsert(p7, cover)
    doc.PageInsert(p7, cover)

    # Replicate the cover page two more times by placing it before and after
    # existing pages.
    doc.PagePushFront(cover)
    doc.PagePushBack(cover)

    doc.Save(output_path + "newsletter_page_clone.pdf", 0)
    print("Done. Result saved in newsletter_page_clone.pdf...")
    doc.Close()

    # Sample 6 - Use ImportPages() in order to copy multiple pages at once
    # in order to preserve shared resources between pages (e.g. images, fonts,
    # colorspaces, etc.)
    print("_______________________________________________")
    print("Sample 6 - Preserving shared resources using ImportPages...")
    print("Opening the input pdf...")
    in_doc = PDFDoc(input_path + "newsletter.pdf")
    in_doc.InitSecurityHandler()
    new_doc = PDFDoc()

    # Collect every source page so they can be imported in a single call.
    copy_pages = VectorPage()
    itr = in_doc.GetPageIterator()
    while itr.HasNext():
        copy_pages.push_back(itr.Current())
        itr.Next()

    imported_pages = new_doc.ImportPages(copy_pages)
    for page in imported_pages:
        new_doc.PagePushFront(page)  # Order pages in reverse order.
        # Use PagePushBack() if you would like to preserve the same order.

    new_doc.Save(output_path + "newsletter_import_pages.pdf", 0)

    # Close the open document to free up document memory sooner than waiting for the
    # garbage collector
    in_doc.Close()
    new_doc.Close()

    PDFNet.Terminate()

    print("Done. Result saved in newsletter_import_pages.pdf...\n")
    print("Note that the output file size is less than half the size")
    print("of the file produced using individual page copy operations")
    print("between two documents")
170
# Run the samples only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales