Convert to PDF/A - Python Sample Code

Sample code for using the Apryse Server SDK to programmatically convert generic PDF documents into ISO-compliant, VeraPDF-valid PDF/A files, or to validate PDF/A compliance. It supports all three PDF/A parts (PDF/A-1, PDF/A-2, PDF/A-3) and covers all conformance levels (A, B, U). The code below is the Python version. Learn more about our Server SDK and PDF/A Library. A command-line tool for batch conversion and validation is also available.

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11sys.path.append("../../LicenseKey/PYTHON")
12from LicenseKey import *
13
#---------------------------------------------------------------------------------------
# The following sample illustrates how to parse a PDF document and check whether it
# meets the PDF/A standard, using the PDFACompliance class object.
#---------------------------------------------------------------------------------------
18
def PrintResults(pdf_a, filename):
    """Print a PDF/A compliance report for one document to stdout.

    Args:
        pdf_a: Object exposing GetErrorCount(), GetError(index),
            GetRefObjCount(code) and GetRefObj(code, index); in this
            sample it is always a PDFACompliance instance.
        filename: Display name used in the report lines. It is only
            printed, never opened.

    Returns:
        None. The report is written with print().
    """
    err_cnt = pdf_a.GetErrorCount()
    if err_cnt == 0:
        print(filename + ": OK.")
        return

    print(filename + " is NOT a valid PDFA.")
    for i in range(err_cnt):
        c = pdf_a.GetError(i)
        report = " - e_PDFA " + str(c) + ": " + PDFACompliance.GetPDFAErrorMessage(c) + "."

        # List the objects referenced by this error code, if any were collected.
        num_refs = pdf_a.GetRefObjCount(c)
        if num_refs > 0:
            refs = ", ".join(str(pdf_a.GetRefObj(c, j)) for j in range(num_refs))
            report = report + "\n Objects: " + refs
        print(report)

    # Blank line separating this report from whatever is printed next.
    print('')
42
def main():
    """Run the PDF/A validation and conversion examples.

    Example 1 validates "newsletter.pdf" and prints the result.
    Example 2 converts "fish.pdf", saves it as "pdfa.pdf" in the output
    folder, then re-validates the saved file and prints the result.

    Every PDFACompliance object is destroyed, and PDFNet is terminated,
    even if one of the steps raises.
    """
    # Relative path to the folder containing the test files.
    input_path = "../../TestFiles/"
    output_path = "../../TestFiles/Output/"

    PDFNet.Initialize(LicenseKey)
    try:
        PDFNet.SetColorManagement()  # Enable color management (required for PDFA validation).

        #-----------------------------------------------------------
        # Example 1: PDF/A Validation
        #-----------------------------------------------------------
        filename = "newsletter.pdf"
        # The max_ref_objs parameter to the PDFACompliance constructor controls the maximum number
        # of object numbers that are collected for particular error codes. The default value is 10
        # in order to prevent spam. If you need all the object numbers, pass 0 for max_ref_objs.
        pdf_a = PDFACompliance(False, input_path + filename, None, PDFACompliance.e_Level2B, 0, 0, 10)
        try:
            PrintResults(pdf_a, filename)
        finally:
            pdf_a.Destroy()

        #-----------------------------------------------------------
        # Example 2: PDF/A Conversion
        #-----------------------------------------------------------
        filename = "fish.pdf"
        pdf_a = PDFACompliance(True, input_path + filename, None, PDFACompliance.e_Level2B, 0, 0, 10)
        filename = "pdfa.pdf"
        try:
            pdf_a.SaveAs(output_path + filename, False)
        finally:
            pdf_a.Destroy()

        # Re-validate the document after the conversion...
        pdf_a = PDFACompliance(False, output_path + filename, None, PDFACompliance.e_Level2B, 0, 0, 10)
        try:
            PrintResults(pdf_a, filename)
        finally:
            pdf_a.Destroy()
    finally:
        # Shut the SDK down on both the success and the failure path.
        PDFNet.Terminate()

    print("PDFACompliance test completed.")
78
# Run the sample only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales