Convert to PDF/UA - Python Sample Code

Sample code for using Apryse SDK to programmatically convert generic PDF documents into ISO-compliant, VeraPDF-valid PDF/UA files. Supports PDF/UA-1. Sample code provided in Python, C++, C#, Go, Java, Node.js (JavaScript), PHP, Ruby and VB.

Learn more about our Server SDK and PDF/UA Library.

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using pdftron;
7using pdftron.SDF;
8using pdftron.PDF;
9using pdftron.PDF.PDFUA;
10
11//---------------------------------------------------------------------------------------
12// The following sample illustrates how to make sure a file meets the PDF/UA standard, using the PDFUAConformance class object.
13// Note: this feature is currently experimental and subject to change
14//
15// DataExtractionModule is required (Mac users can use StructuredOutputModule instead)
16// https://docs.apryse.com/documentation/core/info/modules/#data-extraction-module
17// https://docs.apryse.com/documentation/core/info/modules/#structured-output-module (Mac)
18//---------------------------------------------------------------------------------------
namespace PDFUATestCS
{
	/// <summary>
	/// Console sample that converts two test PDFs to PDF/UA with
	/// <c>PDFUAConformance.AutoConvert</c>: once with default options and once
	/// with linearized output.
	/// </summary>
	class PDFUATest
	{
		// Assigned during type initialization, i.e. before Main executes.
		// NOTE(review): presumably this loads the native PDFNet library - confirm against PDFNetLoader.
		private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();
		static PDFUATest() {}

		// Relative path to the folder containing test files.
		static string input_path =  "../../../../TestFiles/";
		static string output_path = "../../../../TestFiles/Output/";

		// DataExtraction library location, replace if desired, should point to a folder that includes the contents of <DataExtractionModuleRoot>/Lib.
		// If using default, unzip the DataExtraction zip to the parent folder of Samples, and merge with existing "Lib" folder
		static string extraction_module_path = "../../../../../Lib/";


		/// <summary>
		/// The main entry point for the application.
		/// </summary>
		/// <param name="args">Command-line arguments; not used by this sample.</param>
		[STAThread]
		static void Main(string[] args)
		{
			try {
				PDFNet.Initialize(PDFTronLicense.Key);

				String input_file1 = input_path + "autotag_input.pdf";
				String input_file2 = input_path + "table.pdf";
				String output_file1 = output_path + "autotag_pdfua.pdf";
				String output_file2 = output_path + "table_pdfua_linearized.pdf";

				//-----------------------------------------------------------
				// Example: PDF/UA Conversion
				//-----------------------------------------------------------
				Console.WriteLine("AutoConverting...");

				PDFNet.AddResourceSearchPath(extraction_module_path);
				if(!DataExtractionModule.IsModuleAvailable(DataExtractionModule.DataExtractionEngine.e_doc_structure))
				{
					Console.Error.WriteLine("Unable to run PDFUATest: Apryse SDK Data Extraction module not available.");
					Console.Error.WriteLine("---------------------------------------------------------------");
					Console.Error.WriteLine("The Data Extraction module is an optional add-on, available for download");
					Console.Error.WriteLine("at https://apryse.com/. If you have already downloaded this");
					Console.Error.WriteLine("module, ensure that the SDK is able to find the required files");
					Console.Error.WriteLine("using the PDFNet::AddResourceSearchPath() function.");
					Console.Error.WriteLine("");
					// The finally block below still runs and terminates PDFNet.
					return;
				}

				PDFUAConformance pdf_ua = new PDFUAConformance();

				Console.WriteLine("Simple Conversion...");
				{
					// Perform conversion using default options
					pdf_ua.AutoConvert(input_file1, output_file1);
				}

				Console.WriteLine("Converting With Options...");
				{
					PDFUAOptions pdf_ua_opts = new PDFUAOptions();
					pdf_ua_opts.SetSaveLinearized(true); // Linearize when saving output
					// Note: if file is password protected, you can use pdf_ua_opts.SetPassword()

					// Perform conversion using the options we specify
					pdf_ua.AutoConvert(input_file2, output_file2, pdf_ua_opts);
				}

			}
			catch (pdftron.Common.PDFNetException e)
			{
				Console.Error.WriteLine(e.GetMessage());
				// Report failure through the process exit code instead of calling
				// Environment.Exit(-1): Exit ends the process immediately, so the
				// finally block (and PDFNet.Terminate()) would never execute.
				Environment.ExitCode = -1;
			}
			finally
			{
				PDFNet.Terminate();
				Console.WriteLine("PDFUAConformance test completed.");
			}
		}
	}
}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8	"fmt"
9	"testing"
10	"flag"
11	. "github.com/pdftron/pdftron-go/v2"
12)
13
14var licenseKey string
15var modulePath string
16
17func init() {
18	flag.StringVar(&licenseKey, "license", "", "License key for Apryse SDK")
19	flag.StringVar(&modulePath, "modulePath", "", "Module path for Apryse SDK")
20}
21
22//---------------------------------------------------------------------------------------
23// The following sample illustrates how to make sure a file meets the PDF/UA standard, using the PDFUAConformance class object.
24// Note: this feature is currently experimental and subject to change
25//
26// DataExtractionModule is required (Mac users can use StructuredOutputModule instead)
27// https://docs.apryse.com/documentation/core/info/modules/#data-extraction-module
28// https://docs.apryse.com/documentation/core/info/modules/#structured-output-module (Mac)
29//---------------------------------------------------------------------------------------
30
31// Relative path to the folder containing the test files.
32var inputPath = "../TestFiles/"
33var outputPath = "../TestFiles/Output/"
34
35func TestPDFUA(t *testing.T) {
36
37	inputFile1 := inputPath + "autotag_input.pdf"
38	inputFile2 := inputPath + "table.pdf"
39	outputFile1 := outputPath + "autotag_pdfua.pdf"
40	outputFile2 := outputPath + "table_pdfua_linearized.pdf"
41
42	PDFNetInitialize(licenseKey)
43
44	fmt.Println("AutoConverting...")
45
46	PDFNetAddResourceSearchPath(modulePath)
47
48	if !DataExtractionModuleIsModuleAvailable(DataExtractionModuleE_DocStructure) {
49		fmt.Println("")
50		fmt.Println("Unable to run Data Extraction: PDFTron SDK Structured Output module not available.")
51		fmt.Println("-----------------------------------------------------------------------------")
52		fmt.Println("The Data Extraction suite is an optional add-on, available for download")
53		fmt.Println("at https://docs.apryse.com/documentation/core/info/modules/. If you have already")
54		fmt.Println("downloaded this module, ensure that the SDK is able to find the required files")
55		fmt.Println("using the PDFNetAddResourceSearchPath() function.")
56		fmt.Println("")
57		PDFNetTerminate()
58		return
59	}
60
61	pdfua := NewPDFUAConformance()
62
63	fmt.Println("Simple Conversion...")
64
65	// Perform conversion using default options
66	pdfua.AutoConvert(inputFile1, outputFile1)
67
68	fmt.Println("Converting With Options...")
69
70	pdfuaOpts := NewPDFUAOptions()
71	pdfuaOpts.SetSaveLinearized(true) // Linearize when saving output
72	// Note: if file is password protected, you can use pdfuaOpts.SetPassword()
73
74	// Perform conversion using the options we specify
75	pdfua.AutoConvert(inputFile2, outputFile2, pdfuaOpts)
76
77	PDFNetTerminate()
78	fmt.Println("PDFUAConformance test completed.")
79}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.PDFNetException;
7import com.pdftron.pdf.*;
8import com.pdftron.pdf.pdfua.*;
9
10//---------------------------------------------------------------------------------------
11// The following sample illustrates how to make sure a file meets the PDF/UA standard, using the PDFUAConformance class object.
12// Note: this feature is currently experimental and subject to change
13//
14// DataExtractionModule is required (Mac users can use StructuredOutputModule instead)
15// https://docs.apryse.com/documentation/core/info/modules/#data-extraction-module
16// https://docs.apryse.com/documentation/core/info/modules/#structured-output-module (Mac)
17//---------------------------------------------------------------------------------------
18public class PDFUATest {
19
20    // Relative path to the folder containing test files.
21    public static final String input_path = "../../TestFiles/";
22    public static final String output_path = "../../TestFiles/Output/";
23
24    // DataExtraction library location, replace if desired, should point to a folder that includes the contents of <DataExtractionModuleRoot>/Lib.
25    // If using default, unzip the DataExtraction zip to the parent folder of Samples, and merge with existing "Lib" folder
26    public static final String extraction_module_path = "../../../Lib/";
27
28    public static void main(String[] args) {
29        try {
30            PDFNet.initialize(PDFTronLicense.Key());
31
32            String input_file1 = input_path + "autotag_input.pdf";
33            String input_file2 = input_path + "table.pdf";
34            String output_file1 = output_path + "autotag_pdfua.pdf";
35            String output_file2 = output_path + "table_pdfua_linearized.pdf";
36
37            //-----------------------------------------------------------
38            // Example: PDF/UA Conversion
39            //-----------------------------------------------------------
40            System.out.println("AutoConverting...");
41
42            PDFNet.addResourceSearchPath(extraction_module_path);
43            if(!DataExtractionModule.isModuleAvailable(DataExtractionModule.DataExtractionEngine.e_doc_structure))
44            {
45                System.out.println("Unable to run PDFUATest: Apryse SDK Data Extraction module not available.");
46                System.out.println("---------------------------------------------------------------");
47                System.out.println("The Data Extraction module is an optional add-on, available for download");
48                System.out.println("at https://apryse.com/. If you have already downloaded this");
49                System.out.println("module, ensure that the SDK is able to find the required files");
50                System.out.println("using the PDFNet::AddResourceSearchPath() function.");
51                System.out.println("");
52                return;
53            }
54
55            PDFUAConformance pdf_ua = new PDFUAConformance();
56
57            System.out.println("Simple Conversion...");
58            {
59                // Perform conversion using default options
60                pdf_ua.autoConvert(input_file1, output_file1);
61            }
62
63            System.out.println("Converting With Options...");
64            {
65                PDFUAOptions pdf_ua_opts = new PDFUAOptions();
66                pdf_ua_opts.setSaveLinearized(true); // Linearize when saving output
67                // Note: if file is password protected, you can use pdf_ua_opts.setPassword()
68
69                // Perform conversion using the options we specify
70                pdf_ua.autoConvert(input_file2, output_file2, pdf_ua_opts);
71            }
72
73        } catch (PDFNetException e) {
74            System.out.println(e.getMessage());
75        } finally {
76            PDFNet.terminate();
77            System.out.println("PDFUAConformance test completed.");
78        }
79    }
80
81}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5#include <iostream>
6#include <PDF/PDFNet.h>
7#include <PDF/DataExtractionModule.h>
8#include "../../LicenseKey/CPP/LicenseKey.h"
9#include <PDF/PDFUA/PDFUAConformance.h>
10#include <PDF/PDFUA/PDFUAOptions.h>
11using namespace std;
12using namespace pdftron;
13using namespace pdftron::PDF;
14using namespace pdftron::PDF::PDFUA;
15//---------------------------------------------------------------------------------------
16// The following sample illustrates how to make sure a file meets the PDF/UA standard, using the PDFUAConformance class object.
17// Note: this feature is currently experimental and subject to change
18//
19// DataExtractionModule is required (Mac users can use StructuredOutputModule instead)
20// https://docs.apryse.com/documentation/core/info/modules/#data-extraction-module
21// https://docs.apryse.com/documentation/core/info/modules/#structured-output-module (Mac)
22//---------------------------------------------------------------------------------------
23int main(int argc, char *argv[])
24{
25	// Relative path to the folder containing test files.
26	static UString input_path("../../TestFiles/");
27	static UString output_path("../../TestFiles/Output/");
28	// DataExtraction library location, replace if desired, should point to a folder that includes the contents of <DataExtractionModuleRoot>/Lib.
29	// If using default, unzip the DataExtraction zip to the parent folder of Samples, and merge with existing "Lib" folder.
30	static UString extraction_module_path("../../../Lib/");
31	UString input_file1 = input_path + "autotag_input.pdf";
32	UString input_file2 = input_path + "table.pdf";
33	UString output_file1 = output_path + "autotag_pdfua.pdf";
34	UString output_file2 = output_path + "table_pdfua_linearized.pdf";
35	int ret = 0;
36	try
37	{
38		PDFNet::Initialize(LicenseKey);
39		cout << "AutoConverting..." << endl;
40		PDFNet::AddResourceSearchPath(extraction_module_path);
41		if (!DataExtractionModule::IsModuleAvailable(DataExtractionModule::e_DocStructure))
42		{
43			cout << endl;
44			cout << "Unable to run PDFUATest: Apryse SDK Data Extraction module not available." << endl;
45			cout << "---------------------------------------------------------------" << endl;
46			cout << "The Data Extraction module is an optional add-on, available for download" << endl;
47			cout << "at https://apryse.com/. If you have already downloaded this" << endl;
48			cout << "module, ensure that the SDK is able to find the required files" << endl;
49			cout << "using the PDFNet::AddResourceSearchPath() function." << endl << endl;
50			return 1;
51		}
52		PDFUAConformance pdf_ua;
53		cout << "Simple Conversion..." << endl;
54		{
55			// Perform conversion using default options
56			pdf_ua.AutoConvert(input_file1, output_file1);
57		}
58		cout << "Converting With Options..." << endl;
59		{
60			PDFUAOptions pdf_ua_opts;
61			pdf_ua_opts.SetSaveLinearized(true); // Linearize when saving output
62			// Note: if file is password protected, you can use pdf_ua_opts.SetPassword()
63			// Perform conversion using the options we specify
64			pdf_ua.AutoConvert(input_file2, output_file2, pdf_ua_opts);
65		}
66	}
67	catch (Common::Exception& e)
68	{
69		cout << e << endl;
70		ret = 1;
71	}
72	catch (...) {
73		cout << "Unknown Exception" << endl;
74		ret = 1;
75	}
76	cout << "PDFUAConformance test completed." << endl;
77	PDFNet::Terminate();
78	return ret;
79}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5const { PDFNet } = require('../../lib/pdfnet.js');
6const PDFTronLicense = require('../../LicenseKey/NODEJS/LicenseKey');
7((exports) => {
8	'use strict';
9	exports.runPDFUATest = () => {
10		//---------------------------------------------------------------------------------------
11		// The following sample illustrates how to make sure a file meets the PDF/UA standard, using the PDFUAConformance class object.
12		// Note: this feature is currently experimental and subject to change
13		//
14		// DataExtractionModule is required (Mac users can use StructuredOutputModule instead)
15		// https://docs.apryse.com/documentation/core/info/modules/#data-extraction-module
16		// https://docs.apryse.com/documentation/core/info/modules/#structured-output-module (Mac)
17		//---------------------------------------------------------------------------------------
18		const main = async () => {
19			// Relative path to the folder containing test files.
20			const input_path = '../TestFiles/';
21			const output_path = '../TestFiles/Output/';
22			// DataExtraction library location, replace if desired, should point to a folder that includes the contents of <DataExtractionModuleRoot>/Lib.
23			// If using default, unzip the DataExtraction zip to the parent folder of Samples, and merge with existing "Lib" folder
24			const extraction_module_path = '../../lib/';
25			const input_file1 = input_path + 'autotag_input.pdf';
26			const input_file2 = input_path + 'table.pdf';
27			const output_file1 = output_path + 'autotag_pdfua.pdf';
28			const output_file2 = output_path + 'table_pdfua_linearized.pdf';
29			try {
30				//-----------------------------------------------------------
31				// Example: PDF/UA Conversion
32				//-----------------------------------------------------------
33				console.log('AutoConverting...');
34				await PDFNet.addResourceSearchPath(extraction_module_path);
35				if (!await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure)) {
36					console.log('');
37					console.log('Unable to run PDFUATest: Apryse SDK Data Extraction module not available.');
38					console.log('---------------------------------------------------------------');
39					console.log('The Data Extraction module is an optional add-on, available for download');
40					console.log('at https://apryse.com/. If you have already downloaded this');
41					console.log('module, ensure that the SDK is able to find the required files');
42					console.log('using the PDFNet.addResourceSearchPath() function.');
43					console.log('');
44					return;
45				}
46				const pdf_ua = await PDFNet.PDFUAConformance.create();
47				console.log('Simple Conversion...');
48				{
49					// Perform conversion using default options
50					await pdf_ua.autoConvert(input_file1, output_file1);
51				}
52				console.log('Converting With Options...');
53				{
54					const pdf_ua_opts = await PDFNet.PDFUAConformance.createPDFUAOptions();
55					pdf_ua_opts.setSaveLinearized(true); // Linearize when saving output
56					// Note: if file is password protected, you can use pdf_ua_opts.setPassword()
57					// Perform conversion using the options we specify
58					await pdf_ua.autoConvert(input_file2, output_file2, pdf_ua_opts);
59				}
60			} catch (err) {
61				console.log(err);
62			}
63			console.log('PDFUAConformance test completed.');
64		}
65		PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
66			console.log('Error: ' + JSON.stringify(error));
67		}).then(function () { return PDFNet.shutdown(); });
68	};
69	exports.runPDFUATest();
70})(exports);
71// eslint-disable-next-line spaced-comment
72//# sourceURL=PDFUATest.js

1<?php
2//---------------------------------------------------------------------------------------
3// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
4// Consult LICENSE.txt regarding license information.
5//---------------------------------------------------------------------------------------
6if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
7include("../../../PDFNetC/Lib/PDFNetPHP.php");
8include("../../LicenseKey/PHP/LicenseKey.php");
9//---------------------------------------------------------------------------------------
10// The following sample illustrates how to make sure a file meets the PDF/UA standard, using the PDFUAConformance class object.
11// Note: this feature is currently experimental and subject to change
12//
13// DataExtractionModule is required (Mac users can use StructuredOutputModule instead)
14// https://docs.apryse.com/documentation/core/info/modules/#data-extraction-module
15// https://docs.apryse.com/documentation/core/info/modules/#structured-output-module (Mac)
16//---------------------------------------------------------------------------------------
// Converts two sample PDFs to PDF/UA: one with default options and one with
// linearized output. Prints a notice and returns early when the Data
// Extraction module is not available.
function main()
{
	// Relative path to the folder containing the test files.
	$input_path = "../../TestFiles/";
	$output_path = "../../TestFiles/Output/";
	// DataExtraction library location, replace if desired, should point to a folder that includes the contents of <DataExtractionModuleRoot>/Lib.
	// If using default, unzip the DataExtraction zip to the parent folder of Samples, and merge with existing "Lib" folder.
	$extraction_module_path = "../../../PDFNetC/Lib/";
	$input_file1 = $input_path."autotag_input.pdf";
	$input_file2 = $input_path."table.pdf";
	$output_file1 = $output_path."autotag_pdfua.pdf";
	$output_file2 = $output_path."table_pdfua_linearized.pdf";
	global $LicenseKey;
	PDFNet::Initialize($LicenseKey);
	// Everything after initialization runs inside try/finally so that
	// PDFNet::Terminate() is reached on every path, including the early
	// return below and exceptions from the setup calls.
	try {
		// NOTE(review): return value is unused; presumably this forces the system
		// font list to load before any work starts - confirm against the SDK docs.
		PDFNet::GetSystemFontList();
		echo(nl2br("AutoConverting...\n"));
		PDFNet::AddResourceSearchPath($extraction_module_path);
		if (!DataExtractionModule::IsModuleAvailable(DataExtractionModule::e_DocStructure)) {
			echo(nl2br("\n"));
			// Names this sample and the module actually checked above.
			echo(nl2br("Unable to run PDFUATest: Apryse SDK Data Extraction module not available.\n"));
			echo(nl2br("-----------------------------------------------------------------------------\n"));
			echo(nl2br("The Data Extraction suite is an optional add-on, available for download\n"));
			echo(nl2br("at https://docs.apryse.com/documentation/core/info/modules/. If you have already\n"));
			echo(nl2br("downloaded this module, ensure that the SDK is able to find the required files\n"));
			echo(nl2br("using the PDFNet::AddResourceSearchPath() function.\n"));
			echo(nl2br("\n"));
			return;
		}
		$pdf_ua = new PDFUAConformance();
		echo(nl2br("Simple Conversion...\n"));
		// Perform conversion using default options
		$pdf_ua->AutoConvert($input_file1, $output_file1);
		echo(nl2br("Converting With Options...\n"));
		$pdf_ua_opts = new PDFUAOptions();
		$pdf_ua_opts->SetSaveLinearized(true); // Linearize when saving output
		// Note: if file is password protected, you can use $pdf_ua_opts->SetPassword()
		// Perform conversion using the options we specify
		$pdf_ua->AutoConvert($input_file2, $output_file2, $pdf_ua_opts);
	}
	catch(Exception $e) {
		echo(nl2br($e->getMessage()));
	}
	finally {
		PDFNet::Terminate();
	}
	echo(nl2br("PDFUAConformance test completed.\n"));
}
main();
65?>

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5import site
6site.addsitedir("../../../PDFNetC/Lib")
7import sys
8from PDFNetPython import *
9sys.path.append("../../LicenseKey/PYTHON")
10from LicenseKey import *
11#---------------------------------------------------------------------------------------
12# The following sample illustrates how to make sure a file meets the PDF/UA standard, using the PDFUAConformance class object.
13# Note: this feature is currently experimental and subject to change
14#
15# DataExtractionModule is required (Mac users can use StructuredOutputModule instead)
16# https://docs.apryse.com/documentation/core/info/modules/#data-extraction-module
17# https://docs.apryse.com/documentation/core/info/modules/#structured-output-module (Mac)
18#---------------------------------------------------------------------------------------
# Relative path to the folder containing the test files.
input_path = "../../TestFiles/"
output_path = "../../TestFiles/Output/"
# DataExtraction library location, replace if desired, should point to a folder that includes the contents of <DataExtractionModuleRoot>/Lib.
# If using default, unzip the DataExtraction zip to the parent folder of Samples, and merge with existing "Lib" folder.
extraction_module_path = "../../../PDFNetC/Lib/"


def main():
    """Convert two sample PDFs to PDF/UA.

    The first file is converted with default options, the second with
    linearized output. When the Data Extraction module is not available a
    notice is printed and the function returns without converting anything.
    """
    input_file1 = input_path + "autotag_input.pdf"
    input_file2 = input_path + "table.pdf"
    output_file1 = output_path + "autotag_pdfua.pdf"
    output_file2 = output_path + "table_pdfua_linearized.pdf"
    PDFNet.Initialize(LicenseKey)
    # Everything after initialization runs inside try/finally so that
    # PDFNet.Terminate() is reached on every path, including the early
    # return below and exceptions raised by the setup calls.
    try:
        print("AutoConverting...")
        PDFNet.AddResourceSearchPath(extraction_module_path)
        if not DataExtractionModule.IsModuleAvailable(DataExtractionModule.e_DocStructure):
            print("")
            # Names this sample and the module actually checked above.
            print("Unable to run PDFUATest: Apryse SDK Data Extraction module not available.")
            print("-----------------------------------------------------------------------------")
            print("The Data Extraction suite is an optional add-on, available for download")
            print("at https://docs.apryse.com/documentation/core/info/modules/. If you have already")
            print("downloaded this module, ensure that the SDK is able to find the required files")
            print("using the PDFNet.AddResourceSearchPath() function.")
            print("")
            return
        pdf_ua = PDFUAConformance()
        print("Simple Conversion...")
        # Perform conversion using default options
        pdf_ua.AutoConvert(input_file1, output_file1)
        print("Converting With Options...")
        pdf_ua_opts = PDFUAOptions()
        pdf_ua_opts.SetSaveLinearized(True)  # Linearize when saving output
        # Note: if file is password protected, you can use pdf_ua_opts.SetPassword()
        # Perform conversion using the options we specify
        pdf_ua.AutoConvert(input_file2, output_file2, pdf_ua_opts)
    except Exception as e:
        print(str(e))
    finally:
        PDFNet.Terminate()
    print("PDFUAConformance test completed.")


if __name__ == '__main__':
    main()

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5require '../../../PDFNetC/Lib/PDFNetRuby'
6include PDFNetRuby
7require '../../LicenseKey/RUBY/LicenseKey'
8$stdout.sync = true
9#---------------------------------------------------------------------------------------
10# The following sample illustrates how to make sure a file meets the PDF/UA standard, using the PDFUAConformance class object.
11# Note: this feature is currently experimental and subject to change
12#
13# DataExtractionModule is required (Mac users can use StructuredOutputModule instead)
14# https://docs.apryse.com/documentation/core/info/modules/#data-extraction-module
15# https://docs.apryse.com/documentation/core/info/modules/#structured-output-module (Mac)
16#---------------------------------------------------------------------------------------
# Relative path to the folder containing the test files.
$input_path = "../../TestFiles/"
$output_path = "../../TestFiles/Output/"
# DataExtraction library location, replace if desired, should point to a folder that includes the contents of <DataExtractionModuleRoot>/Lib.
# If using default, unzip the DataExtraction zip to the parent folder of Samples, and merge with existing "Lib" folder.
$extraction_module_path = "../../../PDFNetC/Lib/"

# Converts two sample PDFs to PDF/UA: one with default options and one with
# linearized output. Prints a notice and returns early when the Data
# Extraction module is not available.
def main()
	input_file1 = $input_path + "autotag_input.pdf"
	input_file2 = $input_path + "table.pdf"
	output_file1 = $output_path + "autotag_pdfua.pdf"
	output_file2 = $output_path + "table_pdfua_linearized.pdf"
	PDFNet.Initialize(PDFTronLicense.Key)
	# Everything after initialization sits in begin/ensure so that
	# PDFNet.Terminate is reached on every path, including the early
	# return below and errors raised by the setup calls.
	begin
		puts "AutoConverting..."
		PDFNet.AddResourceSearchPath($extraction_module_path)
		if !DataExtractionModule.IsModuleAvailable(DataExtractionModule::E_DocStructure) then
			puts ""
			# Names this sample and the module actually checked above.
			puts "Unable to run PDFUATest: Apryse SDK Data Extraction module not available."
			puts "-----------------------------------------------------------------------------"
			puts "The Data Extraction suite is an optional add-on, available for download"
			puts "at https://docs.apryse.com/documentation/core/info/modules/. If you have already"
			puts "downloaded this module, ensure that the SDK is able to find the required files"
			puts "using the PDFNet.AddResourceSearchPath() function."
			puts ""
			return
		end
		pdf_ua = PDFUAConformance.new()
		puts "Simple Conversion..."
		# Perform conversion using default options
		pdf_ua.AutoConvert(input_file1, output_file1)
		puts "Converting With Options..."
		pdf_ua_opts = PDFUAOptions.new()
		pdf_ua_opts.SetSaveLinearized(true) # Linearize when saving output
		# Note: if file is password protected, you can use pdf_ua_opts.SetPassword()
		# Perform conversion using the options we specify
		pdf_ua.AutoConvert(input_file2, output_file2, pdf_ua_opts)
	rescue => error
		puts error.message
	ensure
		PDFNet.Terminate
	end
	puts "PDFUAConformance test completed."
end
main()

1'---------------------------------------------------------------------------------------
2' Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3' Consult legal.txt regarding legal and license information.     
4'---------------------------------------------------------------------------------------
5Imports System
6
7Imports pdftron
8Imports pdftron.PDF
9Imports PDFTRON.PDF.PDFUA
10
11'//---------------------------------------------------------------------------------------
12'// The following sample illustrates how to make sure a file meets the PDF/UA standard, using the PDFUAConformance class object.
13'// Note: this feature is currently experimental and subject to change
14'//
15'// DataExtractionModule is required (Mac users can use StructuredOutputModule instead)
16'// https://docs.apryse.com/documentation/core/info/modules/#data-extraction-module
17'// https://docs.apryse.com/documentation/core/info/modules/#structured-output-module (Mac)
18'//---------------------------------------------------------------------------------------
19
' Console sample that converts two test PDFs to PDF/UA with PDFUAConformance.AutoConvert:
' once with default options and once with linearized output.
Module mainModule
    Dim pdfNetLoader As PDFNetLoader
    ' NOTE(review): presumably obtaining the loader instance loads the native PDFNet library - confirm against PDFNetLoader.
    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub

    '// Relative path to the folder containing test files.
    Dim input_path As String =  "../../../../TestFiles/"
    Dim output_path As String = "../../../../TestFiles/Output/"

    '// DataExtraction library location, replace if desired, should point to a folder that includes the contents of <DataExtractionModuleRoot>/Lib.
    '// If using default, unzip the DataExtraction zip to the parent folder of Samples, and merge with existing "Lib" folder
    Dim extraction_module_path As String = "../../../../../Lib/"

    ' The main entry point for the application.
    Sub Main()

        PDFNet.Initialize(PDFTronLicense.Key)

        Try
            Dim input_file1 As String = input_path + "autotag_input.pdf"
            Dim input_file2 As String = input_path + "table.pdf"
            Dim output_file1 As String = output_path + "autotag_pdfua.pdf"
            Dim output_file2 As String = output_path + "table_pdfua_linearized.pdf"

            '//-----------------------------------------------------------
            '// Example: PDF/UA Conversion
            '//-----------------------------------------------------------
            Console.WriteLine("AutoConverting...")

            PDFNet.AddResourceSearchPath(extraction_module_path)
            If Not DataExtractionModule.IsModuleAvailable(DataExtractionModule.DataExtractionEngine.e_doc_structure) Then
                Console.WriteLine("Unable to run PDFUATest: Apryse SDK Data Extraction module not available.")
                Console.WriteLine("---------------------------------------------------------------")
                Console.WriteLine("The Data Extraction module is an optional add-on, available for download")
                Console.WriteLine("at https://apryse.com/. If you have already downloaded this")
                Console.WriteLine("module, ensure that the SDK is able to find the required files")
                Console.WriteLine("using the PDFNet::AddResourceSearchPath() function.")
                Console.WriteLine()
                '// The Finally block below still runs and terminates PDFNet.
                Return
            End If

            Dim pdf_ua As PDFUAConformance = New PDFUAConformance()

            Console.WriteLine("Simple Conversion...")

            '// Perform conversion using default options
            pdf_ua.AutoConvert(input_file1, output_file1)


            Console.WriteLine("Converting With Options...")

            Dim pdf_ua_opts As PDFUAOptions = New PDFUAOptions()
            pdf_ua_opts.SetSaveLinearized(True) '// Linearize when saving output
            '// Note: if file is password protected, you can use pdf_ua_opts.SetPassword()

            '// Perform conversion using the options we specify
            pdf_ua.AutoConvert(input_file2, output_file2, pdf_ua_opts)
        Catch e As PDFTRON.Common.PDFNetException
            Console.WriteLine(e.Message)
        Finally
            '// Terminate here rather than after End Try so that the early Return above
            '// and exceptions other than PDFNetException also shut PDFNet down.
            PDFNet.Terminate()
        End Try

        Console.WriteLine("PDFUAConformance test completed.")
    End Sub

End Module

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

Convert to PDF/UA - Python Sample Code