PDF2Html - Convert PDF to HTML - Python Sample Code

Sample code for using the Apryse SDK to programmatically convert generic PDF documents to HTML, provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our PDF to HTML conversion.

To run this sample, you will need to:

1. Get started with Server SDK in your language/framework
2. Download the Structured Output Module

Learn more about our Server SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#include <iostream>
7#include <sstream>
8#include <PDF/PDFNet.h>
9#include <PDF/Convert.h>
10#include <PDF/StructuredOutputModule.h>
11#include "../../LicenseKey/CPP/LicenseKey.h"
12
13//---------------------------------------------------------------------------------------
14// The following sample illustrates how to use the PDF::Convert utility class to convert 
15// documents and files to HTML.
16//
17// There are two HTML modules and one of them is an optional PDFNet Add-on.
18// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
19//    documents.
20// 2. The optional add-on module is used to convert PDF documents to HTML documents with
21//    text flowing across the browser window.
22//
23// The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
24//
25// Please contact us if you have any questions.	
26//---------------------------------------------------------------------------------------
27
28using namespace pdftron;
29using namespace PDF;
30using namespace std;
31
32UString inputPath("../../TestFiles/");
33UString outputPath("../../TestFiles/Output/");
34
35int main(int argc, char *argv[])
36{	
37	// The first step in every application using PDFNet is to initialize the 
38	// library. The library is usually initialized only once, but calling 
39	// Initialize() multiple times is also fine.
40	PDFNet::Initialize(LicenseKey);
41
42	int err = 0;
43
44	//////////////////////////////////////////////////////////////////////////
45
46	try
47	{
48		// Convert PDF document to HTML with fixed positioning option turned on (default)
49		cout << "Converting PDF to HTML with fixed positioning option turned on (default)" << endl;
50
51		UString outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";
52
53		// Convert PDF to HTML
54		Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);
55
56		cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
57	}
58	catch (Common::Exception& e)
59	{
60		cout << "Unable to convert PDF document to HTML, error: " << e << endl;
61		err = 1;
62	}
63	catch (...)
64	{
65		cout << "Unknown Exception" << endl;
66		err = 1;
67	}
68
69	//////////////////////////////////////////////////////////////////////////
70
71	PDFNet::AddResourceSearchPath("../../../Lib/");
72
73	if (!StructuredOutputModule::IsModuleAvailable())
74	{
75		cout << endl;
76		cout << "Unable to run part of the sample: Apryse SDK Structured Output module not available." << endl;
77		cout << "-------------------------------------------------------------------------------------" << endl;
78		cout << "The Structured Output module is an optional add-on, available for download" << endl;
79		cout << "at https://docs.apryse.com/core/guides/info/modules . If you have already" << endl;
80		cout << "downloaded this module, ensure that the SDK is able to find the required files" << endl;
81		cout << "using the PDFNet::AddResourceSearchPath() function." << endl;
82		cout << endl;
83		return 0;
84	}
85
86	//////////////////////////////////////////////////////////////////////////
87
88	try
89	{
90		// Convert PDF document to HTML with reflow full option turned on (1)
91		cout << "Converting PDF to HTML with reflow full option turned on (1)" << endl;
92
93		UString outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";
94
95		Convert::HTMLOutputOptions htmlOutputOptions;
96
97		// Set e_reflow_full content reflow setting
98		htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);
99
100		// Convert PDF to HTML
101		Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
102
103		cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
104	}
105	catch (Common::Exception& e)
106	{
107		cout << "Unable to convert PDF document to HTML, error: " << e << endl;
108		err = 1;
109	}
110	catch (...)
111	{
112		cout << "Unknown Exception" << endl;
113		err = 1;
114	}
115
116	//////////////////////////////////////////////////////////////////////////
117
118	try
119	{
120		// Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
121		cout << "Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)" << endl;
122
123		UString outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";
124
125		Convert::HTMLOutputOptions htmlOutputOptions;
126
127		// Set e_reflow_full content reflow setting
128		htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);
129
130		// Convert only the first page
131		htmlOutputOptions.SetPages(1, 1);
132
133		// Convert PDF to HTML
134		Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
135
136		cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
137	}
138	catch (Common::Exception& e)
139	{
140		cout << "Unable to convert PDF document to HTML, error: " << e << endl;
141		err = 1;
142	}
143	catch (...)
144	{
145		cout << "Unknown Exception" << endl;
146		err = 1;
147	}
148
149	//////////////////////////////////////////////////////////////////////////
150
151	PDFNet::Terminate();
152	cout << "Done.\n";
153	return err;
154}

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using pdftron;
7using pdftron.Common;
8using pdftron.PDF;
9
namespace PDF2HtmlTestCS
{
	/// <summary>
	/// The following sample illustrates how to use the PDF::Convert utility class to convert
	/// documents and files to HTML.
	///
	/// There are two HTML modules and one of them is an optional PDFNet Add-on.
	/// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
	///    documents.
	/// 2. The optional add-on module is used to convert PDF documents to HTML documents with
	///    text flowing across the browser window.
	///
	/// The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
	///
	/// Please contact us if you have any questions.
	/// </summary>
	class Class1
	{
		// Obtained once when the class is loaded, before Main runs.
		private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();

		static Class1() { }

		// Relative path to the folder containing test files.
		const string inputPath = "../../../../TestFiles/";
		const string outputPath = "../../../../TestFiles/Output/";

		/// <summary>
		/// The main entry point for the application. Runs the fixed-position
		/// conversion and, when the Structured Output module is available,
		/// the two reflow conversions.
		/// </summary>
		/// <param name="args">Command-line arguments (unused).</param>
		/// <returns>0 if every attempted conversion succeeded, 1 otherwise.</returns>
		[STAThread]
		static int Main(string[] args)
		{
			// The first step in every application using PDFNet is to initialize the
			// library. The library is usually initialized only once, but calling
			// Initialize() multiple times is also fine.
			PDFNet.Initialize(PDFTronLicense.Key);

			// Becomes true as soon as any conversion fails; drives the exit code.
			bool err = false;

			//////////////////////////////////////////////////////////////////////////

			try
			{
				// Convert PDF document to HTML with fixed positioning option turned on (default)
				Console.WriteLine("Converting PDF to HTML with fixed positioning option turned on (default)");

				string outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";

				pdftron.PDF.Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);

				Console.WriteLine("Result saved in " + outputFile);
			}
			catch (PDFNetException e)
			{
				Console.WriteLine("Unable to convert PDF document to HTML, error: " + e.Message);
				err = true;
			}
			catch (Exception e)
			{
				Console.WriteLine("Unknown Exception, error: ");
				Console.WriteLine(e);
				err = true;
			}

			//////////////////////////////////////////////////////////////////////////

			// Tell the SDK where to look for the optional module before probing for it.
			PDFNet.AddResourceSearchPath("../../../../../Lib/");

			if (!StructuredOutputModule.IsModuleAvailable())
			{
				Console.WriteLine();
				Console.WriteLine("Unable to run part of the sample: Apryse SDK Structured Output module not available.");
				Console.WriteLine("-------------------------------------------------------------------------------------");
				Console.WriteLine("The Structured Output module is an optional add-on, available for download");
				Console.WriteLine("at https://docs.apryse.com/core/guides/info/modules . If you have already");
				Console.WriteLine("downloaded this module, ensure that the SDK is able to find the required files");
				Console.WriteLine("using the PDFNet::AddResourceSearchPath() function.");
				Console.WriteLine();

				// A missing module is not an error in itself, but the library must
				// still be shut down and a failure of the first conversion must not
				// be hidden behind a zero exit code.
				PDFNet.Terminate();
				return err ? 1 : 0;
			}

			//////////////////////////////////////////////////////////////////////////

			try
			{
				// Convert PDF document to HTML with reflow full option turned on (1)
				Console.WriteLine("Converting PDF to HTML with reflow full option turned on (1)");

				string outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";

				pdftron.PDF.Convert.HTMLOutputOptions htmlOutputOptions = new pdftron.PDF.Convert.HTMLOutputOptions();

				// Set e_reflow_full content reflow setting
				htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);

				pdftron.PDF.Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);

				Console.WriteLine("Result saved in " + outputFile);
			}
			catch (PDFNetException e)
			{
				Console.WriteLine("Unable to convert PDF document to HTML, error: " + e.Message);
				err = true;
			}
			catch (Exception e)
			{
				Console.WriteLine("Unknown Exception, error: ");
				Console.WriteLine(e);
				err = true;
			}

			//////////////////////////////////////////////////////////////////////////

			try
			{
				// Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
				Console.WriteLine("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)");

				string outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";

				pdftron.PDF.Convert.HTMLOutputOptions htmlOutputOptions = new pdftron.PDF.Convert.HTMLOutputOptions();

				// Set e_reflow_full content reflow setting
				htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);

				// Convert only the first page
				htmlOutputOptions.SetPages(1, 1);

				pdftron.PDF.Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);

				Console.WriteLine("Result saved in " + outputFile);
			}
			catch (PDFNetException e)
			{
				Console.WriteLine("Unable to convert PDF document to HTML, error: " + e.Message);
				err = true;
			}
			catch (Exception e)
			{
				Console.WriteLine("Unknown Exception, error: ");
				Console.WriteLine(e);
				err = true;
			}

			//////////////////////////////////////////////////////////////////////////

			PDFNet.Terminate();
			Console.WriteLine("Done.");
			return err ? 1 : 0;
		}
	}
}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8	"fmt"
9	. "pdftron"
10)
11
12import  "pdftron/Samples/LicenseKey/GO"
13
14//---------------------------------------------------------------------------------------
15// The following sample illustrates how to use the PDF::Convert utility class to convert 
16// documents and files to HTML.
17//
18// There are two HTML modules and one of them is an optional PDFNet Add-on.
19// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
20//    documents.
21// 2. The optional add-on module is used to convert PDF documents to HTML documents with
22//    text flowing across the browser window.
23//
24// The PDFTron SDK HTML add-on module can be downloaded from http://www.pdftron.com/
25//
26// Please contact us if you have any questions.
27//---------------------------------------------------------------------------------------
28
29// Relative path to the folder containing the test files.
30var inputPath = "../../TestFiles/"
31var outputPath = "../../TestFiles/Output/"
32
33//---------------------------------------------------------------------------------------
34
// catch is meant to be deferred by a function with a named error result.
// If the surrounding function panicked, the panic value is recovered and
// stored in *err, formatted with %v; otherwise *err is left untouched.
func catch(err *error) {
	recovered := recover()
	if recovered == nil {
		return
	}
	*err = fmt.Errorf("%v", recovered)
}
40
41//---------------------------------------------------------------------------------------
42
43func ConvertToHtmlFixedPositionTest() (err error) {
44	defer catch(&err)
45
46	// Convert PDF document to HTML with fixed positioning option turned on (default)
47	fmt.Println("Converting PDF to HTML with fixed positioning option turned on (default)")
48
49	inputFile := inputPath + "paragraphs_and_tables.pdf"
50	outputFile := outputPath + "paragraphs_and_tables_fixed_positioning"
51
52	// Convert to HTML
53	ConvertToHtml(inputFile, outputFile)
54
55	fmt.Println("Result saved in " + outputFile)
56	return nil
57}
58
59//---------------------------------------------------------------------------------------
60
61func ConvertToHtmlReflowParagraphTest1() (err error) {
62	defer catch(&err)
63
64	// Convert PDF document to HTML with reflow full option turned on (1)
65	fmt.Println("Converting PDF to HTML with reflow full option turned on (1)")
66
67	inputFile := inputPath + "paragraphs_and_tables.pdf"
68	outputFile := outputPath + "paragraphs_and_tables_reflow_full.html"
69
70	htmlOutputOptions := NewHTMLOutputOptions()
71
72	// Set e_reflow_full content reflow setting
73	htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptionsE_reflow_full);
74
75	// Convert to HTML
76	ConvertToHtml(inputFile, outputFile, htmlOutputOptions)
77
78	fmt.Println("Result saved in " + outputFile)
79	return nil
80}
81
82//---------------------------------------------------------------------------------------
83
84func ConvertToHtmlReflowParagraphTest2() (err error) {
85	defer catch(&err)
86
87	// Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
88	fmt.Println("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)")
89
90	inputFile := inputPath + "paragraphs_and_tables.pdf"
91	outputFile := outputPath + "paragraphs_and_tables_reflow_full_first_page.html"
92
93	htmlOutputOptions := NewHTMLOutputOptions()
94
95	// Set e_reflow_full content reflow setting
96	htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptionsE_reflow_full);
97
98	// Convert only the first page
99	htmlOutputOptions.SetPages(1, 1);
100
101	// Convert to HTML
102	ConvertToHtml(inputFile, outputFile, htmlOutputOptions)
103
104	fmt.Println("Result saved in " + outputFile)
105	return nil
106}
107
108//---------------------------------------------------------------------------------------
109
110func main() {
111    // The first step in every application using PDFNet is to initialize the 
112    // library. The library is usually initialized only once, but calling 
113    // Initialize() multiple times is also fine.
114    PDFNetInitialize(PDFTronLicense.Key)
115
116	//-----------------------------------------------------------------------------------
117
118	// Convert PDF document to HTML with fixed positioning option turned on (default)
119	err := ConvertToHtmlFixedPositionTest()
120	if err != nil {
121		fmt.Println(fmt.Errorf("Unable to convert PDF document to HTML, error: %s", err))
122	}
123
124	//-----------------------------------------------------------------------------------
125
126	PDFNetAddResourceSearchPath("../../../PDFNetC/Lib/")
127
128	if !StructuredOutputModuleIsModuleAvailable() {
129		fmt.Println("")
130		fmt.Println("Unable to run part of the sample: PDFTron SDK Structured Output module not available.")
131		fmt.Println("-------------------------------------------------------------------------------------")
132		fmt.Println("The Structured Output module is an optional add-on, available for download")
133		fmt.Println("at https://docs.apryse.com/core/guides/info/modules . If you have already")
134		fmt.Println("downloaded this module, ensure that the SDK is able to find the required file")
135		fmt.Println("using the PDFNet::AddResourceSearchPath() function.")
136		fmt.Println("")
137		return
138	}
139
140	//-----------------------------------------------------------------------------------
141
142	// Convert PDF document to HTML with reflow full option turned on (1)
143	err = ConvertToHtmlReflowParagraphTest1()
144	if err != nil {
145		fmt.Println(fmt.Errorf("Unable to convert PDF document to HTML, error: %s", err))
146	}
147
148	//-----------------------------------------------------------------------------------
149
150	// Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
151	err = ConvertToHtmlReflowParagraphTest2()
152	if err != nil {
153		fmt.Println(fmt.Errorf("Unable to convert PDF document to HTML, error: %s", err))
154	}
155
156	//-----------------------------------------------------------------------------------
157
158    PDFNetTerminate()
159    fmt.Println("Done.")
160}

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.PDFNetException;
7import com.pdftron.pdf.*;
8
9//---------------------------------------------------------------------------------------
10// The following sample illustrates how to use the PDF::Convert utility class to convert 
11// documents and files to HTML.
12//
13// There are two HTML modules and one of them is an optional PDFNet Add-on.
14// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
15//    documents.
16// 2. The optional add-on module is used to convert PDF documents to HTML documents with
17//    text flowing across the browser window.
18//
19// The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
20//
21// Please contact us if you have any questions.
22//---------------------------------------------------------------------------------------
23
24public class PDF2HtmlTest 
25{
26    // Relative path to the folder containing test files.
27    static String inputPath = "../../TestFiles/";
28    static String outputPath = "../../TestFiles/Output/";
29
30    /// <summary>
31    /// The main entry point for the application.
32    /// </summary>
33    public static void main(String[] args) 
34    {
35        // The first step in every application using PDFNet is to initialize the 
36        // library. The library is usually initialized only once, but calling 
37        // Initialize() multiple times is also fine.
38        PDFNet.initialize(PDFTronLicense.Key());
39
40        boolean err = false;
41
42        //////////////////////////////////////////////////////////////////////////
43        
44        try {
45            // Convert PDF document to HTML with fixed positioning option turned on (default)
46            System.out.println("Converting PDF to HTML with fixed positioning option turned on (default)");
47
48            String outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";
49
50            Convert.toHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);
51
52            System.out.println("Result saved in " + outputFile);
53        } catch (PDFNetException e) {
54            System.out.println("Unable to convert PDF document to HTML, error: ");
55            System.out.println(e);
56            err = true;
57        }  catch (Exception e) {
58            System.out.println("Unknown Exception, error: ");
59            System.out.println(e);
60            err = true;
61        }
62
63        //////////////////////////////////////////////////////////////////////////
64        
65        PDFNet.addResourceSearchPath("../../../Lib/");
66
67        try {
68            if (!StructuredOutputModule.isModuleAvailable()) {
69                System.out.println();
70                System.out.println("Unable to run part of the sample: Apryse SDK Structured Output module not available.");
71                System.out.println("-------------------------------------------------------------------------------------");
72                System.out.println("The Structured Output module is an optional add-on, available for download");
73                System.out.println("at https://docs.apryse.com/core/guides/info/modules . If you have already");
74                System.out.println("downloaded this module, ensure that the SDK is able to find the required files");
75                System.out.println("using the PDFNet::AddResourceSearchPath() function.");
76                System.out.println();
77                return;
78            }
79        } catch (PDFNetException e) {
80            System.out.println(e);
81            return;
82        }  catch (Exception e) {
83            System.out.println(e);
84            return;
85        }
86
87        //////////////////////////////////////////////////////////////////////////
88
89        try {
90            // Convert PDF document to HTML with reflow full option turned on (1)
91            System.out.println("Converting PDF to HTML with reflow full option turned on (1)");
92
93            String outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";
94
95            Convert.HTMLOutputOptions htmlOutputOptions = new Convert.HTMLOutputOptions();
96
97            // Set e_reflow_full content reflow setting
98            htmlOutputOptions.setContentReflowSetting(Convert.HTMLOutputOptions.e_reflow_full);
99
100            Convert.toHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
101
102            System.out.println("Result saved in " + outputFile);
103        } catch (PDFNetException e) {
104            System.out.println("Unable to convert PDF document to HTML, error: ");
105            System.out.println(e);
106            err = true;
107        }  catch (Exception e) {
108            System.out.println("Unknown Exception, error: ");
109            System.out.println(e);
110            err = true;
111        }
112
113        //////////////////////////////////////////////////////////////////////////
114        
115        try {
116            // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
117            System.out.println("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)");
118
119            String outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";
120
121            Convert.HTMLOutputOptions htmlOutputOptions = new Convert.HTMLOutputOptions();
122
123            // Set e_reflow_full content reflow setting
124            htmlOutputOptions.setContentReflowSetting(Convert.HTMLOutputOptions.e_reflow_full);
125
126            // Convert only the first page
127            htmlOutputOptions.setPages(1, 1);
128
129            Convert.toHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
130
131            System.out.println("Result saved in " + outputFile);
132        } catch (PDFNetException e) {
133            System.out.println("Unable to convert PDF document to HTML, error: ");
134            System.out.println(e);
135            err = true;
136        }  catch (Exception e) {
137            System.out.println("Unknown Exception, error: ");
138            System.out.println(e);
139            err = true;
140        }
141
142        //////////////////////////////////////////////////////////////////////////
143
144        PDFNet.terminate();
145        System.out.println("Done.");        
146    }
147}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6//---------------------------------------------------------------------------------------
7// The following sample illustrates how to use the PDF::Convert utility class to convert 
8// documents and files to HTML.
9//
10// There are two HTML modules and one of them is an optional PDFNet Add-on.
11// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
12//    documents.
13// 2. The optional Structured Output add-on module is used to convert PDF documents to
14//    HTML documents with text flowing across the browser window.
15//
16// The Apryse SDK Structured Output module can be downloaded from
17// https://docs.apryse.com/core/info/modules/
18//
19// Please contact us if you have any questions.	
20//---------------------------------------------------------------------------------------
21
22const { PDFNet } = require('@pdftron/pdfnet-node');
23const PDFTronLicense = require('../LicenseKey/LicenseKey');
24
25((exports) => {
26	'use strict';
27
28	exports.runPDF2HtmlTest = () => {
29
30		const main = async () => {
31
32			const inputPath = '../TestFiles/';
33			const outputPath = '../TestFiles/Output/';
34
35			//////////////////////////////////////////////////////////////////////////
36
37			try {
38				// Convert PDF document to HTML with fixed positioning option turned on (default)
39				console.log('Converting PDF to HTML with fixed positioning option turned on (default)');
40
41				const outputFile = outputPath + 'paragraphs_and_tables_fixed_positioning';
42
43				// Convert PDF to HTML
44				await PDFNet.Convert.fileToHtml(inputPath + 'paragraphs_and_tables.pdf', outputFile);
45
46				console.log('Result saved in ' + outputFile);
47			} catch (err) {
48				console.log(err);
49			}
50
51			//////////////////////////////////////////////////////////////////////////
52
53			await PDFNet.addResourceSearchPath('../../lib/');
54
55			if (!await PDFNet.StructuredOutputModule.isModuleAvailable()) {
56				console.log('\nUnable to run part of the sample: Apryse SDK Structured Output module not available.');
57				console.log('---------------------------------------------------------------');
58				console.log('The Structured Output module is an optional add-on, available for download');
59				console.log('at https://docs.apryse.com/core/guides/info/modules . If you have already');
60				console.log('downloaded this module, ensure that the SDK is able to find the required files');
61				console.log('using the PDFNet::AddResourceSearchPath() function.\n');
62
63				return;
64			}
65
66			//////////////////////////////////////////////////////////////////////////
67
68			try {
69				// Convert PDF document to HTML with reflow full option turned on (1)
70				console.log('Converting PDF to HTML with reflow full option turned on (1)');
71
72				const outputFile = outputPath + 'paragraphs_and_tables_reflow_full.html';
73
74				const htmlOutputOptions = new PDFNet.Convert.HTMLOutputOptions();
75
76				// Set e_reflow_full content reflow setting
77				htmlOutputOptions.setContentReflowSetting(PDFNet.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);
78
79				// Convert PDF to HTML
80				await PDFNet.Convert.fileToHtml(inputPath + 'paragraphs_and_tables.pdf', outputFile, htmlOutputOptions);
81
82				console.log('Result saved in ' + outputFile);
83			} catch (err) {
84				console.log(err);
85			}
86
87			//////////////////////////////////////////////////////////////////////////
88
89			try {
90				// Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
91				console.log('Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)');
92
93				const outputFile = outputPath + 'paragraphs_and_tables_reflow_full_first_page.html';
94
95				const htmlOutputOptions = new PDFNet.Convert.HTMLOutputOptions();
96
97				// Set e_reflow_full content reflow setting
98				htmlOutputOptions.setContentReflowSetting(PDFNet.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);
99
100				// Convert only the first page
101				htmlOutputOptions.setPages(1, 1);
102
103				// Convert PDF to HTML
104				await PDFNet.Convert.fileToHtml(inputPath + 'paragraphs_and_tables.pdf', outputFile, htmlOutputOptions);
105
106				console.log('Result saved in ' + outputFile);
107			} catch (err) {
108				console.log(err);
109			}
110
111			//////////////////////////////////////////////////////////////////////////
112
113			console.log('Done.');
114		};
115
116		PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
117			console.log('Error: ' + JSON.stringify(error));
118		}).then(function () { return PDFNet.shutdown(); });
119	};
120	exports.runPDF2HtmlTest();
121})(exports);
122// eslint-disable-next-line spaced-comment
123//# sourceURL=PDF2HtmlTest.js

1<?php
2//---------------------------------------------------------------------------------------
3// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
4// Consult LICENSE.txt regarding license information.
5//---------------------------------------------------------------------------------------
6if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
7include("../../../PDFNetC/Lib/PDFNetPHP.php");
8include("../../LicenseKey/PHP/LicenseKey.php");
9
10//---------------------------------------------------------------------------------------
11// The following sample illustrates how to use the PDF::Convert utility class to convert 
12// documents and files to HTML.
13//
14// There are two HTML modules and one of them is an optional PDFNet Add-on.
15// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
16//    documents.
17// 2. The optional add-on module is used to convert PDF documents to HTML documents with
18//    text flowing across the browser window.
19//
20// The PDFTron SDK HTML add-on module can be downloaded from https://dev.apryse.com/
21//
22// Please contact us if you have any questions.
23//---------------------------------------------------------------------------------------
24
/**
 * Runs the PDF-to-HTML conversion sample.
 *
 * Initializes PDFNet, converts paragraphs_and_tables.pdf to HTML with the
 * default (fixed positioning) settings and then, only when the Structured
 * Output module is available, converts it twice more with the e_reflow_full
 * setting: once for the whole document and once for the first page only.
 *
 * Each conversion has its own try/catch, so a failure is reported and the
 * following conversions still run. PDFNet::Terminate() is called on every
 * exit path, including the early return taken when the module is missing.
 */
function main()
{
	// Relative path to the folder containing the test files.
	$inputPath = getcwd()."/../../TestFiles/";
	$outputPath = $inputPath."Output/";

	// The first step in every application using PDFNet is to initialize the 
	// library. The library is usually initialized only once, but calling 
	// Initialize() multiple times is also fine.
	global $LicenseKey;
	PDFNet::Initialize($LicenseKey);
	// Wait for fonts to be loaded if they haven't already. This is done because
	// PHP can run into errors when shutting down if font loading is still in progress.
	PDFNet::GetSystemFontList();

	//-----------------------------------------------------------------------------------

	try {
		// Convert PDF document to HTML with fixed positioning option turned on (default)
		echo(nl2br("Converting PDF to HTML with fixed positioning option turned on (default)\n"));

		$outputFile = $outputPath."paragraphs_and_tables_fixed_positioning";

		Convert::ToHtml($inputPath."paragraphs_and_tables.pdf", $outputFile);

		echo(nl2br("Result saved in " . $outputFile . "\n"));
	}
	catch(Exception $e) {
		echo(nl2br("Unable to convert PDF document to HTML, error: " . $e->getMessage() . "\n"));
	}

	//-----------------------------------------------------------------------------------

	// Tell the SDK where to look for the optional Structured Output module.
	PDFNet::AddResourceSearchPath("../../../PDFNetC/Lib/");

	if (!StructuredOutputModule::IsModuleAvailable()) {
		echo(nl2br("\n"));
		echo(nl2br("Unable to run part of the sample: PDFTron SDK Structured Output module not available.\n"));
		echo(nl2br("-------------------------------------------------------------------------------------\n"));
		echo(nl2br("The Structured Output module is an optional add-on, available for download\n"));
		echo(nl2br("at https://docs.apryse.com/core/guides/info/modules . If you have already\n"));
		echo(nl2br("downloaded this module, ensure that the SDK is able to find the required files\n"));
		echo(nl2br("using the PDFNet::AddResourceSearchPath() function.\n"));
		echo(nl2br("\n"));
		// The library was initialized above, so shut it down before leaving early.
		PDFNet::Terminate();
		return;
	}

	//-----------------------------------------------------------------------------------

	try {
		// Convert PDF document to HTML with reflow full option turned on (1)
		echo(nl2br("Converting PDF to HTML with reflow full option turned on (1)\n"));

		$outputFile = $outputPath."paragraphs_and_tables_reflow_full.html";

		$htmlOutputOptions = new HTMLOutputOptions();

		// Set e_reflow_full content reflow setting
		$htmlOutputOptions->SetContentReflowSetting(HTMLOutputOptions::e_reflow_full);

		Convert::ToHtml($inputPath."paragraphs_and_tables.pdf", $outputFile, $htmlOutputOptions);

		echo(nl2br("Result saved in " . $outputFile . "\n"));
	}
	catch(Exception $e) {
		echo(nl2br("Unable to convert PDF document to HTML, error: " . $e->getMessage() . "\n"));
	}

	//-----------------------------------------------------------------------------------

	try {
		// Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
		echo(nl2br("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)\n"));

		$outputFile = $outputPath."paragraphs_and_tables_reflow_full_first_page.html";

		$htmlOutputOptions = new HTMLOutputOptions();

		// Set e_reflow_full content reflow setting
		$htmlOutputOptions->SetContentReflowSetting(HTMLOutputOptions::e_reflow_full);

		// Convert only the first page
		$htmlOutputOptions->SetPages(1, 1);

		Convert::ToHtml($inputPath."paragraphs_and_tables.pdf", $outputFile, $htmlOutputOptions);

		echo(nl2br("Result saved in " . $outputFile . "\n"));
	}
	catch(Exception $e) {
		echo(nl2br("Unable to convert PDF document to HTML, error: " . $e->getMessage() . "\n"));
	}

	//-----------------------------------------------------------------------------------
	PDFNet::Terminate();
	echo(nl2br("Done.\n"));
}

main();
121?>

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11import platform
12
13sys.path.append("../../LicenseKey/PYTHON")
14from LicenseKey import *
15
16#---------------------------------------------------------------------------------------
17# The following sample illustrates how to use the PDF.Convert utility class to convert 
18# documents and files to HTML.
19#
20# There are two HTML modules and one of them is an optional PDFNet Add-on.
21# 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
22#    documents.
23# 2. The optional add-on module is used to convert PDF documents to HTML documents with
24#    text flowing across the browser window.
25#
26# The PDFTron SDK HTML add-on module can be downloaded from https://dev.apryse.com/
27#
28# Please contact us if you have any questions.
29#---------------------------------------------------------------------------------------
30
31# Relative path to the folder containing the test files.
32inputPath = "../../TestFiles/"
33outputPath = "../../TestFiles/Output/"
34
def main():
    """Run the PDF-to-HTML conversion sample.

    Initializes PDFNet, converts ``paragraphs_and_tables.pdf`` to HTML with the
    default (fixed positioning) settings and then, only when the Structured
    Output module is available, converts it twice more with the
    ``e_reflow_full`` setting: once for the whole document and once for the
    first page only.

    Each conversion has its own ``try``/``except``, so a failure is printed and
    the following conversions still run. ``PDFNet.Terminate()`` is called on
    every exit path, including the early return taken when the module is
    missing.
    """
    # The first step in every application using PDFNet is to initialize the 
    # library. The library is usually initialized only once, but calling 
    # Initialize() multiple times is also fine.
    PDFNet.Initialize(LicenseKey)

    #-----------------------------------------------------------------------------------

    try:
        # Convert PDF document to HTML with fixed positioning option turned on (default)
        print("Converting PDF to HTML with fixed positioning option turned on (default)")

        outputFile = outputPath + "paragraphs_and_tables_fixed_positioning"

        Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile)

        print("Result saved in " + outputFile)
    except Exception as e:
        print("Unable to convert PDF document to HTML, error: " + str(e))

    #-----------------------------------------------------------------------------------

    # Tell the SDK where to look for the optional Structured Output module.
    PDFNet.AddResourceSearchPath("../../../PDFNetC/Lib/")

    if not StructuredOutputModule.IsModuleAvailable():
        print("")
        print("Unable to run part of the sample: PDFTron SDK Structured Output module not available.")
        print("-------------------------------------------------------------------------------------")
        print("The Structured Output module is an optional add-on, available for download")
        print("at https://docs.apryse.com/core/guides/info/modules . If you have already")
        print("downloaded this module, ensure that the SDK is able to find the required files")
        print("using the PDFNet::AddResourceSearchPath() function.")
        print("")
        # The library was initialized above, so shut it down before leaving early.
        PDFNet.Terminate()
        return

    #-----------------------------------------------------------------------------------

    try:
        # Convert PDF document to HTML with reflow full option turned on (1)
        print("Converting PDF to HTML with reflow full option turned on (1)")

        outputFile = outputPath + "paragraphs_and_tables_reflow_full.html"

        htmlOutputOptions = HTMLOutputOptions()

        # Set e_reflow_full content reflow setting
        htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptions.e_reflow_full)

        Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions)

        print("Result saved in " + outputFile)
    except Exception as e:
        print("Unable to convert PDF document to HTML, error: " + str(e))

    #-----------------------------------------------------------------------------------

    try:
        # Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
        print("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)")

        outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html"

        htmlOutputOptions = HTMLOutputOptions()

        # Set e_reflow_full content reflow setting
        htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptions.e_reflow_full)

        # Convert only the first page
        htmlOutputOptions.SetPages(1, 1)

        Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions)

        print("Result saved in " + outputFile)
    except Exception as e:
        print("Unable to convert PDF document to HTML, error: " + str(e))

    #-----------------------------------------------------------------------------------

    PDFNet.Terminate()
    print("Done.")

if __name__ == '__main__':
    main()

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6require '../../../PDFNetC/Lib/PDFNetRuby'
7include PDFNetRuby
8require '../../LicenseKey/RUBY/LicenseKey'
9
10$stdout.sync = true
11
12#---------------------------------------------------------------------------------------
13# The following sample illustrates how to use the PDF.Convert utility class to convert 
14# documents and files to HTML.
15#
16# There are two HTML modules and one of them is an optional PDFNet Add-on.
17# 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
18#    documents.
19# 2. The optional add-on module is used to convert PDF documents to HTML documents with
20#    text flowing across the browser window.
21#
22# The PDFTron SDK HTML add-on module can be downloaded from https://dev.apryse.com/
23#
24# Please contact us if you have any questions.
25#---------------------------------------------------------------------------------------
26
27# Relative path to the folder containing the test files.
28$inputPath = "../../TestFiles/"
29$outputPath = "../../TestFiles/Output/"
30	
# Runs the PDF-to-HTML conversion sample.
#
# Initializes PDFNet, converts paragraphs_and_tables.pdf to HTML with the
# default (fixed positioning) settings and then, only when the Structured
# Output module is available, converts it twice more with the E_reflow_full
# setting: once for the whole document and once for the first page only.
#
# Each conversion has its own begin/rescue, so a failure is printed and the
# following conversions still run. PDFNet.Terminate is called on every exit
# path, including the early return taken when the module is missing.
def main()
	# The first step in every application using PDFNet is to initialize the 
	# library. The library is usually initialized only once, but calling 
	# Initialize() multiple times is also fine.
	PDFNet.Initialize(PDFTronLicense.Key)

	#-----------------------------------------------------------------------------------

	begin
		# Convert PDF document to HTML with fixed positioning option turned on (default)
		puts "Converting PDF to HTML with fixed positioning option turned on (default)"

		# Scratch values are kept local; only the shared paths are globals.
		output_file = $outputPath + "paragraphs_and_tables_fixed_positioning"

		Convert.ToHtml($inputPath + "paragraphs_and_tables.pdf", output_file)
		puts "Result saved in " + output_file
	rescue => error
		puts "Unable to convert PDF document to HTML, error: " + error.message
	end

	#-----------------------------------------------------------------------------------

	# Tell the SDK where to look for the optional Structured Output module.
	PDFNet.AddResourceSearchPath("../../../PDFNetC/Lib/")

	if !StructuredOutputModule.IsModuleAvailable() then
		puts ""
		puts "Unable to run part of the sample: PDFTron SDK Structured Output module not available."
		puts "-------------------------------------------------------------------------------------"
		puts "The Structured Output module is an optional add-on, available for download"
		puts "at https://docs.apryse.com/core/guides/info/modules . If you have already"
		puts "downloaded this module, ensure that the SDK is able to find the required files"
		puts "using the PDFNet::AddResourceSearchPath() function."
		puts ""
		# The library was initialized above, so shut it down before leaving early.
		PDFNet.Terminate
		return
	end

	#-----------------------------------------------------------------------------------

	begin
		# Convert PDF document to HTML with reflow full option turned on (1)
		puts "Converting PDF to HTML with reflow full option turned on (1)"

		output_file = $outputPath + "paragraphs_and_tables_reflow_full.html"

		html_output_options = Convert::HTMLOutputOptions.new()

		# Set e_reflow_full content reflow setting
		html_output_options.SetContentReflowSetting(Convert::HTMLOutputOptions::E_reflow_full)

		Convert.ToHtml($inputPath + "paragraphs_and_tables.pdf", output_file, html_output_options)
		puts "Result saved in " + output_file
	rescue => error
		puts "Unable to convert PDF document to HTML, error: " + error.message
	end

	#-----------------------------------------------------------------------------------

	begin
		# Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
		puts "Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)"

		output_file = $outputPath + "paragraphs_and_tables_reflow_full_first_page.html"

		html_output_options = Convert::HTMLOutputOptions.new()

		# Set e_reflow_full content reflow setting
		html_output_options.SetContentReflowSetting(Convert::HTMLOutputOptions::E_reflow_full)

		# Convert only the first page
		html_output_options.SetPages(1, 1)

		Convert.ToHtml($inputPath + "paragraphs_and_tables.pdf", output_file, html_output_options)
		puts "Result saved in " + output_file
	rescue => error
		puts "Unable to convert PDF document to HTML, error: " + error.message
	end

	#-----------------------------------------------------------------------------------
	PDFNet.Terminate
	puts "Done."
end

main()

1'
2' Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3'
4
5Imports System
6Imports pdftron
7Imports pdftron.Common
8Imports pdftron.PDF
9
' The following sample illustrates how to use the PDF.Convert utility class to convert
' documents and files to HTML.
'
' There are two HTML modules and one of them is an optional PDFNet Add-on.
' 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
'    documents.
' 2. The optional add-on module is used to convert PDF documents to HTML documents with
'    text flowing across the browser window.
'
' The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
'
' Please contact us if you have any questions.
'
' Also note that conversion under ASP.NET can be tricky to configure. Please see the following document for advice:
' http://www.pdftron.com/pdfnet/faq_files/Converting_Documents_in_Windows_Service_or_ASP.NET_Application_using_PDFNet.pdf
25
' Sample module that converts a PDF document to HTML using pdftron.PDF.Convert.
Module PDF2HtmlTestVB
    Class Class1
        ' Holding the loader instance in a shared field makes sure it is created
        ' before Main runs.
        Shared pdfNetLoader As pdftron.PDFNetLoader = pdftron.PDFNetLoader.Instance()

        Shared Sub New()
        End Sub

        ' Relative path to the folder containing test files.
        Const inputPath As String = "../../../../TestFiles/"
        Const outputPath As String = "../../../../TestFiles/Output/"

        ''' <summary>
        ''' Runs the PDF-to-HTML conversion sample.
        '''
        ''' Initializes PDFNet, converts paragraphs_and_tables.pdf to HTML with the
        ''' default (fixed positioning) settings and then, only when the Structured
        ''' Output module is available, converts it twice more with the e_reflow_full
        ''' setting: once for the whole document and once for the first page only.
        '''
        ''' Each conversion has its own Try/Catch, so a failure is printed and the
        ''' following conversions still run. PDFNet.Terminate() is called on every
        ''' exit path, including the early return taken when the module is missing.
        ''' </summary>
        ''' <param name="args">Command-line arguments; not used by this sample.</param>
        <STAThread>
        Shared Sub Main(ByVal args As String())
            ' The first step in every application using PDFNet is to initialize the
            ' library. The library is usually initialized only once, but calling
            ' Initialize() multiple times is also fine.
            PDFNet.Initialize(PDFTronLicense.Key)

            '//////////////////////////////////////////////////////////////////////////

            Try
                ' Convert PDF document to HTML with fixed positioning option turned on (default)
                Console.WriteLine("Converting PDF to HTML with fixed positioning option turned on (default)")

                Dim outputFile As String = outputPath & "paragraphs_and_tables_fixed_positioning.html"

                pdftron.PDF.Convert.ToHtml(inputPath & "paragraphs_and_tables.pdf", outputFile)

                Console.WriteLine("Result saved in " & outputFile)
            Catch e As PDFNetException
                Console.WriteLine("Unable to convert PDF document to HTML, error: " & e.Message)
            Catch e As Exception
                Console.WriteLine("Unknown Exception, error: ")
                Console.WriteLine(e)
            End Try

            '//////////////////////////////////////////////////////////////////////////

            ' Tell the SDK where to look for the optional Structured Output module.
            PDFNet.AddResourceSearchPath("../../../../../Lib/")

            If Not StructuredOutputModule.IsModuleAvailable() Then
                Console.WriteLine()
                Console.WriteLine("Unable to run part of the sample: Apryse SDK Structured Output module not available.")
                Console.WriteLine("-------------------------------------------------------------------------------------")
                Console.WriteLine("The Structured Output module is an optional add-on, available for download")
                Console.WriteLine("at http://docs.apryse.com/core/guides/info/modules . If you have already downloaded this")
                Console.WriteLine("module, ensure that the SDK is able to find the required files")
                Console.WriteLine("using the PDFNet::AddResourceSearchPath() function.")
                Console.WriteLine()
                ' The library was initialized above, so shut it down before leaving early.
                PDFNet.Terminate()
                Return
            End If

            '//////////////////////////////////////////////////////////////////////////

            Try
                ' Convert PDF document to HTML with reflow full option turned on (1)
                Console.WriteLine("Converting PDF to HTML with reflow full option turned on (1)")

                Dim outputFile As String = outputPath & "paragraphs_and_tables_reflow_full.html"

                Dim htmlOutputOptions As pdftron.PDF.Convert.HTMLOutputOptions = New pdftron.PDF.Convert.HTMLOutputOptions()

                ' Set e_reflow_full content reflow setting
                htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full)

                pdftron.PDF.Convert.ToHtml(inputPath & "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions)

                Console.WriteLine("Result saved in " & outputFile)
            Catch e As PDFNetException
                Console.WriteLine("Unable to convert PDF document to HTML, error: " & e.Message)
            Catch e As Exception
                Console.WriteLine("Unknown Exception, error: ")
                Console.WriteLine(e)
            End Try

            '//////////////////////////////////////////////////////////////////////////

            Try
                ' Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
                Console.WriteLine("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)")

                Dim outputFile As String = outputPath & "paragraphs_and_tables_reflow_full_first_page.html"

                Dim htmlOutputOptions As pdftron.PDF.Convert.HTMLOutputOptions = New pdftron.PDF.Convert.HTMLOutputOptions()

                ' Set e_reflow_full content reflow setting
                htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full)

                ' Convert only the first page
                htmlOutputOptions.SetPages(1, 1)

                pdftron.PDF.Convert.ToHtml(inputPath & "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions)

                Console.WriteLine("Result saved in " & outputFile)
            Catch e As PDFNetException
                Console.WriteLine("Unable to convert PDF document to HTML, error: " & e.Message)
            Catch e As Exception
                Console.WriteLine("Unknown Exception, error: ")
                Console.WriteLine(e)
            End Try

            '//////////////////////////////////////////////////////////////////////////

            PDFNet.Terminate()
            Console.WriteLine("Done.")
        End Sub
    End Class
End Module

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

PDF2Html - Convert PDF to HTML - Python Sample Code