Sample code for using Apryse SDK to programmatically convert generic PDF documents to HTML, provided in Python, C++, C#, Java, JavaScript, PHP, Ruby, Go and VB. Learn more about our PDF to HTML
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#include <iostream>
7#include <sstream>
8#include <PDF/PDFNet.h>
9#include <PDF/Convert.h>
10#include <PDF/StructuredOutputModule.h>
11#include "../../LicenseKey/CPP/LicenseKey.h"
12
13//---------------------------------------------------------------------------------------
14// The following sample illustrates how to use the PDF::Convert utility class to convert
15// documents and files to HTML.
16//
17// There are two HTML modules and one of them is an optional PDFNet Add-on.
18// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
19// documents.
20// 2. The optional add-on module is used to convert PDF documents to HTML documents with
21// text flowing across the browser window.
22//
23// The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
24//
25// Please contact us if you have any questions.
26//---------------------------------------------------------------------------------------
27
28using namespace pdftron;
29using namespace PDF;
30using namespace std;
31
32UString inputPath("../../TestFiles/");
33UString outputPath("../../TestFiles/Output/");
34
35int main(int argc, char *argv[])
36{
37 // The first step in every application using PDFNet is to initialize the
38 // library. The library is usually initialized only once, but calling
39 // Initialize() multiple times is also fine.
40 PDFNet::Initialize(LicenseKey);
41
42 int err = 0;
43
44 //////////////////////////////////////////////////////////////////////////
45
46 try
47 {
48 // Convert PDF document to HTML with fixed positioning option turned on (default)
49 cout << "Converting PDF to HTML with fixed positioning option turned on (default)" << endl;
50
51 UString outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";
52
53 // Convert PDF to HTML
54 Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);
55
56 cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
57 }
58 catch (Common::Exception& e)
59 {
60 cout << "Unable to convert PDF document to HTML, error: " << e << endl;
61 err = 1;
62 }
63 catch (...)
64 {
65 cout << "Unknown Exception" << endl;
66 err = 1;
67 }
68
69 //////////////////////////////////////////////////////////////////////////
70
71 PDFNet::AddResourceSearchPath("../../../Lib/");
72
73 if (!StructuredOutputModule::IsModuleAvailable())
74 {
75 cout << endl;
76 cout << "Unable to run part of the sample: Apryse SDK Structured Output module not available." << endl;
77 cout << "-------------------------------------------------------------------------------------" << endl;
78 cout << "The Structured Output module is an optional add-on, available for download" << endl;
79 cout << "at https://docs.apryse.com/core/info/modules/. If you have already" << endl;
80 cout << "downloaded this module, ensure that the SDK is able to find the required files" << endl;
81 cout << "using the PDFNet::AddResourceSearchPath() function." << endl;
82 cout << endl;
83 return 0;
84 }
85
86 //////////////////////////////////////////////////////////////////////////
87
88 try
89 {
90 // Convert PDF document to HTML with reflow full option turned on (1)
91 cout << "Converting PDF to HTML with reflow full option turned on (1)" << endl;
92
93 UString outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";
94
95 Convert::HTMLOutputOptions htmlOutputOptions;
96
97 // Set e_reflow_full content reflow setting
98 htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);
99
100 // Convert PDF to HTML
101 Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
102
103 cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
104 }
105 catch (Common::Exception& e)
106 {
107 cout << "Unable to convert PDF document to HTML, error: " << e << endl;
108 err = 1;
109 }
110 catch (...)
111 {
112 cout << "Unknown Exception" << endl;
113 err = 1;
114 }
115
116 //////////////////////////////////////////////////////////////////////////
117
118 try
119 {
120 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
121 cout << "Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)" << endl;
122
123 UString outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";
124
125 Convert::HTMLOutputOptions htmlOutputOptions;
126
127 // Set e_reflow_full content reflow setting
128 htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);
129
130 // Convert only the first page
131 htmlOutputOptions.SetPages(1, 1);
132
133 // Convert PDF to HTML
134 Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
135
136 cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
137 }
138 catch (Common::Exception& e)
139 {
140 cout << "Unable to convert PDF document to HTML, error: " << e << endl;
141 err = 1;
142 }
143 catch (...)
144 {
145 cout << "Unknown Exception" << endl;
146 err = 1;
147 }
148
149 //////////////////////////////////////////////////////////////////////////
150
151 PDFNet::Terminate();
152 cout << "Done.\n";
153 return err;
154}
1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using pdftron;
7using pdftron.Common;
8using pdftron.PDF;
9
10namespace PDF2HtmlTestCS
11{
12 /// <summary>
13 // The following sample illustrates how to use the PDF::Convert utility class to convert
14 // documents and files to HTML.
15 //
16 // There are two HTML modules and one of them is an optional PDFNet Add-on.
17 // 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
18 // documents.
19 // 2. The optional add-on module is used to convert PDF documents to HTML documents with
20 // text flowing across the browser window.
21 //
22 // The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
23 //
24 // Please contact us if you have any questions.
25 /// </summary>
26
27 class Class1
28 {
29 private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();
30
31 static Class1() { }
32
33 // Relative path to the folder containing test files.
34 const string inputPath = "../../../../TestFiles/";
35 const string outputPath = "../../../../TestFiles/Output/";
36
37 /// <summary>
38 /// The main entry point for the application.
39 /// </summary>
40 [STAThread]
41 static int Main(string[] args)
42 {
43 // The first step in every application using PDFNet is to initialize the
44 // library. The library is usually initialized only once, but calling
45 // Initialize() multiple times is also fine.
46 PDFNet.Initialize(PDFTronLicense.Key);
47
48 bool err = false;
49
50 //////////////////////////////////////////////////////////////////////////
51
52 try
53 {
54 // Convert PDF document to HTML with fixed positioning option turned on (default)
55 Console.WriteLine("Converting PDF to HTML with fixed positioning option turned on (default)");
56
57 string outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";
58
59 pdftron.PDF.Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);
60
61 Console.WriteLine("Result saved in " + outputFile);
62 }
63 catch (PDFNetException e)
64 {
65 Console.WriteLine("Unable to convert PDF document to HTML, error: " + e.Message);
66 err = true;
67 }
68 catch (Exception e)
69 {
70 Console.WriteLine("Unknown Exception, error: ");
71 Console.WriteLine(e);
72 err = true;
73 }
74
75 //////////////////////////////////////////////////////////////////////////
76
77 PDFNet.AddResourceSearchPath("../../../../../Lib/");
78
79 if (!StructuredOutputModule.IsModuleAvailable())
80 {
81 Console.WriteLine();
82 Console.WriteLine("Unable to run part of the sample: Apryse SDK Structured Output module not available.");
83 Console.WriteLine("-------------------------------------------------------------------------------------");
84 Console.WriteLine("The Structured Output module is an optional add-on, available for download");
85 Console.WriteLine("at https://docs.apryse.com/core/info/modules/. If you have already");
86 Console.WriteLine("downloaded this module, ensure that the SDK is able to find the required files");
87 Console.WriteLine("using the PDFNet::AddResourceSearchPath() function.");
88 Console.WriteLine();
89 return 0;
90 }
91
92 //////////////////////////////////////////////////////////////////////////
93
94 try
95 {
96 // Convert PDF document to HTML with reflow full option turned on (1)
97 Console.WriteLine("Converting PDF to HTML with reflow full option turned on (1)");
98
99 string outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";
100
101 pdftron.PDF.Convert.HTMLOutputOptions htmlOutputOptions = new pdftron.PDF.Convert.HTMLOutputOptions();
102
103 // Set e_reflow_full content reflow setting
104 htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);
105
106 pdftron.PDF.Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
107
108 Console.WriteLine("Result saved in " + outputFile);
109 }
110 catch (PDFNetException e)
111 {
112 Console.WriteLine("Unable to convert PDF document to HTML, error: " + e.Message);
113 err = true;
114 }
115 catch (Exception e)
116 {
117 Console.WriteLine("Unknown Exception, error: ");
118 Console.WriteLine(e);
119 err = true;
120 }
121
122 //////////////////////////////////////////////////////////////////////////
123
124 try
125 {
126 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
127 Console.WriteLine("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)");
128
129 string outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";
130
131 pdftron.PDF.Convert.HTMLOutputOptions htmlOutputOptions = new pdftron.PDF.Convert.HTMLOutputOptions();
132
133 // Set e_reflow_full content reflow setting
134 htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);
135
136 // Convert only the first page
137 htmlOutputOptions.SetPages(1, 1);
138
139 pdftron.PDF.Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
140
141 Console.WriteLine("Result saved in " + outputFile);
142 }
143 catch (PDFNetException e)
144 {
145 Console.WriteLine("Unable to convert PDF document to HTML, error: " + e.Message);
146 err = true;
147 }
148 catch (Exception e)
149 {
150 Console.WriteLine("Unknown Exception, error: ");
151 Console.WriteLine(e);
152 err = true;
153 }
154
155 //////////////////////////////////////////////////////////////////////////
156
157 PDFNet.Terminate();
158 Console.WriteLine("Done.");
159 return (err == false ? 0 : 1);
160 }
161 }
162}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8 "fmt"
9 . "pdftron"
10)
11
12import "pdftron/Samples/LicenseKey/GO"
13
14//---------------------------------------------------------------------------------------
15// The following sample illustrates how to use the PDF::Convert utility class to convert
16// documents and files to HTML.
17//
18// There are two HTML modules and one of them is an optional PDFNet Add-on.
19// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
20// documents.
21// 2. The optional add-on module is used to convert PDF documents to HTML documents with
22// text flowing across the browser window.
23//
24// The PDFTron SDK HTML add-on module can be downloaded from http://www.pdftron.com/
25//
26// Please contact us if you have any questions.
27//---------------------------------------------------------------------------------------
28
29// Relative path to the folder containing the test files.
30var inputPath = "../../TestFiles/"
31var outputPath = "../../TestFiles/Output/"
32
33//---------------------------------------------------------------------------------------
34
// catch is meant to be used as `defer catch(&err)`. When the surrounding
// function panics, the panic value is recovered and stored in *err as an
// error formatted with %v; when there is no panic, *err is left untouched.
func catch(err *error) {
	recovered := recover()
	if recovered == nil {
		return
	}
	*err = fmt.Errorf("%v", recovered)
}
40
41//---------------------------------------------------------------------------------------
42
43func ConvertToHtmlFixedPositionTest() (err error) {
44 defer catch(&err)
45
46 // Convert PDF document to HTML with fixed positioning option turned on (default)
47 fmt.Println("Converting PDF to HTML with fixed positioning option turned on (default)")
48
49 inputFile := inputPath + "paragraphs_and_tables.pdf"
50 outputFile := outputPath + "paragraphs_and_tables_fixed_positioning"
51
52 // Convert to HTML
53 ConvertToHtml(inputFile, outputFile)
54
55 fmt.Println("Result saved in " + outputFile)
56 return nil
57}
58
59//---------------------------------------------------------------------------------------
60
61func ConvertToHtmlReflowParagraphTest1() (err error) {
62 defer catch(&err)
63
64 // Convert PDF document to HTML with reflow full option turned on (1)
65 fmt.Println("Converting PDF to HTML with reflow full option turned on (1)")
66
67 inputFile := inputPath + "paragraphs_and_tables.pdf"
68 outputFile := outputPath + "paragraphs_and_tables_reflow_full.html"
69
70 htmlOutputOptions := NewHTMLOutputOptions()
71
72 // Set e_reflow_full content reflow setting
73 htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptionsE_reflow_full);
74
75 // Convert to HTML
76 ConvertToHtml(inputFile, outputFile, htmlOutputOptions)
77
78 fmt.Println("Result saved in " + outputFile)
79 return nil
80}
81
82//---------------------------------------------------------------------------------------
83
84func ConvertToHtmlReflowParagraphTest2() (err error) {
85 defer catch(&err)
86
87 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
88 fmt.Println("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)")
89
90 inputFile := inputPath + "paragraphs_and_tables.pdf"
91 outputFile := outputPath + "paragraphs_and_tables_reflow_full_first_page.html"
92
93 htmlOutputOptions := NewHTMLOutputOptions()
94
95 // Set e_reflow_full content reflow setting
96 htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptionsE_reflow_full);
97
98 // Convert only the first page
99 htmlOutputOptions.SetPages(1, 1);
100
101 // Convert to HTML
102 ConvertToHtml(inputFile, outputFile, htmlOutputOptions)
103
104 fmt.Println("Result saved in " + outputFile)
105 return nil
106}
107
108//---------------------------------------------------------------------------------------
109
110func main() {
111 // The first step in every application using PDFNet is to initialize the
112 // library. The library is usually initialized only once, but calling
113 // Initialize() multiple times is also fine.
114 PDFNetInitialize(PDFTronLicense.Key)
115
116 //-----------------------------------------------------------------------------------
117
118 // Convert PDF document to HTML with fixed positioning option turned on (default)
119 err := ConvertToHtmlFixedPositionTest()
120 if err != nil {
121 fmt.Println(fmt.Errorf("Unable to convert PDF document to HTML, error: %s", err))
122 }
123
124 //-----------------------------------------------------------------------------------
125
126 PDFNetAddResourceSearchPath("../../../PDFNetC/Lib/")
127
128 if !StructuredOutputModuleIsModuleAvailable() {
129 fmt.Println("")
130 fmt.Println("Unable to run part of the sample: PDFTron SDK Structured Output module not available.")
131 fmt.Println("-------------------------------------------------------------------------------------")
132 fmt.Println("The Structured Output module is an optional add-on, available for download")
133 fmt.Println("at https://docs.apryse.com/core/info/modules/. If you have already")
134 fmt.Println("downloaded this module, ensure that the SDK is able to find the required file")
135 fmt.Println("using the PDFNet::AddResourceSearchPath() function.")
136 fmt.Println("")
137 return
138 }
139
140 //-----------------------------------------------------------------------------------
141
142 // Convert PDF document to HTML with reflow full option turned on (1)
143 err = ConvertToHtmlReflowParagraphTest1()
144 if err != nil {
145 fmt.Println(fmt.Errorf("Unable to convert PDF document to HTML, error: %s", err))
146 }
147
148 //-----------------------------------------------------------------------------------
149
150 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
151 err = ConvertToHtmlReflowParagraphTest2()
152 if err != nil {
153 fmt.Println(fmt.Errorf("Unable to convert PDF document to HTML, error: %s", err))
154 }
155
156 //-----------------------------------------------------------------------------------
157
158 PDFNetTerminate()
159 fmt.Println("Done.")
160}
1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.PDFNetException;
7import com.pdftron.pdf.*;
8
9//---------------------------------------------------------------------------------------
10// The following sample illustrates how to use the PDF::Convert utility class to convert
11// documents and files to HTML.
12//
13// There are two HTML modules and one of them is an optional PDFNet Add-on.
14// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
15// documents.
16// 2. The optional add-on module is used to convert PDF documents to HTML documents with
17// text flowing across the browser window.
18//
19// The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
20//
21// Please contact us if you have any questions.
22//---------------------------------------------------------------------------------------
23
24public class PDF2HtmlTest
25{
26 // Relative path to the folder containing test files.
27 static String inputPath = "../../TestFiles/";
28 static String outputPath = "../../TestFiles/Output/";
29
30 /// <summary>
31 /// The main entry point for the application.
32 /// </summary>
33 public static void main(String[] args)
34 {
35 // The first step in every application using PDFNet is to initialize the
36 // library. The library is usually initialized only once, but calling
37 // Initialize() multiple times is also fine.
38 PDFNet.initialize(PDFTronLicense.Key());
39
40 boolean err = false;
41
42 //////////////////////////////////////////////////////////////////////////
43
44 try {
45 // Convert PDF document to HTML with fixed positioning option turned on (default)
46 System.out.println("Converting PDF to HTML with fixed positioning option turned on (default)");
47
48 String outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";
49
50 Convert.toHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);
51
52 System.out.println("Result saved in " + outputFile);
53 } catch (PDFNetException e) {
54 System.out.println("Unable to convert PDF document to HTML, error: ");
55 System.out.println(e);
56 err = true;
57 } catch (Exception e) {
58 System.out.println("Unknown Exception, error: ");
59 System.out.println(e);
60 err = true;
61 }
62
63 //////////////////////////////////////////////////////////////////////////
64
65 PDFNet.addResourceSearchPath("../../../Lib/");
66
67 try {
68 if (!StructuredOutputModule.isModuleAvailable()) {
69 System.out.println();
70 System.out.println("Unable to run part of the sample: Apryse SDK Structured Output module not available.");
71 System.out.println("-------------------------------------------------------------------------------------");
72 System.out.println("The Structured Output module is an optional add-on, available for download");
73 System.out.println("at https://docs.apryse.com/core/info/modules/. If you have already");
74 System.out.println("downloaded this module, ensure that the SDK is able to find the required files");
75 System.out.println("using the PDFNet::AddResourceSearchPath() function.");
76 System.out.println();
77 return;
78 }
79 } catch (PDFNetException e) {
80 System.out.println(e);
81 return;
82 } catch (Exception e) {
83 System.out.println(e);
84 return;
85 }
86
87 //////////////////////////////////////////////////////////////////////////
88
89 try {
90 // Convert PDF document to HTML with reflow full option turned on (1)
91 System.out.println("Converting PDF to HTML with reflow full option turned on (1)");
92
93 String outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";
94
95 Convert.HTMLOutputOptions htmlOutputOptions = new Convert.HTMLOutputOptions();
96
97 // Set e_reflow_full content reflow setting
98 htmlOutputOptions.setContentReflowSetting(Convert.HTMLOutputOptions.e_reflow_full);
99
100 Convert.toHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
101
102 System.out.println("Result saved in " + outputFile);
103 } catch (PDFNetException e) {
104 System.out.println("Unable to convert PDF document to HTML, error: ");
105 System.out.println(e);
106 err = true;
107 } catch (Exception e) {
108 System.out.println("Unknown Exception, error: ");
109 System.out.println(e);
110 err = true;
111 }
112
113 //////////////////////////////////////////////////////////////////////////
114
115 try {
116 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
117 System.out.println("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)");
118
119 String outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";
120
121 Convert.HTMLOutputOptions htmlOutputOptions = new Convert.HTMLOutputOptions();
122
123 // Set e_reflow_full content reflow setting
124 htmlOutputOptions.setContentReflowSetting(Convert.HTMLOutputOptions.e_reflow_full);
125
126 // Convert only the first page
127 htmlOutputOptions.setPages(1, 1);
128
129 Convert.toHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
130
131 System.out.println("Result saved in " + outputFile);
132 } catch (PDFNetException e) {
133 System.out.println("Unable to convert PDF document to HTML, error: ");
134 System.out.println(e);
135 err = true;
136 } catch (Exception e) {
137 System.out.println("Unknown Exception, error: ");
138 System.out.println(e);
139 err = true;
140 }
141
142 //////////////////////////////////////////////////////////////////////////
143
144 PDFNet.terminate();
145 System.out.println("Done.");
146 }
147}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6//---------------------------------------------------------------------------------------
7// The following sample illustrates how to use the PDF::Convert utility class to convert
8// documents and files to HTML.
9//
10// There are two HTML modules and one of them is an optional PDFNet Add-on.
11// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
12// documents.
13// 2. The optional Structured Output add-on module is used to convert PDF documents to
14// HTML documents with text flowing across the browser window.
15//
16// The Apryse SDK Structured Output module can be downloaded from
17// https://docs.apryse.com/core/info/modules/
18//
19// Please contact us if you have any questions.
20//---------------------------------------------------------------------------------------
21
22const { PDFNet } = require('@pdftron/pdfnet-node');
23const PDFTronLicense = require('../LicenseKey/LicenseKey');
24
25((exports) => {
26 'use strict';
27
28 exports.runPDF2HtmlTest = () => {
29
30 const main = async () => {
31
32 const inputPath = '../TestFiles/';
33 const outputPath = '../TestFiles/Output/';
34
35 //////////////////////////////////////////////////////////////////////////
36
37 try {
38 // Convert PDF document to HTML with fixed positioning option turned on (default)
39 console.log('Converting PDF to HTML with fixed positioning option turned on (default)');
40
41 const outputFile = outputPath + 'paragraphs_and_tables_fixed_positioning';
42
43 // Convert PDF to HTML
44 await PDFNet.Convert.fileToHtml(inputPath + 'paragraphs_and_tables.pdf', outputFile);
45
46 console.log('Result saved in ' + outputFile);
47 } catch (err) {
48 console.log(err);
49 }
50
51 //////////////////////////////////////////////////////////////////////////
52
53 await PDFNet.addResourceSearchPath('../../lib/');
54
55 if (!await PDFNet.StructuredOutputModule.isModuleAvailable()) {
56 console.log('\nUnable to run part of the sample: Apryse SDK Structured Output module not available.');
57 console.log('---------------------------------------------------------------');
58 console.log('The Structured Output module is an optional add-on, available for download');
59 console.log('at https://docs.apryse.com/core/info/modules/. If you have already');
60 console.log('downloaded this module, ensure that the SDK is able to find the required files');
61 console.log('using the PDFNet::AddResourceSearchPath() function.\n');
62
63 return;
64 }
65
66 //////////////////////////////////////////////////////////////////////////
67
68 try {
69 // Convert PDF document to HTML with reflow full option turned on (1)
70 console.log('Converting PDF to HTML with reflow full option turned on (1)');
71
72 const outputFile = outputPath + 'paragraphs_and_tables_reflow_full.html';
73
74 const htmlOutputOptions = new PDFNet.Convert.HTMLOutputOptions();
75
76 // Set e_reflow_full content reflow setting
77 htmlOutputOptions.setContentReflowSetting(PDFNet.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);
78
79 // Convert PDF to HTML
80 await PDFNet.Convert.fileToHtml(inputPath + 'paragraphs_and_tables.pdf', outputFile, htmlOutputOptions);
81
82 console.log('Result saved in ' + outputFile);
83 } catch (err) {
84 console.log(err);
85 }
86
87 //////////////////////////////////////////////////////////////////////////
88
89 try {
90 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
91 console.log('Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)');
92
93 const outputFile = outputPath + 'paragraphs_and_tables_reflow_full_first_page.html';
94
95 const htmlOutputOptions = new PDFNet.Convert.HTMLOutputOptions();
96
97 // Set e_reflow_full content reflow setting
98 htmlOutputOptions.setContentReflowSetting(PDFNet.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);
99
100 // Convert only the first page
101 htmlOutputOptions.setPages(1, 1);
102
103 // Convert PDF to HTML
104 await PDFNet.Convert.fileToHtml(inputPath + 'paragraphs_and_tables.pdf', outputFile, htmlOutputOptions);
105
106 console.log('Result saved in ' + outputFile);
107 } catch (err) {
108 console.log(err);
109 }
110
111 //////////////////////////////////////////////////////////////////////////
112
113 console.log('Done.');
114 };
115
116 PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
117 console.log('Error: ' + JSON.stringify(error));
118 }).then(function () { return PDFNet.shutdown(); });
119 };
120 exports.runPDF2HtmlTest();
121})(exports);
122// eslint-disable-next-line spaced-comment
123//# sourceURL=PDF2HtmlTest.js
1<?php
2//---------------------------------------------------------------------------------------
3// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
4// Consult LICENSE.txt regarding license information.
5//---------------------------------------------------------------------------------------
6if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
7include("../../../PDFNetC/Lib/PDFNetPHP.php");
8include("../../LicenseKey/PHP/LicenseKey.php");
9
10//---------------------------------------------------------------------------------------
11// The following sample illustrates how to use the PDF::Convert utility class to convert
12// documents and files to HTML.
13//
14// There are two HTML modules and one of them is an optional PDFNet Add-on.
15// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
16// documents.
17// 2. The optional add-on module is used to convert PDF documents to HTML documents with
18// text flowing across the browser window.
19//
20// The PDFTron SDK HTML add-on module can be downloaded from https://dev.apryse.com/
21//
22// Please contact us if you have any questions.
23//---------------------------------------------------------------------------------------
24
// Runs the PDF-to-HTML sample: one fixed-position conversion and, when the
// Structured Output module is available, two reflow conversions. Conversion
// errors are echoed and do not stop the remaining conversions. The library is
// terminated on every exit path.
function main()
{
	// Relative path to the folder containing the test files.
	$inputPath = getcwd()."/../../TestFiles/";
	$outputPath = $inputPath."Output/";

	// The first step in every application using PDFNet is to initialize the
	// library. The library is usually initialized only once, but calling
	// Initialize() multiple times is also fine.
	global $LicenseKey;
	PDFNet::Initialize($LicenseKey);
	PDFNet::GetSystemFontList();    // Wait for fonts to be loaded if they haven't already. This is done because PHP can run into errors when shutting down if font loading is still in progress.

	//-----------------------------------------------------------------------------------

	try {
		// Convert PDF document to HTML with fixed positioning option turned on (default)
		echo(nl2br("Converting PDF to HTML with fixed positioning option turned on (default)\n"));

		$outputFile = $outputPath."paragraphs_and_tables_fixed_positioning";

		Convert::ToHtml($inputPath."paragraphs_and_tables.pdf", $outputFile);

		echo(nl2br("Result saved in " . $outputFile . "\n"));
	}
	catch(Exception $e) {
		echo(nl2br("Unable to convert PDF document to HTML, error: " . $e->getMessage() . "\n"));
	}

	//-----------------------------------------------------------------------------------

	// Tell the SDK where to look for the optional Structured Output module.
	PDFNet::AddResourceSearchPath("../../../PDFNetC/Lib/");

	if (!StructuredOutputModule::IsModuleAvailable()) {
		echo(nl2br("\n"));
		echo(nl2br("Unable to run part of the sample: PDFTron SDK Structured Output module not available.\n"));
		echo(nl2br("-------------------------------------------------------------------------------------\n"));
		echo(nl2br("The Structured Output module is an optional add-on, available for download\n"));
		echo(nl2br("at https://docs.apryse.com/core/info/modules/. If you have already\n"));
		echo(nl2br("downloaded this module, ensure that the SDK is able to find the required files\n"));
		echo(nl2br("using the PDFNet::AddResourceSearchPath() function.\n"));
		echo(nl2br("\n"));

		// The reflow conversions are skipped, but the library must still be shut down.
		PDFNet::Terminate();
		return;
	}

	//-----------------------------------------------------------------------------------

	try {
		// Convert PDF document to HTML with reflow full option turned on (1)
		echo(nl2br("Converting PDF to HTML with reflow full option turned on (1)\n"));

		$outputFile = $outputPath."paragraphs_and_tables_reflow_full.html";

		$htmlOutputOptions = new HTMLOutputOptions();

		// Set e_reflow_full content reflow setting
		$htmlOutputOptions->SetContentReflowSetting(HTMLOutputOptions::e_reflow_full);

		Convert::ToHtml($inputPath."paragraphs_and_tables.pdf", $outputFile, $htmlOutputOptions);

		echo(nl2br("Result saved in " . $outputFile . "\n"));
	}
	catch(Exception $e) {
		echo(nl2br("Unable to convert PDF document to HTML, error: " . $e->getMessage() . "\n"));
	}

	//-----------------------------------------------------------------------------------

	try {
		// Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
		echo(nl2br("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)\n"));

		$outputFile = $outputPath."paragraphs_and_tables_reflow_full_first_page.html";

		$htmlOutputOptions = new HTMLOutputOptions();

		// Set e_reflow_full content reflow setting
		$htmlOutputOptions->SetContentReflowSetting(HTMLOutputOptions::e_reflow_full);

		// Convert only the first page
		$htmlOutputOptions->SetPages(1, 1);

		Convert::ToHtml($inputPath."paragraphs_and_tables.pdf", $outputFile, $htmlOutputOptions);

		echo(nl2br("Result saved in " . $outputFile . "\n"));
	}
	catch(Exception $e) {
		echo(nl2br("Unable to convert PDF document to HTML, error: " . $e->getMessage() . "\n"));
	}

	//-----------------------------------------------------------------------------------
	PDFNet::Terminate();
	echo(nl2br("Done.\n"));
}
119
120main();
121?>
1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11import platform
12
13sys.path.append("../../LicenseKey/PYTHON")
14from LicenseKey import *
15
16#---------------------------------------------------------------------------------------
17# The following sample illustrates how to use the PDF.Convert utility class to convert
18# documents and files to HTML.
19#
20# There are two HTML modules and one of them is an optional PDFNet Add-on.
21# 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
22# documents.
23# 2. The optional add-on module is used to convert PDF documents to HTML documents with
24# text flowing across the browser window.
25#
26# The PDFTron SDK HTML add-on module can be downloaded from https://dev.apryse.com/
27#
28# Please contact us if you have any questions.
29#---------------------------------------------------------------------------------------
30
31# Relative path to the folder containing the test files.
32inputPath = "../../TestFiles/"
33outputPath = "../../TestFiles/Output/"
34
def main():
    """Convert the sample PDF to HTML three ways and print the outcome of each.

    The input is ``paragraphs_and_tables.pdf`` under ``inputPath``; results are
    written under ``outputPath``:

    1. fixed-position HTML using the default options;
    2. HTML with the ``e_reflow_full`` content reflow setting;
    3. the same reflow conversion restricted to the first page.

    Conversions 2 and 3 are skipped, with an explanatory message, when
    ``StructuredOutputModule.IsModuleAvailable()`` returns false. Every
    conversion has its own ``try``/``except`` so a failure is printed and the
    remaining conversions still run.

    ``PDFNet.Terminate()`` is called on every exit path, including the early
    return taken when the Structured Output module is missing.
    """
    # The first step in every application using PDFNet is to initialize the
    # library. The library is usually initialized only once, but calling
    # Initialize() multiple times is also fine.
    PDFNet.Initialize(LicenseKey)

    # Everything after Initialize() runs under try/finally so the library is
    # terminated even when we return early or an unexpected error escapes.
    try:
        #-------------------------------------------------------------------------------

        try:
            # Convert PDF document to HTML with fixed positioning option turned on (default)
            print("Converting PDF to HTML with fixed positioning option turned on (default)")

            outputFile = outputPath + "paragraphs_and_tables_fixed_positioning"

            Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile)

            print("Result saved in " + outputFile)
        except Exception as e:
            print("Unable to convert PDF document to HTML, error: " + str(e))

        #-------------------------------------------------------------------------------

        # Tell the SDK where to look for the optional add-on module files.
        PDFNet.AddResourceSearchPath("../../../PDFNetC/Lib/")

        if not StructuredOutputModule.IsModuleAvailable():
            print("")
            print("Unable to run part of the sample: PDFTron SDK Structured Output module not available.")
            print("-------------------------------------------------------------------------------------")
            print("The Structured Output module is an optional add-on, available for download")
            print("at https://docs.apryse.com/core/info/modules/. If you have already")
            print("downloaded this module, ensure that the SDK is able to find the required files")
            print("using the PDFNet::AddResourceSearchPath() function.")
            print("")
            # The reflow conversions below need the add-on; the finally clause
            # still terminates the library before we leave.
            return

        #-------------------------------------------------------------------------------

        try:
            # Convert PDF document to HTML with reflow full option turned on (1)
            print("Converting PDF to HTML with reflow full option turned on (1)")

            outputFile = outputPath + "paragraphs_and_tables_reflow_full.html"

            htmlOutputOptions = HTMLOutputOptions()

            # Set e_reflow_full content reflow setting
            htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptions.e_reflow_full)

            Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions)

            print("Result saved in " + outputFile)
        except Exception as e:
            print("Unable to convert PDF document to HTML, error: " + str(e))

        #-------------------------------------------------------------------------------

        try:
            # Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
            print("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)")

            outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html"

            htmlOutputOptions = HTMLOutputOptions()

            # Set e_reflow_full content reflow setting
            htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptions.e_reflow_full)

            # Convert only the first page
            htmlOutputOptions.SetPages(1, 1)

            Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions)

            print("Result saved in " + outputFile)
        except Exception as e:
            print("Unable to convert PDF document to HTML, error: " + str(e))

        #-------------------------------------------------------------------------------
    finally:
        PDFNet.Terminate()

    print("Done.")


# Run the sample only when this file is executed as a script.
if __name__ == '__main__':
    main()
1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6require '../../../PDFNetC/Lib/PDFNetRuby'
7include PDFNetRuby
8require '../../LicenseKey/RUBY/LicenseKey'
9
10$stdout.sync = true
11
12#---------------------------------------------------------------------------------------
13# The following sample illustrates how to use the PDF.Convert utility class to convert
14# documents and files to HTML.
15#
16# There are two HTML modules and one of them is an optional PDFNet Add-on.
17# 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
18# documents.
19# 2. The optional add-on module is used to convert PDF documents to HTML documents with
20# text flowing across the browser window.
21#
22# The PDFTron SDK HTML add-on module can be downloaded from https://dev.apryse.com/
23#
24# Please contact us if you have any questions.
25#---------------------------------------------------------------------------------------
26
27# Relative path to the folder containing the test files.
28$inputPath = "../../TestFiles/"
29$outputPath = "../../TestFiles/Output/"
30
# Converts the sample PDF (paragraphs_and_tables.pdf under $inputPath) to HTML
# three ways, writing the results under $outputPath:
#
# 1. fixed-position HTML using the default options;
# 2. HTML with the E_reflow_full content reflow setting;
# 3. the same reflow conversion restricted to the first page.
#
# Conversions 2 and 3 are skipped, with an explanatory message, when
# StructuredOutputModule.IsModuleAvailable() returns false. Every conversion
# has its own begin/rescue so a failure is printed and the remaining
# conversions still run.
#
# PDFNet.Terminate is called on every exit path, including the early return
# taken when the Structured Output module is missing.
def main()
    # The first step in every application using PDFNet is to initialize the
    # library. The library is usually initialized only once, but calling
    # Initialize() multiple times is also fine.
    PDFNet.Initialize(PDFTronLicense.Key)

    # Everything after Initialize() runs under begin/ensure so the library is
    # terminated even when we return early or an unexpected error escapes.
    begin
        #-------------------------------------------------------------------------------

        begin
            # Convert PDF document to HTML with fixed positioning option turned on (default)
            puts "Converting PDF to HTML with fixed positioning option turned on (default)"

            $outputFile = $outputPath + "paragraphs_and_tables_fixed_positioning"

            Convert.ToHtml($inputPath + "paragraphs_and_tables.pdf", $outputFile)
            puts "Result saved in " + $outputFile
        rescue => error
            puts "Unable to convert PDF document to HTML, error: " + error.message
        end

        #-------------------------------------------------------------------------------

        # Tell the SDK where to look for the optional add-on module files.
        PDFNet.AddResourceSearchPath("../../../PDFNetC/Lib/")

        unless StructuredOutputModule.IsModuleAvailable()
            puts ""
            puts "Unable to run part of the sample: PDFTron SDK Structured Output module not available."
            puts "-------------------------------------------------------------------------------------"
            puts "The Structured Output module is an optional add-on, available for download"
            puts "at https://docs.apryse.com/core/info/modules/. If you have already"
            puts "downloaded this module, ensure that the SDK is able to find the required files"
            puts "using the PDFNet::AddResourceSearchPath() function."
            puts ""
            # The reflow conversions below need the add-on; the ensure clause
            # still terminates the library before we leave.
            return
        end

        #-------------------------------------------------------------------------------

        begin
            # Convert PDF document to HTML with reflow full option turned on (1)
            puts "Converting PDF to HTML with reflow full option turned on (1)"

            $outputFile = $outputPath + "paragraphs_and_tables_reflow_full.html"

            $htmlOutputOptions = Convert::HTMLOutputOptions.new()

            # Set e_reflow_full content reflow setting
            $htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::E_reflow_full)

            Convert.ToHtml($inputPath + "paragraphs_and_tables.pdf", $outputFile, $htmlOutputOptions)
            puts "Result saved in " + $outputFile
        rescue => error
            puts "Unable to convert PDF document to HTML, error: " + error.message
        end

        #-------------------------------------------------------------------------------

        begin
            # Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
            puts "Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)"

            $outputFile = $outputPath + "paragraphs_and_tables_reflow_full_first_page.html"

            $htmlOutputOptions = Convert::HTMLOutputOptions.new()

            # Set e_reflow_full content reflow setting
            $htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::E_reflow_full)

            # Convert only the first page
            $htmlOutputOptions.SetPages(1, 1)

            Convert.ToHtml($inputPath + "paragraphs_and_tables.pdf", $outputFile, $htmlOutputOptions)
            puts "Result saved in " + $outputFile
        rescue => error
            puts "Unable to convert PDF document to HTML, error: " + error.message
        end

        #-------------------------------------------------------------------------------
    ensure
        PDFNet.Terminate
    end

    puts "Done."
end

# Run the sample.
main()
1'
2' Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3'
4
5Imports System
6Imports pdftron
7Imports pdftron.Common
8Imports pdftron.PDF
9
10' The following sample illustrates how to use the PDF.Convert utility class to convert
11' documents and files to HTML.
12'
13' There are two HTML modules and one of them is an optional PDFNet Add-on.
14' 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
15' documents.
16' 2. The optional add-on module is used to convert PDF documents to HTML documents with
17' text flowing across the browser window.
18'
19' The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
20'
21' Please contact us if you have any questions.
22'
23' Also note that conversion under ASP.NET can be tricky to configure. Please see the following document for advice:
24' http://www.pdftron.com/pdfnet/faq_files/Converting_Documents_in_Windows_Service_or_ASP.NET_Application_using_PDFNet.pdf
25
Module PDF2HtmlTestVB
    ' Sample entry point: converts paragraphs_and_tables.pdf to HTML three ways
    ' (fixed positioning, full reflow, full reflow of the first page only).
    Class Class1
        ' Holds the PDFNetLoader singleton for the lifetime of the class.
        ' NOTE(review): presumably this makes sure the native PDFNet library is
        ' loaded before any SDK call - confirm against the PDFNetLoader docs.
        Shared pdfNetLoader As pdftron.PDFNetLoader = pdftron.PDFNetLoader.Instance()

        ' Intentionally empty shared constructor.
        ' NOTE(review): likely present to control when the shared field above is
        ' initialized - confirm before removing.
        Shared Sub New()
        End Sub

        ' Relative path to the folder containing test files.
        Const inputPath As String = "../../../../TestFiles/"
        Const outputPath As String = "../../../../TestFiles/Output/"

        ' Runs the three conversions and prints the outcome of each.
        '
        ' The two reflow conversions are skipped, with an explanatory message,
        ' when StructuredOutputModule.IsModuleAvailable() returns False. Every
        ' conversion has its own Try/Catch so a failure is printed and the
        ' remaining conversions still run.
        '
        ' PDFNet.Terminate() is called on every exit path, including the early
        ' Return taken when the Structured Output module is missing.
        '
        ' args: command-line arguments (not used).
        <STAThread>
        Shared Sub Main(ByVal args As String())
            ' The first step in every application using PDFNet is to initialize the
            ' library. The library is usually initialized only once, but calling
            ' Initialize() multiple times is also fine.
            PDFNet.Initialize(PDFTronLicense.Key)

            ' Everything after Initialize() runs under Try/Finally so the library is
            ' terminated even when we return early or an unexpected error escapes.
            Try
                '//////////////////////////////////////////////////////////////////////////

                Try
                    ' Convert PDF document to HTML with fixed positioning option turned on (default)
                    Console.WriteLine("Converting PDF to HTML with fixed positioning option turned on (default)")

                    Dim outputFile As String = outputPath & "paragraphs_and_tables_fixed_positioning.html"

                    pdftron.PDF.Convert.ToHtml(inputPath & "paragraphs_and_tables.pdf", outputFile)

                    Console.WriteLine("Result saved in " & outputFile)
                Catch e As PDFNetException
                    Console.WriteLine("Unable to convert PDF document to HTML, error: " & e.Message)
                Catch e As Exception
                    Console.WriteLine("Unknown Exception, error: ")
                    Console.WriteLine(e)
                End Try

                '//////////////////////////////////////////////////////////////////////////

                ' Tell the SDK where to look for the optional add-on module files.
                PDFNet.AddResourceSearchPath("../../../../../Lib/")

                If Not StructuredOutputModule.IsModuleAvailable() Then
                    Console.WriteLine()
                    Console.WriteLine("Unable to run part of the sample: Apryse SDK Structured Output module not available.")
                    Console.WriteLine("-------------------------------------------------------------------------------------")
                    Console.WriteLine("The Structured Output module is an optional add-on, available for download")
                    Console.WriteLine("at http://www.pdftron.com/. If you have already downloaded this")
                    Console.WriteLine("module, ensure that the SDK is able to find the required files")
                    Console.WriteLine("using the PDFNet::AddResourceSearchPath() function.")
                    Console.WriteLine()
                    ' The reflow conversions below need the add-on; the Finally block
                    ' still terminates the library before we leave.
                    Return
                End If

                '//////////////////////////////////////////////////////////////////////////

                Try
                    ' Convert PDF document to HTML with reflow full option turned on (1)
                    Console.WriteLine("Converting PDF to HTML with reflow full option turned on (1)")

                    Dim outputFile As String = outputPath & "paragraphs_and_tables_reflow_full.html"

                    Dim htmlOutputOptions As pdftron.PDF.Convert.HTMLOutputOptions = New pdftron.PDF.Convert.HTMLOutputOptions()

                    ' Set e_reflow_full content reflow setting
                    htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full)

                    pdftron.PDF.Convert.ToHtml(inputPath & "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions)

                    Console.WriteLine("Result saved in " & outputFile)
                Catch e As PDFNetException
                    Console.WriteLine("Unable to convert PDF document to HTML, error: " & e.Message)
                Catch e As Exception
                    Console.WriteLine("Unknown Exception, error: ")
                    Console.WriteLine(e)
                End Try

                '//////////////////////////////////////////////////////////////////////////

                Try
                    ' Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
                    Console.WriteLine("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)")

                    Dim outputFile As String = outputPath & "paragraphs_and_tables_reflow_full_first_page.html"

                    Dim htmlOutputOptions As pdftron.PDF.Convert.HTMLOutputOptions = New pdftron.PDF.Convert.HTMLOutputOptions()

                    ' Set e_reflow_full content reflow setting
                    htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full)

                    ' Convert only the first page
                    htmlOutputOptions.SetPages(1, 1)

                    pdftron.PDF.Convert.ToHtml(inputPath & "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions)

                    Console.WriteLine("Result saved in " & outputFile)
                Catch e As PDFNetException
                    Console.WriteLine("Unable to convert PDF document to HTML, error: " & e.Message)
                Catch e As Exception
                    Console.WriteLine("Unknown Exception, error: ")
                    Console.WriteLine(e)
                End Try

                '//////////////////////////////////////////////////////////////////////////
            Finally
                PDFNet.Terminate()
            End Try

            Console.WriteLine("Done.")
        End Sub
    End Class
End Module
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales