PDF2HtmlTest

Sample code for using Apryse SDK to programmatically convert generic PDF documents to HTML, provided in Python, C++, C#, Java, JavaScript, PHP, Ruby, Go and VB. Learn more about our PDF to HTML conversion.

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------
5
6#include <iostream>
7#include <sstream>
8#include <PDF/PDFNet.h>
9#include <PDF/Convert.h>
10#include <PDF/StructuredOutputModule.h>
11#include "../../LicenseKey/CPP/LicenseKey.h"
12
//---------------------------------------------------------------------------------------
// The following sample illustrates how to use the PDF::Convert utility class to convert
// documents and files to HTML.
//
// There are two HTML modules and one of them is an optional PDFNet Add-on.
// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
//    documents.
// 2. The optional add-on module is used to convert PDF documents to HTML documents with
//    text flowing across the browser window.
//
// The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
//
// Please contact us if you have any questions.
//---------------------------------------------------------------------------------------
27
28using namespace pdftron;
29using namespace PDF;
30using namespace std;
31
32UString inputPath("../../TestFiles/");
33UString outputPath("../../TestFiles/Output/");
34
35int main(int argc, char *argv[])
36{
37 // The first step in every application using PDFNet is to initialize the
38 // library. The library is usually initialized only once, but calling
39 // Initialize() multiple times is also fine.
40 PDFNet::Initialize(LicenseKey);
41
42 int err = 0;
43
44 //////////////////////////////////////////////////////////////////////////
45
46 try
47 {
48 // Convert PDF document to HTML with fixed positioning option turned on (default)
49 cout << "Converting PDF to HTML with fixed positioning option turned on (default)" << endl;
50
51 UString outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";
52
53 // Convert PDF to HTML
54 Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);
55
56 cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
57 }
58 catch (Common::Exception& e)
59 {
60 cout << "Unable to convert PDF document to HTML, error: " << e << endl;
61 err = 1;
62 }
63 catch (...)
64 {
65 cout << "Unknown Exception" << endl;
66 err = 1;
67 }
68
69 //////////////////////////////////////////////////////////////////////////
70
71 PDFNet::AddResourceSearchPath("../../../Lib/");
72
73 if (!StructuredOutputModule::IsModuleAvailable())
74 {
75 cout << endl;
76 cout << "Unable to run part of the sample: Apryse SDK Structured Output module not available." << endl;
77 cout << "-------------------------------------------------------------------------------------" << endl;
78 cout << "The Structured Output module is an optional add-on, available for download" << endl;
79 cout << "at https://docs.apryse.com/core/info/modules/. If you have already" << endl;
80 cout << "downloaded this module, ensure that the SDK is able to find the required files" << endl;
81 cout << "using the PDFNet::AddResourceSearchPath() function." << endl;
82 cout << endl;
83 return 0;
84 }
85
86 //////////////////////////////////////////////////////////////////////////
87
88 try
89 {
90 // Convert PDF document to HTML with reflow full option turned on (1)
91 cout << "Converting PDF to HTML with reflow full option turned on (1)" << endl;
92
93 UString outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";
94
95 Convert::HTMLOutputOptions htmlOutputOptions;
96
97 // Set e_reflow_full content reflow setting
98 htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);
99
100 // Convert PDF to HTML
101 Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
102
103 cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
104 }
105 catch (Common::Exception& e)
106 {
107 cout << "Unable to convert PDF document to HTML, error: " << e << endl;
108 err = 1;
109 }
110 catch (...)
111 {
112 cout << "Unknown Exception" << endl;
113 err = 1;
114 }
115
116 //////////////////////////////////////////////////////////////////////////
117
118 try
119 {
120 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
121 cout << "Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)" << endl;
122
123 UString outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";
124
125 Convert::HTMLOutputOptions htmlOutputOptions;
126
127 // Set e_reflow_full content reflow setting
128 htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);
129
130 // Convert only the first page
131 htmlOutputOptions.SetPages(1, 1);
132
133 // Convert PDF to HTML
134 Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
135
136 cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
137 }
138 catch (Common::Exception& e)
139 {
140 cout << "Unable to convert PDF document to HTML, error: " << e << endl;
141 err = 1;
142 }
143 catch (...)
144 {
145 cout << "Unknown Exception" << endl;
146 err = 1;
147 }
148
149 //////////////////////////////////////////////////////////////////////////
150
151 PDFNet::Terminate();
152 cout << "Done.\n";
153 return err;
154}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales