PDF2Html - Convert PDF to HTML - C++ Sample Code

Sample code for using Apryse SDK to programmatically convert generic PDF documents to HTML, provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our PDF to HTML conversion.

To run this sample, you will need to:

  1. Get started with Server SDK in your language/framework
  2. Download the Structured Output Module

Learn more about our Server SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#include <iostream>
7#include <sstream>
8#include <PDF/PDFNet.h>
9#include <PDF/Convert.h>
10#include <PDF/StructuredOutputModule.h>
11#include "../../LicenseKey/CPP/LicenseKey.h"
12
13//---------------------------------------------------------------------------------------
14// The following sample illustrates how to use the PDF::Convert utility class to convert
15// documents and files to HTML.
16//
17// There are two HTML modules and one of them is an optional PDFNet Add-on.
18// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
19// documents.
20// 2. The optional add-on module is used to convert PDF documents to HTML documents with
21// text flowing across the browser window.
22//
23// The Apryse SDK Structured Output add-on module can be downloaded from https://docs.apryse.com/core/guides/info/modules
24//
25// Please contact us if you have any questions.
26//---------------------------------------------------------------------------------------
27
28using namespace pdftron;
29using namespace PDF;
30using namespace std;
31
// Relative path prefix prepended to the input PDF file name in main().
32UString inputPath("../../TestFiles/");
// Relative path prefix prepended to every output file name written by main().
33UString outputPath("../../TestFiles/Output/");
34
35int main(int argc, char *argv[])
36{
37 // The first step in every application using PDFNet is to initialize the
38 // library. The library is usually initialized only once, but calling
39 // Initialize() multiple times is also fine.
40 PDFNet::Initialize(LicenseKey);
41
42 int err = 0;
43
44 //////////////////////////////////////////////////////////////////////////
45
46 try
47 {
48 // Convert PDF document to HTML with fixed positioning option turned on (default)
49 cout << "Converting PDF to HTML with fixed positioning option turned on (default)" << endl;
50
51 UString outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";
52
53 // Convert PDF to HTML
54 Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);
55
56 cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
57 }
58 catch (Common::Exception& e)
59 {
60 cout << "Unable to convert PDF document to HTML, error: " << e << endl;
61 err = 1;
62 }
63 catch (...)
64 {
65 cout << "Unknown Exception" << endl;
66 err = 1;
67 }
68
69 //////////////////////////////////////////////////////////////////////////
70
71 PDFNet::AddResourceSearchPath("../../../Lib/");
72
73 if (!StructuredOutputModule::IsModuleAvailable())
74 {
75 cout << endl;
76 cout << "Unable to run part of the sample: Apryse SDK Structured Output module not available." << endl;
77 cout << "-------------------------------------------------------------------------------------" << endl;
78 cout << "The Structured Output module is an optional add-on, available for download" << endl;
79 cout << "at https://docs.apryse.com/core/guides/info/modules . If you have already" << endl;
80 cout << "downloaded this module, ensure that the SDK is able to find the required files" << endl;
81 cout << "using the PDFNet::AddResourceSearchPath() function." << endl;
82 cout << endl;
83 return 0;
84 }
85
86 //////////////////////////////////////////////////////////////////////////
87
88 try
89 {
90 // Convert PDF document to HTML with reflow full option turned on (1)
91 cout << "Converting PDF to HTML with reflow full option turned on (1)" << endl;
92
93 UString outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";
94
95 Convert::HTMLOutputOptions htmlOutputOptions;
96
97 // Set e_reflow_full content reflow setting
98 htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);
99
100 // Convert PDF to HTML
101 Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
102
103 cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
104 }
105 catch (Common::Exception& e)
106 {
107 cout << "Unable to convert PDF document to HTML, error: " << e << endl;
108 err = 1;
109 }
110 catch (...)
111 {
112 cout << "Unknown Exception" << endl;
113 err = 1;
114 }
115
116 //////////////////////////////////////////////////////////////////////////
117
118 try
119 {
120 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
121 cout << "Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)" << endl;
122
123 UString outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";
124
125 Convert::HTMLOutputOptions htmlOutputOptions;
126
127 // Set e_reflow_full content reflow setting
128 htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);
129
130 // Convert only the first page
131 htmlOutputOptions.SetPages(1, 1);
132
133 // Convert PDF to HTML
134 Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);
135
136 cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
137 }
138 catch (Common::Exception& e)
139 {
140 cout << "Unable to convert PDF document to HTML, error: " << e << endl;
141 err = 1;
142 }
143 catch (...)
144 {
145 cout << "Unknown Exception" << endl;
146 err = 1;
147 }
148
149 //////////////////////////////////////////////////////////////////////////
150
151 PDFNet::Terminate();
152 cout << "Done.\n";
153 return err;
154}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales
Convert PDF to HTML with Apryse SDK in C++ | Apryse documentation