Some test text!

Search
Hamburger Icon

Convert PDF to HTML in JavaScript

More languages

More languages
C++
C#
C# (.NET Core)
Go
Java
Obj-C
JS (Node.js)
PHP
Python
Ruby
VB

Sample JavaScript code for using PDFTron SDK to programmatically convert generic PDF documents to HTML. Learn more about our JavaScript PDF to HTML conversion.

Get Started Samples Download

To run this sample, get started with a free trial of Apryse SDK.

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------

//---------------------------------------------------------------------------------------
// The following sample illustrates how to use the PDF::Convert utility class to convert 
// documents and files to HTML.
//
// There are two HTML modules and one of them is an optional PDFNet Add-on.
// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
//    documents.
// 2. The optional Structured Output add-on module is used to convert PDF documents to
//    HTML documents with text flowing across the browser window.
//
// The Apryse SDK Structured Output module can be downloaded from
// https://docs.apryse.com/documentation/core/info/modules/
//
// Please contact us if you have any questions.	
//---------------------------------------------------------------------------------------

const { PDFNet } = require('@pdftron/pdfnet-node');
const PDFTronLicense = require('../LicenseKey/LicenseKey');

((exports) => {
	'use strict';

	exports.runPDF2HtmlTest = () => {

		const main = async () => {

			const inputPath = '../TestFiles/';
			const outputPath = '../TestFiles/Output/';

			//////////////////////////////////////////////////////////////////////////

			try {
				// Convert PDF document to HTML with fixed positioning option turned on (default)
				console.log('Converting PDF to HTML with fixed positioning option turned on (default)');

				const outputFile = outputPath + 'paragraphs_and_tables_fixed_positioning';

				// Convert PDF to HTML
				await PDFNet.Convert.fileToHtml(inputPath + 'paragraphs_and_tables.pdf', outputFile);

				console.log('Result saved in ' + outputFile);
			} catch (err) {
				console.log(err);
			}

			//////////////////////////////////////////////////////////////////////////

			await PDFNet.addResourceSearchPath('../../lib/');

			if (!await PDFNet.StructuredOutputModule.isModuleAvailable()) {
				console.log('\nUnable to run part of the sample: Apryse SDK Structured Output module not available.');
				console.log('---------------------------------------------------------------');
				console.log('The Structured Output module is an optional add-on, available for download');
				console.log('at https://docs.apryse.com/documentation/core/info/modules/. If you have already');
				console.log('downloaded this module, ensure that the SDK is able to find the required files');
				console.log('using the PDFNet::AddResourceSearchPath() function.\n');

				return;
			}

			//////////////////////////////////////////////////////////////////////////

			try {
				// Convert PDF document to HTML with reflow full option turned on (1)
				console.log('Converting PDF to HTML with reflow full option turned on (1)');

				const outputFile = outputPath + 'paragraphs_and_tables_reflow_full.html';

				const htmlOutputOptions = new PDFNet.Convert.HTMLOutputOptions();

				// Set e_reflow_full content reflow setting
				htmlOutputOptions.setContentReflowSetting(PDFNet.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);

				// Convert PDF to HTML
				await PDFNet.Convert.fileToHtml(inputPath + 'paragraphs_and_tables.pdf', outputFile, htmlOutputOptions);

				console.log('Result saved in ' + outputFile);
			} catch (err) {
				console.log(err);
			}

			//////////////////////////////////////////////////////////////////////////

			try {
				// Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
				console.log('Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)');

				const outputFile = outputPath + 'paragraphs_and_tables_reflow_full_first_page.html';

				const htmlOutputOptions = new PDFNet.Convert.HTMLOutputOptions();

				// Set e_reflow_full content reflow setting
				htmlOutputOptions.setContentReflowSetting(PDFNet.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);

				// Convert only the first page
				htmlOutputOptions.setPages(1, 1);

				// Convert PDF to HTML
				await PDFNet.Convert.fileToHtml(inputPath + 'paragraphs_and_tables.pdf', outputFile, htmlOutputOptions);

				console.log('Result saved in ' + outputFile);
			} catch (err) {
				console.log(err);
			}

			//////////////////////////////////////////////////////////////////////////

			console.log('Done.');
		};

		PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) {
			console.log('Error: ' + JSON.stringify(error));
		}).then(function () { return PDFNet.shutdown(); });
	};
	exports.runPDF2HtmlTest();
})(exports);
// eslint-disable-next-line spaced-comment
//# sourceURL=PDF2HtmlTest.js