Some test text!

Discord Logo

Chat with us

PDFTron is now Apryse, learn more here.

Convert PDF to HTML in C++

More languages

More languages
C++
C#
C# (.NET Core)
Go
Java
Obj-C
JS (Node.js)
PHP
Python
Ruby
VB

Sample C++ code for using PDFTron SDK to programmatically convert generic PDF documents to HTML. Learn more about our C++ PDF to HTML

Get Started Samples Download

To run this sample, get started with a free trial of Apryse SDK.

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2023 by PDFTron Systems Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------

#include <iostream>
#include <sstream>
#include <PDF/PDFNet.h>
#include <PDF/Convert.h>
#include <PDF/StructuredOutputModule.h>
#include "../../LicenseKey/CPP/LicenseKey.h"

//---------------------------------------------------------------------------------------
// The following sample illustrates how to use the PDF::Convert utility class to convert 
// documents and files to HTML.
//
// There are two HTML modules and one of them is an optional PDFNet Add-on.
// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
//    documents.
// 2. The optional add-on module is used to convert PDF documents to HTML documents with
//    text flowing across the browser window.
//
// The PDFTron SDK HTML add-on module can be downloaded from http://www.pdftron.com/
//
// Please contact us if you have any questions.	
//---------------------------------------------------------------------------------------

using namespace pdftron;
using namespace PDF;
using namespace std;

UString inputPath("../../TestFiles/");
UString outputPath("../../TestFiles/Output/");

int main(int argc, char *argv[])
{	
	// The first step in every application using PDFNet is to initialize the 
	// library. The library is usually initialized only once, but calling 
	// Initialize() multiple times is also fine.
	PDFNet::Initialize(LicenseKey);

	int err = 0;

	//////////////////////////////////////////////////////////////////////////

	try
	{
		// Convert PDF document to HTML with fixed positioning option turned on (default)
		cout << "Converting PDF to HTML with fixed positioning option turned on (default)" << endl;

		UString outputFile = outputPath + "paragraphs_and_tables_fixed_positioning";

		// Convert PDF to HTML
		Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile);

		cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
	}
	catch (Common::Exception& e)
	{
		cout << "Unable to convert PDF document to HTML, error: " << e << endl;
		err = 1;
	}
	catch (...)
	{
		cout << "Unknown Exception" << endl;
		err = 1;
	}

	//////////////////////////////////////////////////////////////////////////

	PDFNet::AddResourceSearchPath("../../../Lib/");

	if (!StructuredOutputModule::IsModuleAvailable())
	{
		cout << endl;
		cout << "Unable to run part of the sample: PDFTron SDK Structured Output module not available." << endl;
		cout << "-------------------------------------------------------------------------------------" << endl;
		cout << "The Structured Output module is an optional add-on, available for download" << endl;
		cout << "at https://docs.apryse.com/documentation/core/info/modules/. If you have already" << endl;
		cout << "downloaded this module, ensure that the SDK is able to find the required files" << endl;
		cout << "using the PDFNet::AddResourceSearchPath() function." << endl;
		cout << endl;
		return 0;
	}

	//////////////////////////////////////////////////////////////////////////

	try
	{
		// Convert PDF document to HTML with reflow full option turned on (1)
		cout << "Converting PDF to HTML with reflow full option turned on (1)" << endl;

		UString outputFile = outputPath + "paragraphs_and_tables_reflow_full.html";

		Convert::HTMLOutputOptions htmlOutputOptions;

		// Set e_reflow_full content reflow setting
		htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);

		// Convert PDF to HTML
		Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);

		cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
	}
	catch (Common::Exception& e)
	{
		cout << "Unable to convert PDF document to HTML, error: " << e << endl;
		err = 1;
	}
	catch (...)
	{
		cout << "Unknown Exception" << endl;
		err = 1;
	}

	//////////////////////////////////////////////////////////////////////////

	try
	{
		// Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
		cout << "Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)" << endl;

		UString outputFile = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";

		Convert::HTMLOutputOptions htmlOutputOptions;

		// Set e_reflow_full content reflow setting
		htmlOutputOptions.SetContentReflowSetting(Convert::HTMLOutputOptions::e_reflow_full);

		// Convert only the first page
		htmlOutputOptions.SetPages(1, 1);

		// Convert PDF to HTML
		Convert::ToHtml(inputPath + "paragraphs_and_tables.pdf", outputFile, htmlOutputOptions);

		cout << "Result saved in " << outputFile.ConvertToUtf8().c_str() << endl;
	}
	catch (Common::Exception& e)
	{
		cout << "Unable to convert PDF document to HTML, error: " << e << endl;
		err = 1;
	}
	catch (...)
	{
		cout << "Unknown Exception" << endl;
		err = 1;
	}

	//////////////////////////////////////////////////////////////////////////

	PDFNet::Terminate();
	cout << "Done.\n";
	return err;
}