PDF2Html - Convert PDF to HTML - PHP Sample Code

Sample code for using the Apryse SDK to programmatically convert generic PDF documents to HTML, provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our PDF to HTML conversion.

<?php
//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
// Consult LICENSE.txt regarding license information.
//---------------------------------------------------------------------------------------

// Load the PHP wrapper only when it exists at this relative path.
// NOTE(review): when the file is absent the PDFNet classes are presumably made
// available some other way (e.g. a preloaded extension) — confirm for your setup.
if (file_exists("../../../PDFNetC/Lib/PDFNetPHP.php")) {
    include("../../../PDFNetC/Lib/PDFNetPHP.php");
}
// Defines the $LicenseKey global that main() reads.
include("../../LicenseKey/PHP/LicenseKey.php");

//---------------------------------------------------------------------------------------
// The following sample illustrates how to use the PDF::Convert utility class to convert
// documents and files to HTML.
//
// There are two HTML modules and one of them is an optional PDFNet Add-on.
// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
//    documents.
// 2. The optional add-on module is used to convert PDF documents to HTML documents with
//    text flowing across the browser window.
//
// The PDFTron SDK HTML add-on module can be downloaded from https://dev.apryse.com/
//
// Please contact us if you have any questions.
//---------------------------------------------------------------------------------------

/**
 * Runs the PDF-to-HTML conversion sample.
 *
 * Performs three conversions of TestFiles/paragraphs_and_tables.pdf:
 *   1. default (fixed positioning) conversion,
 *   2. full-reflow conversion of the whole document,
 *   3. full-reflow conversion of the first page only.
 *
 * Conversions 2 and 3 are skipped when StructuredOutputModule::IsModuleAvailable()
 * reports that the module is not available. Each conversion catches its own
 * exception and prints the message, so a failure in one does not prevent the
 * following ones from running.
 *
 * Reads the global $LicenseKey and prints progress messages to the output.
 *
 * @return void
 */
function main()
{
    // Relative path to the folder containing the test files.
    $inputPath = getcwd()."/../../TestFiles/";
    $outputPath = $inputPath."Output/";

    // The first step in every application using PDFNet is to initialize the
    // library. The library is usually initialized only once, but calling
    // Initialize() multiple times is also fine.
    global $LicenseKey;
    PDFNet::Initialize($LicenseKey);
    // Wait for fonts to be loaded if they haven't already. This is done because
    // PHP can run into errors when shutting down if font loading is still in progress.
    PDFNet::GetSystemFontList();

    //-----------------------------------------------------------------------------------

    try {
        // Convert PDF document to HTML with fixed positioning option turned on (default)
        echo(nl2br("Converting PDF to HTML with fixed positioning option turned on (default)\n"));

        $outputFile = $outputPath."paragraphs_and_tables_fixed_positioning";

        Convert::ToHtml($inputPath."paragraphs_and_tables.pdf", $outputFile);

        echo(nl2br("Result saved in " . $outputFile . "\n"));
    }
    catch(Exception $e) {
        echo(nl2br("Unable to convert PDF document to HTML, error: " . $e->getMessage() . "\n"));
    }

    //-----------------------------------------------------------------------------------

    PDFNet::AddResourceSearchPath("../../../PDFNetC/Lib/");

    if (!StructuredOutputModule::IsModuleAvailable()) {
        echo(nl2br("\n"));
        echo(nl2br("Unable to run part of the sample: PDFTron SDK Structured Output module not available.\n"));
        echo(nl2br("-------------------------------------------------------------------------------------\n"));
        echo(nl2br("The Structured Output module is an optional add-on, available for download\n"));
        echo(nl2br("at https://docs.apryse.com/core/info/modules/. If you have already\n"));
        echo(nl2br("downloaded this module, ensure that the SDK is able to find the required files\n"));
        echo(nl2br("using the PDFNet::AddResourceSearchPath() function.\n"));
        echo(nl2br("\n"));
        // Shut the library down on this early exit too, exactly as the normal
        // exit path at the end of the function does.
        PDFNet::Terminate();
        return;
    }

    //-----------------------------------------------------------------------------------

    try {
        // Convert PDF document to HTML with reflow full option turned on (1)
        echo(nl2br("Converting PDF to HTML with reflow full option turned on (1)\n"));

        $outputFile = $outputPath."paragraphs_and_tables_reflow_full.html";

        $htmlOutputOptions = new HTMLOutputOptions();

        // Set e_reflow_full content reflow setting
        $htmlOutputOptions->SetContentReflowSetting(HTMLOutputOptions::e_reflow_full);

        Convert::ToHtml($inputPath."paragraphs_and_tables.pdf", $outputFile, $htmlOutputOptions);

        echo(nl2br("Result saved in " . $outputFile . "\n"));
    }
    catch(Exception $e) {
        echo(nl2br("Unable to convert PDF document to HTML, error: " . $e->getMessage() . "\n"));
    }

    //-----------------------------------------------------------------------------------

    try {
        // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
        echo(nl2br("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)\n"));

        $outputFile = $outputPath."paragraphs_and_tables_reflow_full_first_page.html";

        $htmlOutputOptions = new HTMLOutputOptions();

        // Set e_reflow_full content reflow setting
        $htmlOutputOptions->SetContentReflowSetting(HTMLOutputOptions::e_reflow_full);

        // Convert only the first page
        $htmlOutputOptions->SetPages(1, 1);

        Convert::ToHtml($inputPath."paragraphs_and_tables.pdf", $outputFile, $htmlOutputOptions);

        echo(nl2br("Result saved in " . $outputFile . "\n"));
    }
    catch(Exception $e) {
        echo(nl2br("Unable to convert PDF document to HTML, error: " . $e->getMessage() . "\n"));
    }

    //-----------------------------------------------------------------------------------
    PDFNet::Terminate();
    echo(nl2br("Done.\n"));
}

main();
?>

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales