PDF2Html - Convert PDF to HTML - C# (.NET) Sample Code

Sample code for using the Apryse SDK to programmatically convert generic PDF documents to HTML, provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our PDF to HTML conversion.

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using pdftron;
7using pdftron.Common;
8using pdftron.PDF;
9
namespace PDF2HtmlTestCS
{
    /// <summary>
    /// The following sample illustrates how to use the PDF::Convert utility class to convert
    /// documents and files to HTML.
    ///
    /// There are two HTML modules and one of them is an optional PDFNet Add-on.
    /// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
    ///    documents.
    /// 2. The optional add-on module is used to convert PDF documents to HTML documents with
    ///    text flowing across the browser window.
    ///
    /// The Apryse SDK HTML add-on module can be downloaded from http://www.pdftron.com/
    ///
    /// Please contact us if you have any questions.
    /// </summary>
    class Class1
    {
        // Obtaining the loader instance in a static field initializer makes it run during
        // type initialization of Class1, i.e. before Main touches any PDFNet API.
        private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();

        // The explicit (empty) static constructor keeps the C# compiler from marking the type
        // 'beforefieldinit', so the field initializer above runs at a well-defined point.
        static Class1() { }

        // Relative path to the folder containing test files.
        const string inputPath = "../../../../TestFiles/";
        const string outputPath = "../../../../TestFiles/Output/";

        /// <summary>
        /// Runs a single conversion step and reports the outcome on the console.
        /// </summary>
        /// <param name="description">Progress message printed before the conversion starts.</param>
        /// <param name="outputFile">Output path, echoed in the success message.</param>
        /// <param name="convert">Delegate that performs the actual conversion.</param>
        /// <returns><c>true</c> if <paramref name="convert"/> completed without throwing; otherwise <c>false</c>.</returns>
        private static bool TryConvert(string description, string outputFile, Action convert)
        {
            try
            {
                Console.WriteLine(description);

                convert();

                Console.WriteLine("Result saved in " + outputFile);
                return true;
            }
            catch (PDFNetException e)
            {
                Console.WriteLine("Unable to convert PDF document to HTML, error: " + e.Message);
                return false;
            }
            catch (Exception e)
            {
                Console.WriteLine("Unknown Exception, error: ");
                Console.WriteLine(e);
                return false;
            }
        }

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <returns>0 if every attempted conversion succeeded, 1 if any of them failed.</returns>
        [STAThread]
        static int Main(string[] args)
        {
            // The first step in every application using PDFNet is to initialize the
            // library. The library is usually initialized only once, but calling
            // Initialize() multiple times is also fine.
            PDFNet.Initialize(PDFTronLicense.Key);

            bool err = false;

            //////////////////////////////////////////////////////////////////////////

            // Convert PDF document to HTML with fixed positioning option turned on (default)
            string fixedPositioningOutput = outputPath + "paragraphs_and_tables_fixed_positioning";
            err |= !TryConvert(
                "Converting PDF to HTML with fixed positioning option turned on (default)",
                fixedPositioningOutput,
                () => pdftron.PDF.Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", fixedPositioningOutput));

            //////////////////////////////////////////////////////////////////////////

            PDFNet.AddResourceSearchPath("../../../../../Lib/");

            if (!StructuredOutputModule.IsModuleAvailable())
            {
                Console.WriteLine();
                Console.WriteLine("Unable to run part of the sample: Apryse SDK Structured Output module not available.");
                Console.WriteLine("-------------------------------------------------------------------------------------");
                Console.WriteLine("The Structured Output module is an optional add-on, available for download");
                Console.WriteLine("at https://docs.apryse.com/core/info/modules/. If you have already");
                Console.WriteLine("downloaded this module, ensure that the SDK is able to find the required files");
                Console.WriteLine("using the PDFNet::AddResourceSearchPath() function.");
                Console.WriteLine();

                // A missing optional module is not an error in itself, but a failure of the
                // conversion above must still be reported, and the library must be shut down
                // exactly as on the normal exit path.
                PDFNet.Terminate();
                return err ? 1 : 0;
            }

            //////////////////////////////////////////////////////////////////////////

            // Convert PDF document to HTML with reflow full option turned on (1)
            string reflowOutput = outputPath + "paragraphs_and_tables_reflow_full.html";
            err |= !TryConvert(
                "Converting PDF to HTML with reflow full option turned on (1)",
                reflowOutput,
                () =>
                {
                    pdftron.PDF.Convert.HTMLOutputOptions htmlOutputOptions = new pdftron.PDF.Convert.HTMLOutputOptions();

                    // Set e_reflow_full content reflow setting
                    htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);

                    pdftron.PDF.Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", reflowOutput, htmlOutputOptions);
                });

            //////////////////////////////////////////////////////////////////////////

            // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
            string reflowFirstPageOutput = outputPath + "paragraphs_and_tables_reflow_full_first_page.html";
            err |= !TryConvert(
                "Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)",
                reflowFirstPageOutput,
                () =>
                {
                    pdftron.PDF.Convert.HTMLOutputOptions htmlOutputOptions = new pdftron.PDF.Convert.HTMLOutputOptions();

                    // Set e_reflow_full content reflow setting
                    htmlOutputOptions.SetContentReflowSetting(pdftron.PDF.Convert.HTMLOutputOptions.ContentReflowSetting.e_reflow_full);

                    // Convert only the first page
                    htmlOutputOptions.SetPages(1, 1);

                    pdftron.PDF.Convert.ToHtml(inputPath + "paragraphs_and_tables.pdf", reflowFirstPageOutput, htmlOutputOptions);
                });

            //////////////////////////////////////////////////////////////////////////

            PDFNet.Terminate();
            Console.WriteLine("Done.");
            return err ? 1 : 0;
        }
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales