PDF2Html - Convert PDF to HTML - Go Sample Code

Sample code for using the Apryse SDK to programmatically convert generic PDF documents to HTML, provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our PDF to HTML conversion.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8 "fmt"
9 . "pdftron"
10)
11
12import "pdftron/Samples/LicenseKey/GO"
13
//---------------------------------------------------------------------------------------
// The following sample illustrates how to use the PDF::Convert utility class to convert
// documents and files to HTML.
//
// There are two HTML modules, one of which is an optional PDFNet add-on.
// 1. The built-in HTML module is used to convert PDF documents to fixed-position HTML
//    documents.
// 2. The optional add-on module is used to convert PDF documents to HTML documents with
//    text flowing across the browser window.
//
// The PDFTron SDK HTML add-on module can be downloaded from http://www.pdftron.com/
//
// Please contact us if you have any questions.
//---------------------------------------------------------------------------------------
28
// Relative paths used by the samples below: inputPath is the folder containing
// the test files, and outputPath is the folder the converted results are
// written to. Both end with a slash because file names are appended to them
// by plain string concatenation.
var (
	inputPath  = "../../TestFiles/"
	outputPath = "../../TestFiles/Output/"
)
32
33//---------------------------------------------------------------------------------------
34
// catch is meant to be deferred with the address of the enclosing function's
// named error result. If a panic is in flight it recovers it and stores it in
// *err; when nothing panicked, *err is left untouched.
//
// A panic value that is itself an error is wrapped with %w so the original
// error stays reachable through Unwrap; any other value is formatted with %v.
// The resulting message text is the same either way.
func catch(err *error) {
	// recover only has an effect because catch itself is the deferred call.
	r := recover()
	if r == nil {
		return
	}
	if cause, ok := r.(error); ok {
		*err = fmt.Errorf("%w", cause)
		return
	}
	*err = fmt.Errorf("%v", r)
}
40
41//---------------------------------------------------------------------------------------
42
43func ConvertToHtmlFixedPositionTest() (err error) {
44 defer catch(&err)
45
46 // Convert PDF document to HTML with fixed positioning option turned on (default)
47 fmt.Println("Converting PDF to HTML with fixed positioning option turned on (default)")
48
49 inputFile := inputPath + "paragraphs_and_tables.pdf"
50 outputFile := outputPath + "paragraphs_and_tables_fixed_positioning"
51
52 // Convert to HTML
53 ConvertToHtml(inputFile, outputFile)
54
55 fmt.Println("Result saved in " + outputFile)
56 return nil
57}
58
59//---------------------------------------------------------------------------------------
60
61func ConvertToHtmlReflowParagraphTest1() (err error) {
62 defer catch(&err)
63
64 // Convert PDF document to HTML with reflow full option turned on (1)
65 fmt.Println("Converting PDF to HTML with reflow full option turned on (1)")
66
67 inputFile := inputPath + "paragraphs_and_tables.pdf"
68 outputFile := outputPath + "paragraphs_and_tables_reflow_full.html"
69
70 htmlOutputOptions := NewHTMLOutputOptions()
71
72 // Set e_reflow_full content reflow setting
73 htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptionsE_reflow_full);
74
75 // Convert to HTML
76 ConvertToHtml(inputFile, outputFile, htmlOutputOptions)
77
78 fmt.Println("Result saved in " + outputFile)
79 return nil
80}
81
82//---------------------------------------------------------------------------------------
83
84func ConvertToHtmlReflowParagraphTest2() (err error) {
85 defer catch(&err)
86
87 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
88 fmt.Println("Converting PDF to HTML with reflow full option turned on (only converting the first page) (2)")
89
90 inputFile := inputPath + "paragraphs_and_tables.pdf"
91 outputFile := outputPath + "paragraphs_and_tables_reflow_full_first_page.html"
92
93 htmlOutputOptions := NewHTMLOutputOptions()
94
95 // Set e_reflow_full content reflow setting
96 htmlOutputOptions.SetContentReflowSetting(HTMLOutputOptionsE_reflow_full);
97
98 // Convert only the first page
99 htmlOutputOptions.SetPages(1, 1);
100
101 // Convert to HTML
102 ConvertToHtml(inputFile, outputFile, htmlOutputOptions)
103
104 fmt.Println("Result saved in " + outputFile)
105 return nil
106}
107
108//---------------------------------------------------------------------------------------
109
110func main() {
111 // The first step in every application using PDFNet is to initialize the
112 // library. The library is usually initialized only once, but calling
113 // Initialize() multiple times is also fine.
114 PDFNetInitialize(PDFTronLicense.Key)
115
116 //-----------------------------------------------------------------------------------
117
118 // Convert PDF document to HTML with fixed positioning option turned on (default)
119 err := ConvertToHtmlFixedPositionTest()
120 if err != nil {
121 fmt.Println(fmt.Errorf("Unable to convert PDF document to HTML, error: %s", err))
122 }
123
124 //-----------------------------------------------------------------------------------
125
126 PDFNetAddResourceSearchPath("../../../PDFNetC/Lib/")
127
128 if !StructuredOutputModuleIsModuleAvailable() {
129 fmt.Println("")
130 fmt.Println("Unable to run part of the sample: PDFTron SDK Structured Output module not available.")
131 fmt.Println("-------------------------------------------------------------------------------------")
132 fmt.Println("The Structured Output module is an optional add-on, available for download")
133 fmt.Println("at https://docs.apryse.com/core/info/modules/. If you have already")
134 fmt.Println("downloaded this module, ensure that the SDK is able to find the required file")
135 fmt.Println("using the PDFNet::AddResourceSearchPath() function.")
136 fmt.Println("")
137 return
138 }
139
140 //-----------------------------------------------------------------------------------
141
142 // Convert PDF document to HTML with reflow full option turned on (1)
143 err = ConvertToHtmlReflowParagraphTest1()
144 if err != nil {
145 fmt.Println(fmt.Errorf("Unable to convert PDF document to HTML, error: %s", err))
146 }
147
148 //-----------------------------------------------------------------------------------
149
150 // Convert PDF document to HTML with reflow full option turned on (only converting the first page) (2)
151 err = ConvertToHtmlReflowParagraphTest2()
152 if err != nil {
153 fmt.Println(fmt.Errorf("Unable to convert PDF document to HTML, error: %s", err))
154 }
155
156 //-----------------------------------------------------------------------------------
157
158 PDFNetTerminate()
159 fmt.Println("Done.")
160}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales