Sample Java code for using Apryse SDK to convert Office documents to PDF (including Word, Excel, PowerPoint and Publisher) without needing any external dependencies or MS Office licenses. Office to PDF conversion can be performed on a Linux or Windows server to automate Office-centric workflows, or entirely in the user's client (web browser, mobile device). The conversion functionality can be combined with our Viewer to display or annotate Office files (docx, xlsx, pptx) on all major platforms, including Web, Android, iOS, Xamarin, UWP, and Windows. Learn more about our Android SDK and Office Document Conversion Library.
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import android.content.Context;
9
10import com.pdftron.android.pdfnetsdksamples.OutputListener;
11import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
12import com.pdftron.android.pdfnetsdksamples.R;
13import com.pdftron.android.pdfnetsdksamples.util.Utils;
14import com.pdftron.common.PDFNetException;
15import com.pdftron.pdf.Convert;
16import com.pdftron.pdf.DocumentConversion;
17import com.pdftron.pdf.OfficeToPDFOptions;
18import com.pdftron.pdf.PDFDoc;
19import com.pdftron.pdf.PDFNet;
20import com.pdftron.sdf.SDFDoc;
21
22import java.io.File;
23import java.util.ArrayList;
24
25/**
26 * The following sample illustrates how to use the PDF.Convert utility class to convert
27 * .docx files to PDF
28 * <p>
29 * This conversion is performed entirely within the PDFNet and has *no* external or
30 * system dependencies dependencies -- Conversion results will be the sam whether
31 * on Windows, Linux or Android.
32 * <p>
33 * Please contact us if you have any questions.
34 */
35public class OfficeToPDFTest extends PDFNetSample {
36
37 private static OutputListener mOutputListener;
38
39 private static ArrayList<String> mFileList = new ArrayList<>();
40
41 private static String sLayoutSmartPluginPath;
42
43 public OfficeToPDFTest(Context context) {
44 try {
45 String layoutPluginPath = Utils.copyResourceToTempFolder(context, R.raw.pdftron_layout_resources, false, "pdftron_layout_resources.plugin");
46 PDFNet.addResourceSearchPath(layoutPluginPath);
47 sLayoutSmartPluginPath = Utils.copyResourceToTempFolder(context, R.raw.pdftron_smart_substitution, false, "pdftron_smart_substitution.plugin");
48 PDFNet.addResourceSearchPath(sLayoutSmartPluginPath);
49 } catch (Exception e) {
50 mOutputListener.printError(e.getStackTrace());
51 }
52
53 setTitle(R.string.sample_officetopdf_title);
54 setDescription(R.string.sample_officetopdf_description);
55 }
56
57 @Override
58 public void run(OutputListener outputListener) {
59 super.run(outputListener);
60 mOutputListener = outputListener;
61 mFileList.clear();
62 printHeader(outputListener);
63
64
65 // first the one-line conversion interface
66 simpleDocxConvert("Fishermen.docx", "Fishermen.pdf");
67
68 // then the more flexible line-by-line interface
69 flexibleDocxConvert("the_rime_of_the_ancient_mariner.docx", "the_rime_of_the_ancient_mariner.pdf");
70
71 // conversion of RTL content
72 flexibleDocxConvert("factsheet_Arabic.docx", "factsheet_Arabic.pdf");
73
74 for (String file : mFileList) {
75 addToFileList(file);
76 }
77 printFooter(outputListener);
78 }
79
80
81 public static void simpleDocxConvert(String inputFilename, String outputFilename) {
82 try (PDFDoc doc = new PDFDoc()) {
83
84 // perform the conversion with no optional parameters
85 PDFDoc pdfdoc = new PDFDoc();
86 Convert.officeToPdf(pdfdoc, Utils.getAssetTempFile(INPUT_PATH + inputFilename).getAbsolutePath(), null);
87
88 // save the result
89 pdfdoc.save(Utils.createExternalFile(outputFilename, mFileList).getAbsolutePath(), SDFDoc.SaveMode.INCREMENTAL, null);
90
91 // And we're done!
92 mOutputListener.println("Done conversion " + Utils.createExternalFile(outputFilename, mFileList).getAbsolutePath());
93 } catch (PDFNetException e) {
94 mOutputListener.println("Unable to convert MS Office document, error:");
95 mOutputListener.printError(e.getStackTrace());
96 mOutputListener.printError(e.getStackTrace());
97 }
98 }
99
100 public static void flexibleDocxConvert(String inputFilename, String outputFilename) {
101 try {
102 OfficeToPDFOptions options = new OfficeToPDFOptions();
103 options.setSmartSubstitutionPluginPath(sLayoutSmartPluginPath);
104
105 // create a conversion object -- this sets things up but does not yet
106 // perform any conversion logic.
107 // in a multithreaded environment, this object can be used to monitor
108 // the conversion progress and potentially cancel it as well
109 DocumentConversion conversion = Convert.streamingPdfConversion(
110 Utils.getAssetTempFile(INPUT_PATH + inputFilename).getAbsolutePath(), options);
111
112 mOutputListener.println(inputFilename + ": " + Math.round(conversion.getProgress() * 100.0)
113 + "% " + conversion.getProgressLabel());
114
115 // actually perform the conversion
116 while (conversion.getConversionStatus() == DocumentConversion.e_incomplete) {
117 conversion.convertNextPage();
118 mOutputListener.println(inputFilename + ": " + Math.round(conversion.getProgress() * 100.0)
119 + "% " + conversion.getProgressLabel());
120 }
121
122 if (conversion.tryConvert() == DocumentConversion.e_success) {
123 int num_warnings = conversion.getNumWarnings();
124
125 // print information about the conversion
126 for (int i = 0; i < num_warnings; ++i) {
127 mOutputListener.println("Warning: " + conversion.getWarningString(i));
128 }
129
130 // save the result
131 try (PDFDoc doc = conversion.getDoc()) {
132 doc.save(Utils.createExternalFile(outputFilename, mFileList).getAbsolutePath(), SDFDoc.SaveMode.INCREMENTAL, null);
133 }
134
135 // done
136 mOutputListener.println("Done conversion " + Utils.createExternalFile(outputFilename, mFileList).getAbsolutePath());
137 } else {
138 mOutputListener.println("Encountered an error during conversion: " + conversion.getErrorString());
139 }
140 } catch (PDFNetException e) {
141 mOutputListener.println("Unable to convert MS Office document, error:");
142 mOutputListener.printError(e.getStackTrace());
143 mOutputListener.printError(e.getStackTrace());
144 }
145 }
146
147}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5package com.pdftron.android.pdfnetsdksamples.samples
6
7import com.pdftron.android.pdfnetsdksamples.OutputListener
8import com.pdftron.android.pdfnetsdksamples.PDFNetSample
9import com.pdftron.android.pdfnetsdksamples.R
10import com.pdftron.android.pdfnetsdksamples.util.Utils
11import com.pdftron.common.PDFNetException
12import com.pdftron.pdf.PDFDoc
13import com.pdftron.pdf.PageIterator
14import com.pdftron.pdf.struct.ContentItem
15import com.pdftron.pdf.struct.SElement
16import com.pdftron.pdf.struct.STree
17import com.pdftron.sdf.SDFDoc
18
/**
 * Sample that opens the bundled "tagged.pdf" and prints its logical structure in three ways:
 * a traversal of the structure tree, a per-page lookup of each layout element's parent
 * structure element, and an "XML style" dump that combines structure with page text.
 */
class LogicalStructureTest : PDFNetSample() {
    /**
     * Runs the three logical-structure samples, saves the document as "LogicalStructure.pdf"
     * and registers the produced files.
     *
     * @param outputListener receives all sample output; must not be null
     */
    override fun run(outputListener: OutputListener?) {
        super.run(outputListener)
        mOutputListener = outputListener
        mFileList.clear()
        printHeader(outputListener!!)
        try {
            // Extract logical structure from a PDF document
            PDFDoc(Utils.getAssetTempFile(INPUT_PATH.toString() + "tagged.pdf")!!.getAbsolutePath()).use { doc ->
                doc.initSecurityHandler()
                mOutputListener!!.println("____________________________________________________________")
                mOutputListener!!.println("Sample 1 - Traverse logical structure tree...")
                run {
                    val tree: STree = doc.getStructTree()
                    if (tree.isValid()) {
                        mOutputListener!!.println("Document has a StructTree root.")
                        for (i in 0 until tree.getNumKids()) {
                            // Recursively get structure info for all child elements.
                            ProcessStructElement(tree.getKid(i), 0)
                        }
                    } else {
                        mOutputListener!!.println("This document does not contain any logical structure.")
                    }
                }
                mOutputListener!!.println("\nDone 1.")
                mOutputListener!!.println("____________________________________________________________")
                mOutputListener!!.println("Sample 2 - Get parent logical structure elements from")
                mOutputListener!!.println("layout elements.")
                run {
                    val reader: com.pdftron.pdf.ElementReader = com.pdftron.pdf.ElementReader()
                    val itr: PageIterator = doc.getPageIterator()
                    while (itr.hasNext()) {
                        reader.begin(itr.next())
                        ProcessElements(reader)
                        reader.end()
                    }
                }
                mOutputListener!!.println("\nDone 2.")
                mOutputListener!!.println("____________________________________________________________")
                mOutputListener!!.println("Sample 3 - 'XML style' extraction of PDF logical structure and page content.")
                run {
                    // Maps page numbers to per-page maps, which in turn map a
                    // struct MCID to the text collected for it.
                    val mcidDocMap: MutableMap<Int, Map<Int, String>> = java.util.TreeMap<Int, Map<Int, String>>()
                    val reader: com.pdftron.pdf.ElementReader = com.pdftron.pdf.ElementReader()
                    val itr: PageIterator = doc.getPageIterator()
                    while (itr.hasNext()) {
                        val current: com.pdftron.pdf.Page? = itr.next()
                        reader.begin(current)
                        val pageMcidMap: MutableMap<Int, String> = java.util.TreeMap<Int, String>()
                        mcidDocMap.put(current!!.getIndex(), pageMcidMap)
                        ProcessElements2(reader, pageMcidMap)
                        reader.end()
                    }
                    val tree: STree = doc.getStructTree()
                    if (tree.isValid()) {
                        for (i in 0 until tree.getNumKids()) {
                            ProcessStructElement2(tree.getKid(i), mcidDocMap, 0)
                        }
                    }
                }
                mOutputListener!!.println("\nDone 3.")
                doc.save(Utils.createExternalFile("LogicalStructure.pdf", mFileList).getAbsolutePath(), SDFDoc.SaveMode.LINEARIZED, null)
            }
        } catch (e: java.lang.Exception) {
            mOutputListener!!.printError(e.getStackTrace())
        }
        for (file in mFileList) {
            addToFileList(file)
        }
        printFooter(outputListener)
    }

    companion object {
        // Output sink shared by the helper functions below; assigned in run().
        private var mOutputListener: OutputListener? = null

        // Output files produced by the current run.
        private val mFileList: java.util.ArrayList<String> = java.util.ArrayList<String>()

        /** Starts a new output line and prints one space per indent level. */
        fun PrintIndent(indent: Int) {
            val out = mOutputListener!!
            out.println()
            for (i in 0 until indent) out.print(" ")
        }

        /**
         * Used in code snippet 1. Prints the type and optional title of [element], then
         * walks its kids: content items are described in place, nested structure
         * elements are processed recursively one indent level deeper.
         */
        @Throws(PDFNetException::class)
        fun ProcessStructElement(element: SElement, indent: Int) {
            if (!element.isValid()) {
                return
            }
            val out = mOutputListener!!

            // Print out the type and title info, if any.
            PrintIndent(indent)
            val childIndent = indent + 1
            out.print("Type: " + element.getType())
            if (element.hasTitle()) {
                out.print(". Title: " + element.getTitle())
            }
            val num: Int = element.getNumKids()
            for (i in 0 until num) {
                // Check if the kid is a leaf node (i.e. it is a ContentItem).
                if (element.isContentItem(i)) {
                    val cont: ContentItem = element.getAsContentItem(i)
                    val page: com.pdftron.pdf.Page = cont.getPage()
                    PrintIndent(childIndent)
                    out.print("Content Item. Part of page #" + page.getIndex())
                    PrintIndent(childIndent)
                    when (cont.getType()) {
                        ContentItem.e_MCID, ContentItem.e_MCR -> out.print("MCID: " + cont.getMCID())
                        ContentItem.e_OBJR -> {
                            out.print("OBJR ")
                            // Declared nullable so that a null result from the Java call reaches
                            // the check below instead of failing at the assignment.
                            val refObj: com.pdftron.sdf.Obj? = cont.getRefObj()
                            if (refObj != null) out.print("- Referenced Object#: " + refObj.getObjNum())
                        }
                        else -> {
                        }
                    }
                } else { // the kid is another StructElement node.
                    ProcessStructElement(element.getAsStructElem(i), childIndent)
                }
            }
        }

        /**
         * Used in code snippet 2. Reads every element from [reader] and, for path, text
         * and form XObject elements, prints the element kind followed by details of its
         * parent structure element when that element is valid.
         */
        @Throws(PDFNetException::class)
        fun ProcessElements(reader: com.pdftron.pdf.ElementReader) {
            val out = mOutputListener!!
            // Read page contents
            while (true) {
                val element: com.pdftron.pdf.Element = reader.next() ?: break
                // In this sample we process only paths, text and form XObjects, but the
                // code can be extended to handle any element type.
                val type: Int = element.getType()
                // The third alternative is e_form (the original tested e_path twice, which
                // made the FORM XObject branch below unreachable).
                if (type == com.pdftron.pdf.Element.e_path || type == com.pdftron.pdf.Element.e_text || type == com.pdftron.pdf.Element.e_form) {
                    when (type) {
                        com.pdftron.pdf.Element.e_path -> out.print("\nPATH: ")
                        // Plain template rather than a trimIndent() raw string, so the
                        // extracted text is printed exactly as returned.
                        com.pdftron.pdf.Element.e_text -> out.print("\nTEXT: ${element.getTextString()}\n")
                        com.pdftron.pdf.Element.e_form -> out.print("\nFORM XObject: ")
                    }

                    // Check if the element is associated with any structural element.
                    // Content items are leaf nodes of the structure tree.
                    val structParent: SElement = element.getParentStructElement()
                    if (structParent.isValid()) {
                        // Print out the parent structural element's type, title, and object number.
                        out.print(" Type: " + structParent.getType()
                                + ", MCID: " + element.getStructMCID())
                        if (structParent.hasTitle()) {
                            out.print(". Title: " + structParent.getTitle())
                        }
                        out.print(", Obj#: " + structParent.getSDFObj().getObjNum())
                    }
                }
            }
        }

        /**
         * Used in code snippet 3. Reads every element from [reader] and appends the text of
         * each text element with a non-negative struct MCID to that MCID's entry in
         * [mcid_page_map].
         */
        @Throws(PDFNetException::class)
        fun ProcessElements2(reader: com.pdftron.pdf.ElementReader, mcid_page_map: MutableMap<Int, String>) {
            // Read page contents
            while (true) {
                val element: com.pdftron.pdf.Element = reader.next() ?: break
                // In this sample we process only text, but the code can be extended
                // to handle paths, images, or any other Element type.
                val mcid: Int = element.getStructMCID()
                if (mcid >= 0 && element.getType() == com.pdftron.pdf.Element.e_text) {
                    val text: String = element.getTextString()
                    // Append to the text already collected for this MCID, if any.
                    mcid_page_map[mcid] = (mcid_page_map[mcid] ?: "") + text
                }
            }
        }

        /**
         * Used in code snippet 3. Prints [element] as an XML-like tag. For each kid that is
         * an MCID content item, the text recorded in [mcid_doc_map] for its page and MCID
         * is printed; nested structure elements are printed recursively one level deeper.
         */
        @Throws(PDFNetException::class)
        fun ProcessStructElement2(element: SElement, mcid_doc_map: Map<Int, Map<Int, String>>, indent: Int) {
            if (!element.isValid()) {
                return
            }
            val out = mOutputListener!!

            // Print out the type and title info, if any.
            PrintIndent(indent)
            out.print("<" + element.getType())
            if (element.hasTitle()) {
                out.print(" title=\"" + element.getTitle() + "\"")
            }
            out.print(">")
            val num: Int = element.getNumKids()
            for (i in 0 until num) {
                if (element.isContentItem(i)) {
                    val cont: ContentItem = element.getAsContentItem(i)
                    if (cont.getType() == ContentItem.e_MCID) {
                        val pageNum: Int = cont.getPage().getIndex()
                        val pageMap = mcid_doc_map[pageNum]
                        if (pageMap != null) {
                            val mcidKey: Int = cont.getMCID()
                            if (pageMap.containsKey(mcidKey)) {
                                out.print(pageMap[mcidKey])
                            }
                        }
                    }
                } else { // the kid is another StructElement node.
                    ProcessStructElement2(element.getAsStructElem(i), mcid_doc_map, indent + 1)
                }
            }
            PrintIndent(indent)
            out.print("</" + element.getType() + ">")
        }
    }

    init {
        setTitle(R.string.sample_logicalstructure_title)
        setDescription(R.string.sample_logicalstructure_description)
    }
}
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales