OfficeToPDF

Sample Java code for using Apryse SDK to convert Office documents to PDF (including Word, Excel, PowerPoint and Publisher) without needing any external dependencies or MS Office licenses. Office to PDF conversion can be performed on a Linux or Windows server to automate Office-centric workflows, or entirely in the user's client (web browser, mobile device). The conversion functionality can be combined with our Viewer to display or annotate Office files (docx, xlsx, pptx) on all major platforms, including Web, Android, iOS, Xamarin, UWP, and Windows. Learn more about our Android SDK and Office Document Conversion Library.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import android.content.Context;
9
10import com.pdftron.android.pdfnetsdksamples.OutputListener;
11import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
12import com.pdftron.android.pdfnetsdksamples.R;
13import com.pdftron.android.pdfnetsdksamples.util.Utils;
14import com.pdftron.common.PDFNetException;
15import com.pdftron.pdf.Convert;
16import com.pdftron.pdf.DocumentConversion;
17import com.pdftron.pdf.OfficeToPDFOptions;
18import com.pdftron.pdf.PDFDoc;
19import com.pdftron.pdf.PDFNet;
20import com.pdftron.sdf.SDFDoc;
21
22import java.io.File;
23import java.util.ArrayList;
24
25/**
26 * The following sample illustrates how to use the PDF.Convert utility class to convert
27 * .docx files to PDF
28 * <p>
29 * This conversion is performed entirely within the PDFNet and has *no* external or
30 * system dependencies dependencies -- Conversion results will be the sam whether
31 * on Windows, Linux or Android.
32 * <p>
33 * Please contact us if you have any questions.
34 */
35public class OfficeToPDFTest extends PDFNetSample {
36
37	private static OutputListener mOutputListener;
38
39	private static ArrayList<String> mFileList = new ArrayList<>();
40
41	private static String sLayoutSmartPluginPath;
42
43    public OfficeToPDFTest(Context context) {
44        try {
45            String layoutPluginPath = Utils.copyResourceToTempFolder(context, R.raw.pdftron_layout_resources, false, "pdftron_layout_resources.plugin");
46            PDFNet.addResourceSearchPath(layoutPluginPath);
47            sLayoutSmartPluginPath = Utils.copyResourceToTempFolder(context, R.raw.pdftron_smart_substitution, false, "pdftron_smart_substitution.plugin");
48            PDFNet.addResourceSearchPath(sLayoutSmartPluginPath);
49        } catch (Exception e) {
50            mOutputListener.printError(e.getStackTrace());
51        }
52
53        setTitle(R.string.sample_officetopdf_title);
54        setDescription(R.string.sample_officetopdf_description);
55    }
56
57	@Override
58	public void run(OutputListener outputListener) {
59		super.run(outputListener);
60		mOutputListener = outputListener;
61		mFileList.clear();
62		printHeader(outputListener);
63        
64
65        // first the one-line conversion interface
66        simpleDocxConvert("Fishermen.docx", "Fishermen.pdf");
67
68        // then the more flexible line-by-line interface
69        flexibleDocxConvert("the_rime_of_the_ancient_mariner.docx", "the_rime_of_the_ancient_mariner.pdf");
70       
71        // conversion of RTL content
72        flexibleDocxConvert("factsheet_Arabic.docx", "factsheet_Arabic.pdf");
73
74		for (String file : mFileList) {
75			addToFileList(file);
76		}
77		printFooter(outputListener);
78	}
79
80
81    public static void simpleDocxConvert(String inputFilename, String outputFilename) {
82        try (PDFDoc doc = new PDFDoc()) {
83
84            // perform the conversion with no optional parameters
85            PDFDoc pdfdoc = new PDFDoc();
86            Convert.officeToPdf(pdfdoc, Utils.getAssetTempFile(INPUT_PATH + inputFilename).getAbsolutePath(), null);
87
88            // save the result
89            pdfdoc.save(Utils.createExternalFile(outputFilename, mFileList).getAbsolutePath(), SDFDoc.SaveMode.INCREMENTAL, null);
90
91            // And we're done!
92            mOutputListener.println("Done conversion " + Utils.createExternalFile(outputFilename, mFileList).getAbsolutePath());
93        } catch (PDFNetException e) {
94            mOutputListener.println("Unable to convert MS Office document, error:");
95            mOutputListener.printError(e.getStackTrace());
96            mOutputListener.printError(e.getStackTrace());
97        }
98    }
99
100    public static void flexibleDocxConvert(String inputFilename, String outputFilename) {
101        try {
102            OfficeToPDFOptions options = new OfficeToPDFOptions();
103            options.setSmartSubstitutionPluginPath(sLayoutSmartPluginPath);
104
105            // create a conversion object -- this sets things up but does not yet
106            // perform any conversion logic.
107            // in a multithreaded environment, this object can be used to monitor
108            // the conversion progress and potentially cancel it as well
109            DocumentConversion conversion = Convert.streamingPdfConversion(
110                    Utils.getAssetTempFile(INPUT_PATH + inputFilename).getAbsolutePath(), options);
111
112            mOutputListener.println(inputFilename + ": " + Math.round(conversion.getProgress() * 100.0)
113                    + "% " + conversion.getProgressLabel());
114
115            // actually perform the conversion
116            while (conversion.getConversionStatus() == DocumentConversion.e_incomplete) {
117                conversion.convertNextPage();
118                mOutputListener.println(inputFilename + ": " + Math.round(conversion.getProgress() * 100.0)
119                        + "% " + conversion.getProgressLabel());
120            }
121
122            if (conversion.tryConvert() == DocumentConversion.e_success) {
123                int num_warnings = conversion.getNumWarnings();
124
125                // print information about the conversion
126                for (int i = 0; i < num_warnings; ++i) {
127                    mOutputListener.println("Warning: " + conversion.getWarningString(i));
128                }
129
130                // save the result
131                try (PDFDoc doc = conversion.getDoc()) {
132                    doc.save(Utils.createExternalFile(outputFilename, mFileList).getAbsolutePath(), SDFDoc.SaveMode.INCREMENTAL, null);
133                }
134
135                // done
136                mOutputListener.println("Done conversion " + Utils.createExternalFile(outputFilename, mFileList).getAbsolutePath());
137            } else {
138                mOutputListener.println("Encountered an error during conversion: " + conversion.getErrorString());
139            }
140        } catch (PDFNetException e) {
141            mOutputListener.println("Unable to convert MS Office document, error:");
142            mOutputListener.printError(e.getStackTrace());
143            mOutputListener.printError(e.getStackTrace());
144        }
145    }
146
147}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5package com.pdftron.android.pdfnetsdksamples.samples
6
7import com.pdftron.android.pdfnetsdksamples.OutputListener
8import com.pdftron.android.pdfnetsdksamples.PDFNetSample
9import com.pdftron.android.pdfnetsdksamples.R
10import com.pdftron.android.pdfnetsdksamples.util.Utils
11import com.pdftron.common.PDFNetException
12import com.pdftron.pdf.PDFDoc
13import com.pdftron.pdf.PageIterator
14import com.pdftron.pdf.struct.ContentItem
15import com.pdftron.pdf.struct.SElement
16import com.pdftron.pdf.struct.STree
17import com.pdftron.sdf.SDFDoc
18
/**
 * Sample that explores the logical structure of the "tagged.pdf" asset in three ways:
 * traversing the structure tree, looking up the parent structure element of page
 * content, and printing an 'XML style' view that combines structure and text.
 * The document is then saved as "LogicalStructure.pdf".
 */
class LogicalStructureTest : PDFNetSample() {

    /**
     * Runs the three logical-structure samples and registers the produced file.
     * Any exception raised while processing is reported through the listener.
     *
     * @param outputListener receiver for sample output; must not be null
     */
    override fun run(outputListener: OutputListener?) {
        super.run(outputListener)
        mOutputListener = outputListener
        mFileList.clear()
        printHeader(outputListener!!)
        try {
            // Extract logical structure from a PDF document
            val inputPath = Utils.getAssetTempFile(INPUT_PATH.toString() + "tagged.pdf")!!.getAbsolutePath()
            PDFDoc(inputPath).use { doc ->
                doc.initSecurityHandler()

                outputListener.println(SEPARATOR)
                outputListener.println("Sample 1 - Traverse logical structure tree...")
                traverseStructTree(doc)
                outputListener.println("\nDone 1.")

                outputListener.println(SEPARATOR)
                outputListener.println("Sample 2 - Get parent logical structure elements from")
                outputListener.println("layout elements.")
                printParentStructElements(doc)
                outputListener.println("\nDone 2.")

                outputListener.println(SEPARATOR)
                outputListener.println("Sample 3 - 'XML style' extraction of PDF logical structure and page content.")
                printXmlStyleStructure(doc)
                outputListener.println("\nDone 3.")

                doc.save(
                    Utils.createExternalFile("LogicalStructure.pdf", mFileList).getAbsolutePath(),
                    SDFDoc.SaveMode.LINEARIZED,
                    null
                )
            }
        } catch (e: java.lang.Exception) {
            mOutputListener!!.printError(e.getStackTrace())
        }
        for (file in mFileList) {
            addToFileList(file)
        }
        printFooter(outputListener)
    }

    companion object {
        private const val SEPARATOR = "____________________________________________________________"

        // Output sink shared by the static helpers; assigned at the start of run().
        private var mOutputListener: OutputListener? = null

        // Files produced by the current run.
        private val mFileList: java.util.ArrayList<String> = java.util.ArrayList<String>()

        /** Starts a new output line and indents it by [indent] levels of two spaces. */
        fun PrintIndent(indent: Int) {
            val out = mOutputListener!!
            out.println()
            repeat(indent) { out.print("  ") }
        }

        // Sample 1: walk the structure tree from its root, if the document has one.
        @Throws(PDFNetException::class)
        private fun traverseStructTree(doc: PDFDoc) {
            val tree: STree = doc.getStructTree()
            val out = mOutputListener!!
            if (!tree.isValid()) {
                out.println("This document does not contain any logical structure.")
                return
            }
            out.println("Document has a StructTree root.")
            for (i in 0 until tree.getNumKids()) {
                // Recursively get structure info for all child elements.
                ProcessStructElement(tree.getKid(i), 0)
            }
        }

        // Sample 2: read every page and print the structure parent of its content.
        @Throws(PDFNetException::class)
        private fun printParentStructElements(doc: PDFDoc) {
            val reader: com.pdftron.pdf.ElementReader = com.pdftron.pdf.ElementReader()
            val pages: PageIterator = doc.getPageIterator()
            while (pages.hasNext()) {
                reader.begin(pages.next())
                ProcessElements(reader)
                reader.end()
            }
        }

        // Sample 3: collect page text keyed by MCID, then print the structure tree with it.
        @Throws(PDFNetException::class)
        private fun printXmlStyleStructure(doc: PDFDoc) {
            // Maps a page number to that page's map from struct MCID to text.
            val mcidDocMap: MutableMap<Int, Map<Int, String>> = java.util.TreeMap<Int, Map<Int, String>>()
            val reader: com.pdftron.pdf.ElementReader = com.pdftron.pdf.ElementReader()
            val pages: PageIterator = doc.getPageIterator()
            while (pages.hasNext()) {
                val current: com.pdftron.pdf.Page? = pages.next()
                reader.begin(current)
                val pageMcidMap: MutableMap<Int, String> = java.util.TreeMap<Int, String>()
                mcidDocMap.put(current!!.getIndex(), pageMcidMap)
                ProcessElements2(reader, pageMcidMap)
                reader.end()
            }
            val tree: STree = doc.getStructTree()
            if (tree.isValid()) {
                for (i in 0 until tree.getNumKids()) {
                    ProcessStructElement2(tree.getKid(i), mcidDocMap, 0)
                }
            }
        }

        /**
         * Used in code snippet 1. Prints the type and optional title of [element], then
         * each of its kids: content items are described in place, nested structure
         * elements are processed recursively one indent level deeper.
         */
        @Throws(PDFNetException::class)
        fun ProcessStructElement(element: SElement, indent: Int) {
            if (!element.isValid()) {
                return
            }
            val out = mOutputListener!!

            // Print out the type and title info, if any.
            PrintIndent(indent)
            val childIndent = indent + 1
            out.print("Type: " + element.getType())
            if (element.hasTitle()) {
                out.print(". Title: " + element.getTitle())
            }

            val num: Int = element.getNumKids()
            for (i in 0 until num) {
                // Check if the kid is a leaf node (i.e. it is a ContentItem).
                if (!element.isContentItem(i)) {
                    // the kid is another StructElement node.
                    ProcessStructElement(element.getAsStructElem(i), childIndent)
                    continue
                }

                val cont: ContentItem = element.getAsContentItem(i)
                val type: Int = cont.getType()
                val page: com.pdftron.pdf.Page = cont.getPage()
                PrintIndent(childIndent)
                out.print("Content Item. Part of page #" + page.getIndex())
                PrintIndent(childIndent)
                when (type) {
                    ContentItem.e_MCID, ContentItem.e_MCR -> out.print("MCID: " + cont.getMCID())
                    ContentItem.e_OBJR -> {
                        out.print("OBJR ")
                        // Declared nullable so that a null from the Java API reaches the
                        // check below instead of failing at the assignment.
                        val refObj: com.pdftron.sdf.Obj? = cont.getRefObj()
                        if (refObj != null) out.print("- Referenced Object#: " + refObj.getObjNum())
                    }
                    else -> {
                    }
                }
            }
        }

        /**
         * Used in code snippet 2. Reads every element from [reader]; for paths, text and
         * form XObjects it prints the element kind followed by the type, MCID, optional
         * title and object number of the element's parent structure element, if valid.
         */
        @Throws(PDFNetException::class)
        fun ProcessElements(reader: com.pdftron.pdf.ElementReader) {
            // Read page contents
            while (true) {
                val element: com.pdftron.pdf.Element = reader.next() ?: break

                // In this sample we process only paths, text & forms, but the code can be
                // extended to handle any element type.
                val type: Int = element.getType()
                val handled = type == com.pdftron.pdf.Element.e_path ||
                        type == com.pdftron.pdf.Element.e_text ||
                        type == com.pdftron.pdf.Element.e_form
                if (!handled) {
                    continue
                }

                val out = mOutputListener!!
                when (type) {
                    com.pdftron.pdf.Element.e_path -> out.print("\nPATH: ")
                    // Plain concatenation: the page text is emitted as-is, never re-indented.
                    com.pdftron.pdf.Element.e_text -> out.print("\nTEXT: " + element.getTextString() + "\n")
                    com.pdftron.pdf.Element.e_form -> out.print("\nFORM XObject: ")
                }

                // Check if the element is associated with any structural element.
                // Content items are leaf nodes of the structure tree.
                val structParent: SElement = element.getParentStructElement()
                if (structParent.isValid()) {
                    // Print out the parent structural element's type, title, and object number.
                    out.print(" Type: " + structParent.getType()
                            + ", MCID: " + element.getStructMCID())
                    if (structParent.hasTitle()) {
                        out.print(". Title: " + structParent.getTitle())
                    }
                    out.print(", Obj#: " + structParent.getSDFObj().getObjNum())
                }
            }
        }

        /**
         * Used in code snippet 3. Reads every element from [reader] and appends the text
         * of each text element with a non-negative MCID to that MCID's entry in
         * [mcid_page_map].
         */
        @Throws(PDFNetException::class)
        fun ProcessElements2(reader: com.pdftron.pdf.ElementReader, mcid_page_map: MutableMap<Int, String>) {
            // Read page contents
            while (true) {
                val element: com.pdftron.pdf.Element = reader.next() ?: break

                // In this sample we process only text, but the code can be extended
                // to handle paths, images, or any other Element type.
                val mcid: Int = element.getStructMCID()
                if (mcid >= 0 && element.getType() == com.pdftron.pdf.Element.e_text) {
                    val text: String = element.getTextString()
                    val previous: String = mcid_page_map[mcid] ?: ""
                    mcid_page_map.put(mcid, previous + text)
                }
            }
        }

        /**
         * Used in code snippet 3. Prints [element] as an XML-like tag (with an optional
         * title attribute), the collected text of its MCID content items looked up in
         * [mcid_doc_map] by page number and MCID, its nested structure elements one
         * indent level deeper, and finally the closing tag.
         */
        @Throws(PDFNetException::class)
        fun ProcessStructElement2(element: SElement, mcid_doc_map: Map<Int, Map<Int, String>>, indent: Int) {
            if (!element.isValid()) {
                return
            }
            val out = mOutputListener!!

            // Print out the type and title info, if any.
            PrintIndent(indent)
            out.print("<" + element.getType())
            if (element.hasTitle()) {
                out.print(" title=\"" + element.getTitle() + "\"")
            }
            out.print(">")

            val num: Int = element.getNumKids()
            for (i in 0 until num) {
                if (!element.isContentItem(i)) {
                    // the kid is another StructElement node.
                    ProcessStructElement2(element.getAsStructElem(i), mcid_doc_map, indent + 1)
                    continue
                }

                val cont: ContentItem = element.getAsContentItem(i)
                if (cont.getType() == ContentItem.e_MCID) {
                    val pageNum: Int = cont.getPage().getIndex()
                    val pageMap: Map<Int, String>? = mcid_doc_map[pageNum]
                    if (pageMap != null) {
                        val text: String? = pageMap[cont.getMCID()]
                        if (text != null) {
                            out.print(text)
                        }
                    }
                }
            }

            PrintIndent(indent)
            out.print("</" + element.getType() + ">")
        }
    }

    init {
        setTitle(R.string.sample_logicalstructure_title)
        setDescription(R.string.sample_logicalstructure_description)
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

OfficeToPDF