ElementReader

Sample Java and Kotlin code that uses the Apryse Android SDK to traverse a page's display list with ElementReader.

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with Android SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener;
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
10import com.pdftron.android.pdfnetsdksamples.R;
11import com.pdftron.android.pdfnetsdksamples.util.Utils;
12import com.pdftron.common.PDFNetException;
13import com.pdftron.pdf.Element;
14import com.pdftron.pdf.ElementReader;
15import com.pdftron.pdf.PDFDoc;
16import com.pdftron.pdf.PageIterator;
17import com.pdftron.pdf.PathData;
18
19import java.util.ArrayList;
20
21public class ElementReaderTest extends PDFNetSample {
22
23 private static OutputListener mOutputListener;
24
25 private static ArrayList<String> mFileList = new ArrayList<>();
26
27 public ElementReaderTest() {
28 setTitle(R.string.sample_elementreader_title);
29 setDescription(R.string.sample_elementreader_description);
30 }
31
32 @Override
33 public void run(OutputListener outputListener) {
34 super.run(outputListener);
35 mOutputListener = outputListener;
36 mFileList.clear();
37 printHeader(outputListener);
38
39 mOutputListener.println("-------------------------------------------------");
40 mOutputListener.println("Sample 1 - Extract text data from all pages in the document.");
41 mOutputListener.println("Opening the input pdf...");
42
43 try (PDFDoc doc = new PDFDoc(Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath())) // Extract text data from all pages in the document
44 {
45 doc.initSecurityHandler();
46
47 int pgnum = doc.getPageCount();
48
49 PageIterator itr;
50 ElementReader page_reader = new ElementReader();
51
52 for (itr = doc.getPageIterator(); itr.hasNext(); ) // Read every page
53 {
54 page_reader.begin(itr.next());
55 ProcessElements(page_reader);
56 page_reader.end();
57 }
58 mOutputListener.println("Done.");
59 } catch (Exception e) {
60 mOutputListener.printError(e.getStackTrace());
61 }
62
63 for (String file : mFileList) {
64 addToFileList(file);
65 }
66 printFooter(outputListener);
67 }
68
69 static void ProcessElements(ElementReader reader) throws PDFNetException {
70 for (Element element = reader.next(); element != null; element = reader.next()) // Read page contents
71 {
72 switch (element.getType())
73 {
74 case Element.e_path: // Process path data...
75 {
76 PathData data = element.getPathData();
77 byte[] operators = data.getOperators();
78 double[] points = data.getPoints();
79 }
80 break;
81 case Element.e_text: // Process text strings...
82 {
83 String data = element.getTextString();
84 mOutputListener.println(data);
85 }
86 break;
87 case Element.e_form: // Process form XObjects
88 {
89 reader.formBegin();
90 ProcessElements(reader);
91 reader.end();
92 }
93 break;
94 }
95 }
96 }
97
98}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales