ElementReader

Sample Java code that uses the Apryse SDK to traverse a page's display list with ElementReader. Learn more about our Android SDK and PDF Data Extraction SDK capabilities.

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener;
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
10import com.pdftron.android.pdfnetsdksamples.R;
11import com.pdftron.android.pdfnetsdksamples.util.Utils;
12import com.pdftron.common.PDFNetException;
13import com.pdftron.pdf.Element;
14import com.pdftron.pdf.ElementReader;
15import com.pdftron.pdf.PDFDoc;
16import com.pdftron.pdf.PageIterator;
17import com.pdftron.pdf.PathData;
18
19import java.util.ArrayList;
20
21public class ElementReaderTest extends PDFNetSample {
22
23 private static OutputListener mOutputListener;
24
25 private static ArrayList<String> mFileList = new ArrayList<>();
26
27 public ElementReaderTest() {
28 setTitle(R.string.sample_elementreader_title);
29 setDescription(R.string.sample_elementreader_description);
30 }
31
32 @Override
33 public void run(OutputListener outputListener) {
34 super.run(outputListener);
35 mOutputListener = outputListener;
36 mFileList.clear();
37 printHeader(outputListener);
38
39 mOutputListener.println("-------------------------------------------------");
40 mOutputListener.println("Sample 1 - Extract text data from all pages in the document.");
41 mOutputListener.println("Opening the input pdf...");
42
43 try (PDFDoc doc = new PDFDoc(Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath())) // Extract text data from all pages in the document
44 {
45 doc.initSecurityHandler();
46
47 int pgnum = doc.getPageCount();
48
49 PageIterator itr;
50 ElementReader page_reader = new ElementReader();
51
52 for (itr = doc.getPageIterator(); itr.hasNext(); ) // Read every page
53 {
54 page_reader.begin(itr.next());
55 ProcessElements(page_reader);
56 page_reader.end();
57 }
58 mOutputListener.println("Done.");
59 } catch (Exception e) {
60 mOutputListener.printError(e.getStackTrace());
61 }
62
63 for (String file : mFileList) {
64 addToFileList(file);
65 }
66 printFooter(outputListener);
67 }
68
69 static void ProcessElements(ElementReader reader) throws PDFNetException {
70 for (Element element = reader.next(); element != null; element = reader.next()) // Read page contents
71 {
72 switch (element.getType())
73 {
74 case Element.e_path: // Process path data...
75 {
76 PathData data = element.getPathData();
77 byte[] operators = data.getOperators();
78 double[] points = data.getPoints();
79 }
80 break;
81 case Element.e_text: // Process text strings...
82 {
83 String data = element.getTextString();
84 mOutputListener.println(data);
85 }
86 break;
87 case Element.e_form: // Process form XObjects
88 {
89 reader.formBegin();
90 ProcessElements(reader);
91 reader.end();
92 }
93 break;
94 }
95 }
96 }
97
98}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales