ElementReaderAdv

Sample Java and Kotlin code for using Apryse SDK to extract text, paths, and images from a PDF. The sample also shows how to do color conversion, image normalization, and process changes in the graphics state. Learn more about our Android SDK and PDF Data Extraction SDK Capabilities.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener;
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
10import com.pdftron.android.pdfnetsdksamples.R;
11import com.pdftron.android.pdfnetsdksamples.util.Utils;
12import com.pdftron.common.Matrix2D;
13import com.pdftron.common.PDFNetException;
14import com.pdftron.filters.FilterReader;
15import com.pdftron.pdf.CharData;
16import com.pdftron.pdf.CharIterator;
17import com.pdftron.pdf.ColorPt;
18import com.pdftron.pdf.ColorSpace;
19import com.pdftron.pdf.Element;
20import com.pdftron.pdf.ElementReader;
21import com.pdftron.pdf.Font;
22import com.pdftron.pdf.GSChangesIterator;
23import com.pdftron.pdf.GState;
24import com.pdftron.pdf.Image2RGB;
25import com.pdftron.pdf.PDFDoc;
26import com.pdftron.pdf.Page;
27import com.pdftron.pdf.PageIterator;
28import com.pdftron.pdf.PathData;
29import com.pdftron.pdf.PatternColor;
30import com.pdftron.pdf.Shading;
31
32import java.util.ArrayList;
33
34public class ElementReaderAdvTest extends PDFNetSample {
35
36	private static OutputListener mOutputListener;
37
38	private static ArrayList<String> mFileList = new ArrayList<>();
39
40    public ElementReaderAdvTest() {
41        setTitle(R.string.sample_elementreaderadv_title);
42        setDescription(R.string.sample_elementreaderadv_description);
43    }
44
45	@Override
46	public void run(OutputListener outputListener) {
47		super.run(outputListener);
48		mOutputListener = outputListener;
49		mFileList.clear();
50		printHeader(outputListener);
51
52        // string output_path = "../../TestFiles/Output/";
53
54        mOutputListener.println("__________________________________________________");
55        mOutputListener.println("Extract page element information from all ");
56        mOutputListener.println("pages in the document.");
57        try (PDFDoc doc = new PDFDoc((Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath())))    // Extract text data from all pages in the document
58        {
59            doc.initSecurityHandler();
60
61            int pgnum = doc.getPageCount();
62            PageIterator page_begin = doc.getPageIterator();
63
64            ElementReader page_reader = new ElementReader();
65
66            PageIterator itr;
67
68            for (itr = page_begin; itr.hasNext(); )        //  Read every page
69            {
70                Page nextPage = itr.next();
71                mOutputListener.println("Page " + nextPage.getIndex() +
72                        "----------------------------------------");
73                page_reader.begin(nextPage);
74                ProcessElements(page_reader);
75                page_reader.end();
76            }
77            mOutputListener.println("Done");
78        } catch (Exception e) {
79            mOutputListener.printError(e.getStackTrace());
80        }
81
82		for (String file : mFileList) {
83			addToFileList(file);
84		}
85		printFooter(outputListener);
86	}
87
88    static String m_buf;
89
90    static void ProcessPath(ElementReader reader, Element path) throws PDFNetException {
91        if (path.isClippingPath()) {
92            mOutputListener.println("This is a clipping path");
93        }
94
95        PathData pathData = path.getPathData();
96        double[] data = pathData.getPoints();
97        byte[] opr = pathData.getOperators();
98
99        double x1, y1, x2, y2, x3, y3;
100        // Use path.getCTM() if you are interested in CTM (current transformation matrix).
101
102        mOutputListener.print(" Path Data Points := \"");
103        int data_index = 0;
104        for (int opr_index = 0; opr_index < opr.length; ++opr_index) {
105            switch (opr[opr_index]) {
106                case PathData.e_moveto:
107                    x1 = data[data_index];
108                    ++data_index;
109                    y1 = data[data_index];
110                    ++data_index;
111                    mOutputListener.print("M" + x1 + " " + y1);
112                    break;
113                case PathData.e_lineto:
114                    x1 = data[data_index];
115                    ++data_index;
116                    y1 = data[data_index];
117                    ++data_index;
118                    mOutputListener.print(" L" + x1 + " " + y1);
119
120                    break;
121                case PathData.e_cubicto:
122                    x1 = data[data_index];
123                    ++data_index;
124                    y1 = data[data_index];
125                    ++data_index;
126                    x2 = data[data_index];
127                    ++data_index;
128                    y2 = data[data_index];
129                    ++data_index;
130                    x3 = data[data_index];
131                    ++data_index;
132                    y3 = data[data_index];
133                    ++data_index;
134                    mOutputListener.print(" C" + x1 + " " + y1 + " " + x2 + " " + y2 + " " + x3 + " " + y3);
135                    break;
136                case PathData.e_rect: {
137                    x1 = data[data_index];
138                    ++data_index;
139                    y1 = data[data_index];
140                    ++data_index;
141                    double w = data[data_index];
142                    ++data_index;
143                    double h = data[data_index];
144                    ++data_index;
145                    x2 = x1 + w;
146                    y2 = y1;
147                    x3 = x2;
148                    y3 = y1 + h;
149                    double x4 = x1;
150                    double y4 = y3;
151                    mOutputListener.print("M" + x1 + " " + y1 + " L" + x2 + " " + y2 + " L" + x3 + " " + y3 + " L" + x4 + " " + y4 + " Z");
152                }
153                break;
154                case PathData.e_closepath:
155                    mOutputListener.println(" Close Path");
156                    break;
157                default:
158                    throw new PDFNetException("Invalid Element Type", 0, "", "", "");
159            }
160        }
161
162        mOutputListener.print("\" ");
163
164        GState gs = path.getGState();
165
166        // Set Path State 0 (stroke, fill, fill-rule) -----------------------------------
167        if (path.isStroked()) {
168            mOutputListener.println("Stroke path");
169
170            if (gs.getStrokeColorSpace().getType() == ColorSpace.e_pattern) {
171                mOutputListener.println("Path has associated pattern");
172            } else {
173                // Get stroke color (you can use PDFNet color conversion facilities)
174                ColorPt rgb = new ColorPt();
175                rgb = gs.getStrokeColor();
176                double v = rgb.get(0);
177                rgb = gs.getStrokeColorSpace().convert2RGB(rgb);
178                v = rgb.get(0);
179            }
180        } else {
181            // Do not stroke path
182        }
183
184        if (path.isFilled()) {
185            mOutputListener.println("Fill path");
186
187            if (gs.getFillColorSpace().getType() == ColorSpace.e_pattern) {
188                mOutputListener.println("Path has associated pattern");
189                PatternColor pat = gs.getFillPattern();
190                int type = pat.getType();
191                if (type == PatternColor.e_shading) {
192                    mOutputListener.println("Shading");
193                    Shading shading = pat.getShading();
194                    if (shading.getType() == Shading.e_function_shading) {
195                        mOutputListener.println("FUNCT");
196                    } else if (shading.getType() == Shading.e_axial_shading) {
197                        mOutputListener.println("AXIAL");
198                    } else if (shading.getType() == Shading.e_radial_shading) {
199                        mOutputListener.println("RADIAL");
200                    }
201                } else if (type == PatternColor.e_colored_tiling_pattern) {
202                    mOutputListener.println("e_colored_tiling_pattern");
203                } else if (type == PatternColor.e_uncolored_tiling_pattern) {
204                    mOutputListener.println("e_uncolored_tiling_pattern");
205                } else {
206                    mOutputListener.println("?");
207                }
208            } else {
209                ColorPt rgb = new ColorPt();
210                rgb = gs.getFillColor();
211                double v = rgb.get(0);
212                rgb = gs.getFillColorSpace().convert2RGB(rgb);
213                v = rgb.get(0);
214            }
215        } else {
216            // Do not fill path
217        }
218
219        // Process any changes in graphics state  ---------------------------------
220
221        GSChangesIterator gs_itr = reader.getChangesIterator();
222        while (gs_itr.hasNext()) {
223            switch (gs_itr.next().intValue()) {
224                case GState.e_transform:
225                    // Get transform matrix for this element. Unlike path.GetCTM()
226                    // that return full transformation matrix gs.GetTransform() return
227                    // only the transformation matrix that was installed for this element.
228                    //
229                    //gs.getTransform();
230                    break;
231                case GState.e_line_width:
232                    //gs.getLineWidth();
233                    break;
234                case GState.e_line_cap:
235                    //gs.getLineCap();
236                    break;
237                case GState.e_line_join:
238                    //gs.getLineJoin();
239                    break;
240                case GState.e_flatness:
241                    break;
242                case GState.e_miter_limit:
243                    //gs.getMiterLimit();
244                    break;
245                case GState.e_dash_pattern: {
246                    //double[] dashes;
247                    //dashes=gs.getDashes();
248                    //gs.getPhase();
249                }
250                break;
251                case GState.e_fill_color: {
252                    if (gs.getFillColorSpace().getType() == ColorSpace.e_pattern &&
253                            gs.getFillPattern().getType() != PatternColor.e_shading) {
254                        //process the pattern data
255                        reader.patternBegin(true);
256                        ProcessElements(reader);
257                        reader.end();
258                    }
259                }
260                break;
261            }
262        }
263        reader.clearChangeList();
264    }
265
266    static void ProcessText(ElementReader page_reader) throws PDFNetException {
267        // Begin text element
268        mOutputListener.println("Begin Text Block:");
269
270        Element element;
271        while ((element = page_reader.next()) != null) {
272            switch (element.getType()) {
273                case Element.e_text_end:
274                    // Finish the text block
275                    mOutputListener.println("End Text Block.");
276                    return;
277
278                case Element.e_text: {
279                    GState gs = element.getGState();
280
281                    ColorSpace cs_fill = gs.getFillColorSpace();
282                    ColorPt fill = gs.getFillColor();
283
284                    ColorPt out;
285                    out = cs_fill.convert2RGB(fill);
286
287                    ColorSpace cs_stroke = gs.getStrokeColorSpace();
288                    ColorPt stroke = gs.getStrokeColor();
289
290                    Font font = gs.getFont();
291
292                    mOutputListener.println("Font Name: " + font.getName());
293                    //font.isFixedWidth();
294                    //font.isSerif();
295                    //font.isSymbolic();
296                    //font.isItalic();
297                    // ...
298
299                    //double font_size = gs.getFontSize();
300                    //double word_spacing = gs.getWordSpacing();
301                    //double char_spacing = gs.getCharSpacing();
302                    //String txt = element.getTextString();
303
304                    if (font.getType() == Font.e_Type3) {
305                        //type 3 font, process its data
306                        for (CharIterator itr = element.getCharIterator(); itr.hasNext(); ) {
307                            page_reader.type3FontBegin(itr.next(), null);
308                            ProcessElements(page_reader);
309                            page_reader.end();
310                        }
311                    } else {
312                        Matrix2D text_mtx = element.getTextMatrix();
313                        double x, y;
314                        long char_code;
315
316                        for (CharIterator itr = element.getCharIterator(); itr.hasNext(); ) {
317                            CharData data = itr.next();
318                            char_code = data.getCharCode();
319                            //mOutputListener.print("Character code: ");
320
321                            mOutputListener.print(String.valueOf(char_code));
322
323                            x = data.getGlyphX();        // character positioning information
324                            y = data.getGlyphY();
325
326                            // Use element.getCTM() if you are interested in the CTM
327                            // (current transformation matrix).
328                            Matrix2D ctm = element.getCTM();
329
330                            // To get the exact character positioning information you need to
331                            // concatenate current text matrix with CTM and then multiply
332                            // relative positioning coordinates with the resulting matrix.
333                            //
334                            Matrix2D mtx = ctm.multiply(text_mtx);
335                            com.pdftron.pdf.Point t = mtx.multPoint(x, y);
336                            x = t.x;
337                            y = t.y;
338                            //mOutputListener.println(" Position: x=" + x + " y=" + y );
339                        }
340
341                        mOutputListener.println();
342                    }
343                }
344                break;
345            }
346        }
347    }
348
349    static void ProcessImage(Element image) throws PDFNetException {
350        boolean image_mask = image.isImageMask();
351        boolean interpolate = image.isImageInterpolate();
352        int width = image.getImageWidth();
353        int height = image.getImageHeight();
354        int out_data_sz = width * height * 3;
355
356        mOutputListener.println("Image: " +
357                " width=\"" + width + "\""
358                + " height=\"" + height);
359
360        // Matrix2D& mtx = image->GetCTM(); // image matrix (page positioning info)
361
362        // You can use GetImageData to read the raw (decoded) image data
363        //image->GetBitsPerComponent();
364        //image->GetImageData();	// get raw image data
365        // .... or use Image2RGB filter that converts every image to RGB format,
366        // This should save you time since you don't need to deal with color conversions,
367        // image up-sampling, decoding etc.
368
369        Image2RGB img_conv = new Image2RGB(image);    // Extract and convert image to RGB 8-bpc format
370        FilterReader reader = new FilterReader(img_conv);
371
372        // A buffer used to keep image data.
373        byte[] buf = new byte[out_data_sz];
374        long image_data_out = reader.read(buf);
375        // &image_data_out.front() contains RGB image data.
376
377        // Note that you don't need to read a whole image at a time. Alternatively
378        // you can read a chunk at a time by repeatedly calling reader.Read(buf)
379        // until the function returns 0.
380    }
381
382    static void ProcessElements(ElementReader reader) throws PDFNetException {
383        Element element;
384        while ((element = reader.next()) != null)    // Read page contents
385        {
386            switch (element.getType()) {
387                case Element.e_path:                        // Process path data...
388                {
389                    ProcessPath(reader, element);
390                }
391                break;
392                case Element.e_text_begin:                // Process text block...
393                {
394                    ProcessText(reader);
395                }
396                break;
397                case Element.e_form:                        // Process form XObjects
398                {
399                    reader.formBegin();
400                    ProcessElements(reader);
401                    reader.end();
402                }
403                break;
404                case Element.e_image:                        // Process Images
405                {
406                    ProcessImage(element);
407                }
408                break;
409            }
410        }
411    }
412
413}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample
10import com.pdftron.android.pdfnetsdksamples.R
11import com.pdftron.android.pdfnetsdksamples.util.Utils
12import com.pdftron.common.PDFNetException
13import com.pdftron.filters.FilterReader
14import com.pdftron.pdf.*
15import java.util.*
16
/**
 * Sample that opens `newsletter.pdf` and walks every page with an [ElementReader],
 * reporting the paths, text runs and images it encounters through the
 * [OutputListener] passed to [run].
 */
class ElementReaderAdvTest : PDFNetSample() {
    init {
        setTitle(R.string.sample_elementreaderadv_title)
        setDescription(R.string.sample_elementreaderadv_description)
    }

    /**
     * Opens the input document, reads every page with an [ElementReader] and prints
     * what [ProcessElements] finds. Any exception raised while reading is reported
     * through `printError` instead of being rethrown.
     *
     * @param outputListener listener that receives the sample output
     */
    override fun run(outputListener: OutputListener?) {
        super.run(outputListener)
        mOutputListener = outputListener
        mFileList.clear()
        printHeader(outputListener!!)

        mOutputListener!!.println("__________________________________________________")
        mOutputListener!!.println("Extract page element information from all ")
        mOutputListener!!.println("pages in the document.")
        // Extract text data from all pages in the document
        try {
            // use { } closes the document even when processing fails.
            PDFDoc(Utils.getAssetTempFile(PDFNetSample.INPUT_PATH + "newsletter.pdf")!!.absolutePath).use { doc ->
                doc.initSecurityHandler()

                val page_reader = ElementReader()

                // Read every page.
                val itr = doc.pageIterator
                while (itr.hasNext()) {
                    val nextPage = itr.next()!!
                    mOutputListener!!.println("Page " + nextPage.getIndex() +
                            "----------------------------------------")
                    page_reader.begin(nextPage)
                    ProcessElements(page_reader)
                    page_reader.end()
                }

                mOutputListener!!.println("Done")
            }
        } catch (e: Exception) {
            mOutputListener!!.printError(e.stackTrace)
        }

        for (file in mFileList) {
            addToFileList(file)
        }
        printFooter(outputListener)
    }

    companion object {

        /** Receives all sample output; assigned at the start of [run]. */
        private var mOutputListener: OutputListener? = null

        /** Entries handed to `addToFileList` at the end of a run; cleared when a run starts. */
        private val mFileList = ArrayList<String>()

        /** Not referenced anywhere in this class. */
        internal var m_buf: String? = null

        /**
         * Prints the geometry of a path element, reports whether it is stroked and/or
         * filled (including the kind of fill pattern, if any) and then walks the
         * reader's list of graphics-state changes.
         *
         * @param reader reader that produced [path]; also used to descend into
         *               non-shading fill patterns
         * @param path   the path element to describe
         * @throws PDFNetException if the path contains an unknown operator or the SDK
         *                         reports an error
         */
        @Throws(PDFNetException::class)
        internal fun ProcessPath(reader: ElementReader, path: Element) {
            val out = mOutputListener!!

            if (path.isClippingPath) {
                out.println("This is a clipping path")
            }

            val pathData = path.pathData
            val data = pathData.points
            val opr = pathData.operators

            // Use path.getCTM() if you are interested in CTM (current transformation matrix).

            out.print(" Path Data Points := \"")

            // Each operator consumes a fixed number of coordinates from 'data';
            // data_index always points at the next unread coordinate.
            var data_index = 0
            for (op in opr) {
                when (op) {
                    PathData.e_moveto.toByte() -> {
                        val x = data[data_index++]
                        val y = data[data_index++]
                        out.print("M$x $y")
                    }
                    PathData.e_lineto.toByte() -> {
                        val x = data[data_index++]
                        val y = data[data_index++]
                        out.print(" L$x $y")
                    }
                    PathData.e_cubicto.toByte() -> {
                        val x1 = data[data_index++]
                        val y1 = data[data_index++]
                        val x2 = data[data_index++]
                        val y2 = data[data_index++]
                        val x3 = data[data_index++]
                        val y3 = data[data_index++]
                        out.print(" C$x1 $y1 $x2 $y2 $x3 $y3")
                    }
                    PathData.e_rect.toByte() -> {
                        // A rectangle is stored as (x, y, width, height); print it as
                        // the equivalent closed four-corner outline.
                        val x1 = data[data_index++]
                        val y1 = data[data_index++]
                        val w = data[data_index++]
                        val h = data[data_index++]
                        val x2 = x1 + w
                        val y2 = y1
                        val x3 = x2
                        val y3 = y1 + h
                        out.print("M$x1 $y1 L$x2 $y2 L$x3 $y3 L$x1 $y3 Z")
                    }
                    PathData.e_closepath.toByte() -> out.println(" Close Path")
                    else -> throw PDFNetException("Invalid Element Type", 0, "", "", "")
                }
            }

            out.print("\" ")

            val gs = path.gState

            // Set Path State 0 (stroke, fill, fill-rule) -----------------------------------
            if (path.isStroked) {
                out.println("Stroke path")

                if (gs.strokeColorSpace.type == ColorSpace.e_pattern) {
                    out.println("Path has associated pattern")
                } else {
                    // Get stroke color (you can use PDFNet color conversion facilities).
                    // The values are read for demonstration only and are not printed.
                    var rgb = gs.strokeColor
                    var v = rgb.get(0)
                    rgb = gs.strokeColorSpace.convert2RGB(rgb)
                    v = rgb.get(0)
                }
            }

            if (path.isFilled) {
                out.println("Fill path")

                if (gs.fillColorSpace.type == ColorSpace.e_pattern) {
                    out.println("Path has associated pattern")
                    val pat = gs.fillPattern
                    val type = pat.type
                    if (type == PatternColor.e_shading) {
                        out.println("Shading")
                        val shading = pat.shading
                        if (shading.type == Shading.e_function_shading) {
                            out.println("FUNCT")
                        } else if (shading.type == Shading.e_axial_shading) {
                            out.println("AXIAL")
                        } else if (shading.type == Shading.e_radial_shading) {
                            out.println("RADIAL")
                        }
                    } else if (type == PatternColor.e_colored_tiling_pattern) {
                        out.println("e_colored_tiling_pattern")
                    } else if (type == PatternColor.e_uncolored_tiling_pattern) {
                        out.println("e_uncolored_tiling_pattern")
                    } else {
                        out.println("?")
                    }
                } else {
                    // Fill color, read and converted to RGB for demonstration only.
                    var rgb = gs.fillColor
                    var v = rgb.get(0)
                    rgb = gs.fillColorSpace.convert2RGB(rgb)
                    v = rgb.get(0)
                }
            }

            // Process any changes in graphics state  ---------------------------------

            val gs_itr = reader.changesIterator
            while (gs_itr.hasNext()) {
                when (gs_itr.next()!!.toInt()) {
                    GState.e_transform -> {
                        // Get transform matrix for this element. Unlike path.GetCTM()
                        // that return full transformation matrix gs.GetTransform() return
                        // only the transformation matrix that was installed for this element.
                        //
                        //gs.getTransform();
                    }
                    GState.e_line_width -> {
                        //gs.getLineWidth();
                    }
                    GState.e_line_cap -> {
                        //gs.getLineCap();
                    }
                    GState.e_line_join -> {
                        //gs.getLineJoin();
                    }
                    GState.e_flatness -> {
                    }
                    GState.e_miter_limit -> {
                        //gs.getMiterLimit();
                    }
                    GState.e_dash_pattern -> {
                        //double[] dashes;
                        //dashes=gs.getDashes();
                        //gs.getPhase();
                    }
                    GState.e_fill_color -> {
                        if (gs.fillColorSpace.type == ColorSpace.e_pattern && gs.fillPattern.type != PatternColor.e_shading) {
                            // Process the pattern data: the pattern content is read
                            // with the same reader, between patternBegin() and end().
                            reader.patternBegin(true)
                            ProcessElements(reader)
                            reader.end()
                        }
                    }
                }
            }
            reader.clearChangeList()
        }

        /**
         * Consumes elements from [page_reader] until the end of the current text
         * block, printing the font name of each text run and the character codes of
         * its glyphs. Type 3 font glyphs are processed as nested element streams.
         *
         * @param page_reader reader positioned just after an `e_text_begin` element
         * @throws PDFNetException if the SDK reports an error
         */
        @Throws(PDFNetException::class)
        internal fun ProcessText(page_reader: ElementReader) {
            val listener = mOutputListener!!

            // Begin text element
            listener.println("Begin Text Block:")

            while (true) {
                val element = page_reader.next() ?: break
                when (element.type) {
                    Element.e_text_end -> {
                        // Finish the text block
                        listener.println("End Text Block.")
                        return
                    }

                    Element.e_text -> {
                        val gs = element.gState

                        // Fill/stroke colors are fetched for demonstration only.
                        val cs_fill = gs.fillColorSpace
                        val fill = gs.fillColor
                        val fill_rgb: ColorPt = cs_fill.convert2RGB(fill)

                        val cs_stroke = gs.strokeColorSpace
                        val stroke = gs.strokeColor

                        val font = gs.font

                        listener.println("Font Name: " + font.name)
                        //font.isFixedWidth();
                        //font.isSerif();
                        //font.isSymbolic();
                        //font.isItalic();
                        // ...

                        //double font_size = gs.getFontSize();
                        //double word_spacing = gs.getWordSpacing();
                        //double char_spacing = gs.getCharSpacing();
                        //String txt = element.getTextString();

                        if (font.type == Font.e_Type3) {
                            // Type 3 font: each glyph is itself a stream of elements.
                            val itr = element.charIterator
                            while (itr.hasNext()) {
                                page_reader.type3FontBegin(itr.next(), null)
                                ProcessElements(page_reader)
                                page_reader.end()
                            }
                        } else {
                            // To get the exact character positioning information you need to
                            // concatenate current text matrix with CTM and then multiply
                            // relative positioning coordinates with the resulting matrix.
                            // Both matrices belong to the element, so the product is
                            // computed once per text run rather than once per glyph.
                            val text_mtx = element.textMatrix
                            val ctm = element.ctm
                            val mtx = ctm.multiply(text_mtx)

                            val itr = element.charIterator
                            while (itr.hasNext()) {
                                val data = itr.next()!!
                                val char_code: Long = data.getCharCode()
                                //mOutputListener.print("Character code: ");

                                listener.print(char_code.toString())

                                // Character positioning information, mapped through mtx.
                                val t = mtx.multPoint(data.getGlyphX(), data.getGlyphY())
                                val x = t.x
                                val y = t.y
                                //mOutputListener.println(" Position: x=" + x + " y=" + y );
                            }

                            listener.println()
                        }
                    }
                }
            }
        }

        /**
         * Prints the pixel dimensions of an image element and reads its data through
         * an [Image2RGB] filter into a buffer of `width * height * 3` bytes.
         *
         * @param image the image element to describe
         * @throws PDFNetException if the RGB data would not fit into a single array,
         *                         or if the SDK reports an error
         */
        @Throws(PDFNetException::class)
        internal fun ProcessImage(image: Element) {
            // Queried for demonstration only; the values are not printed.
            val image_mask = image.isImageMask
            val interpolate = image.isImageInterpolate
            val width = image.imageWidth
            val height = image.imageHeight

            mOutputListener!!.println("Image: " +
                    " width=\"" + width + "\""
                    + " height=\"" + height + "\"")

            // Compute the size in 64-bit arithmetic: width * height * 3 can exceed the
            // Int range for large images and would otherwise wrap around silently.
            val out_data_sz = width.toLong() * height * 3
            if (out_data_sz < 0 || out_data_sz > Int.MAX_VALUE) {
                throw PDFNetException("Image too large for a single RGB buffer", 0, "", "", "")
            }

            // Matrix2D& mtx = image->GetCTM(); // image matrix (page positioning info)

            // You can use GetImageData to read the raw (decoded) image data
            //image->GetBitsPerComponent();
            //image->GetImageData();	// get raw image data
            // .... or use Image2RGB filter that converts every image to RGB format,
            // This should save you time since you don't need to deal with color conversions,
            // image up-sampling, decoding etc.

            val img_conv = Image2RGB(image)    // Extract and convert image to RGB 8-bpc format
            val reader = FilterReader(img_conv)

            // A buffer used to keep image data.
            val buf = ByteArray(out_data_sz.toInt())
            val image_data_out = reader.read(buf)
            // buf now contains RGB image data.

            // Note that you don't need to read a whole image at a time. Alternatively
            // you can read a chunk at a time by repeatedly calling reader.Read(buf)
            // until the function returns 0.
        }

        /**
         * Reads elements from [reader] until it returns `null` and dispatches each one
         * by type: paths, text blocks, form XObjects (processed recursively) and
         * images. Other element types are skipped.
         *
         * @param reader reader on which `begin`, `formBegin`, `patternBegin` or
         *               `type3FontBegin` was just called
         * @throws PDFNetException if the SDK reports an error
         */
        @Throws(PDFNetException::class)
        internal fun ProcessElements(reader: ElementReader) {
            // Read page contents
            while (true) {
                val element = reader.next() ?: break
                when (element.type) {
                    // Process path data...
                    Element.e_path -> ProcessPath(reader, element)
                    // Process text block...
                    Element.e_text_begin -> ProcessText(reader)
                    // Process form XObjects
                    Element.e_form -> {
                        reader.formBegin()
                        ProcessElements(reader)
                        reader.end()
                    }
                    // Process Images
                    Element.e_image -> ProcessImage(element)
                }
            }
        }
    }

}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

ElementReaderAdv