Sample Java and Kotlin code for using the Apryse Android SDK to extract text, paths, and images from a PDF. The sample also shows how to do color conversion, image normalization, and how to process changes in the graphics state.
Learn more about our full PDF Data Extraction SDK Capabilities.
To start your free trial, get started with Android SDK.
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener;
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
10import com.pdftron.android.pdfnetsdksamples.R;
11import com.pdftron.android.pdfnetsdksamples.util.Utils;
12import com.pdftron.common.Matrix2D;
13import com.pdftron.common.PDFNetException;
14import com.pdftron.filters.FilterReader;
15import com.pdftron.pdf.CharData;
16import com.pdftron.pdf.CharIterator;
17import com.pdftron.pdf.ColorPt;
18import com.pdftron.pdf.ColorSpace;
19import com.pdftron.pdf.Element;
20import com.pdftron.pdf.ElementReader;
21import com.pdftron.pdf.Font;
22import com.pdftron.pdf.GSChangesIterator;
23import com.pdftron.pdf.GState;
24import com.pdftron.pdf.Image2RGB;
25import com.pdftron.pdf.PDFDoc;
26import com.pdftron.pdf.Page;
27import com.pdftron.pdf.PageIterator;
28import com.pdftron.pdf.PathData;
29import com.pdftron.pdf.PatternColor;
30import com.pdftron.pdf.Shading;
31
32import java.util.ArrayList;
33
34public class ElementReaderAdvTest extends PDFNetSample {
35
36 private static OutputListener mOutputListener;
37
38 private static ArrayList<String> mFileList = new ArrayList<>();
39
40 public ElementReaderAdvTest() {
41 setTitle(R.string.sample_elementreaderadv_title);
42 setDescription(R.string.sample_elementreaderadv_description);
43 }
44
45 @Override
46 public void run(OutputListener outputListener) {
47 super.run(outputListener);
48 mOutputListener = outputListener;
49 mFileList.clear();
50 printHeader(outputListener);
51
52 // string output_path = "../../TestFiles/Output/";
53
54 mOutputListener.println("__________________________________________________");
55 mOutputListener.println("Extract page element information from all ");
56 mOutputListener.println("pages in the document.");
57 try (PDFDoc doc = new PDFDoc((Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath()))) // Extract text data from all pages in the document
58 {
59 doc.initSecurityHandler();
60
61 int pgnum = doc.getPageCount();
62 PageIterator page_begin = doc.getPageIterator();
63
64 ElementReader page_reader = new ElementReader();
65
66 PageIterator itr;
67
68 for (itr = page_begin; itr.hasNext(); ) // Read every page
69 {
70 Page nextPage = itr.next();
71 mOutputListener.println("Page " + nextPage.getIndex() +
72 "----------------------------------------");
73 page_reader.begin(nextPage);
74 ProcessElements(page_reader);
75 page_reader.end();
76 }
77 mOutputListener.println("Done");
78 } catch (Exception e) {
79 mOutputListener.printError(e.getStackTrace());
80 }
81
82 for (String file : mFileList) {
83 addToFileList(file);
84 }
85 printFooter(outputListener);
86 }
87
88 static String m_buf;
89
90 static void ProcessPath(ElementReader reader, Element path) throws PDFNetException {
91 if (path.isClippingPath()) {
92 mOutputListener.println("This is a clipping path");
93 }
94
95 PathData pathData = path.getPathData();
96 double[] data = pathData.getPoints();
97 byte[] opr = pathData.getOperators();
98
99 double x1, y1, x2, y2, x3, y3;
100 // Use path.getCTM() if you are interested in CTM (current transformation matrix).
101
102 mOutputListener.print(" Path Data Points := \"");
103 int data_index = 0;
104 for (int opr_index = 0; opr_index < opr.length; ++opr_index) {
105 switch (opr[opr_index]) {
106 case PathData.e_moveto:
107 x1 = data[data_index];
108 ++data_index;
109 y1 = data[data_index];
110 ++data_index;
111 mOutputListener.print("M" + x1 + " " + y1);
112 break;
113 case PathData.e_lineto:
114 x1 = data[data_index];
115 ++data_index;
116 y1 = data[data_index];
117 ++data_index;
118 mOutputListener.print(" L" + x1 + " " + y1);
119
120 break;
121 case PathData.e_cubicto:
122 x1 = data[data_index];
123 ++data_index;
124 y1 = data[data_index];
125 ++data_index;
126 x2 = data[data_index];
127 ++data_index;
128 y2 = data[data_index];
129 ++data_index;
130 x3 = data[data_index];
131 ++data_index;
132 y3 = data[data_index];
133 ++data_index;
134 mOutputListener.print(" C" + x1 + " " + y1 + " " + x2 + " " + y2 + " " + x3 + " " + y3);
135 break;
136 case PathData.e_rect: {
137 x1 = data[data_index];
138 ++data_index;
139 y1 = data[data_index];
140 ++data_index;
141 double w = data[data_index];
142 ++data_index;
143 double h = data[data_index];
144 ++data_index;
145 x2 = x1 + w;
146 y2 = y1;
147 x3 = x2;
148 y3 = y1 + h;
149 double x4 = x1;
150 double y4 = y3;
151 mOutputListener.print("M" + x1 + " " + y1 + " L" + x2 + " " + y2 + " L" + x3 + " " + y3 + " L" + x4 + " " + y4 + " Z");
152 }
153 break;
154 case PathData.e_closepath:
155 mOutputListener.println(" Close Path");
156 break;
157 default:
158 throw new PDFNetException("Invalid Element Type", 0, "", "", "");
159 }
160 }
161
162 mOutputListener.print("\" ");
163
164 GState gs = path.getGState();
165
166 // Set Path State 0 (stroke, fill, fill-rule) -----------------------------------
167 if (path.isStroked()) {
168 mOutputListener.println("Stroke path");
169
170 if (gs.getStrokeColorSpace().getType() == ColorSpace.e_pattern) {
171 mOutputListener.println("Path has associated pattern");
172 } else {
173 // Get stroke color (you can use PDFNet color conversion facilities)
174 ColorPt rgb = new ColorPt();
175 rgb = gs.getStrokeColor();
176 double v = rgb.get(0);
177 rgb = gs.getStrokeColorSpace().convert2RGB(rgb);
178 v = rgb.get(0);
179 }
180 } else {
181 // Do not stroke path
182 }
183
184 if (path.isFilled()) {
185 mOutputListener.println("Fill path");
186
187 if (gs.getFillColorSpace().getType() == ColorSpace.e_pattern) {
188 mOutputListener.println("Path has associated pattern");
189 PatternColor pat = gs.getFillPattern();
190 int type = pat.getType();
191 if (type == PatternColor.e_shading) {
192 mOutputListener.println("Shading");
193 Shading shading = pat.getShading();
194 if (shading.getType() == Shading.e_function_shading) {
195 mOutputListener.println("FUNCT");
196 } else if (shading.getType() == Shading.e_axial_shading) {
197 mOutputListener.println("AXIAL");
198 } else if (shading.getType() == Shading.e_radial_shading) {
199 mOutputListener.println("RADIAL");
200 }
201 } else if (type == PatternColor.e_colored_tiling_pattern) {
202 mOutputListener.println("e_colored_tiling_pattern");
203 } else if (type == PatternColor.e_uncolored_tiling_pattern) {
204 mOutputListener.println("e_uncolored_tiling_pattern");
205 } else {
206 mOutputListener.println("?");
207 }
208 } else {
209 ColorPt rgb = new ColorPt();
210 rgb = gs.getFillColor();
211 double v = rgb.get(0);
212 rgb = gs.getFillColorSpace().convert2RGB(rgb);
213 v = rgb.get(0);
214 }
215 } else {
216 // Do not fill path
217 }
218
219 // Process any changes in graphics state ---------------------------------
220
221 GSChangesIterator gs_itr = reader.getChangesIterator();
222 while (gs_itr.hasNext()) {
223 switch (gs_itr.next().intValue()) {
224 case GState.e_transform:
225 // Get transform matrix for this element. Unlike path.GetCTM()
226 // that return full transformation matrix gs.GetTransform() return
227 // only the transformation matrix that was installed for this element.
228 //
229 //gs.getTransform();
230 break;
231 case GState.e_line_width:
232 //gs.getLineWidth();
233 break;
234 case GState.e_line_cap:
235 //gs.getLineCap();
236 break;
237 case GState.e_line_join:
238 //gs.getLineJoin();
239 break;
240 case GState.e_flatness:
241 break;
242 case GState.e_miter_limit:
243 //gs.getMiterLimit();
244 break;
245 case GState.e_dash_pattern: {
246 //double[] dashes;
247 //dashes=gs.getDashes();
248 //gs.getPhase();
249 }
250 break;
251 case GState.e_fill_color: {
252 if (gs.getFillColorSpace().getType() == ColorSpace.e_pattern &&
253 gs.getFillPattern().getType() != PatternColor.e_shading) {
254 //process the pattern data
255 reader.patternBegin(true);
256 ProcessElements(reader);
257 reader.end();
258 }
259 }
260 break;
261 }
262 }
263 reader.clearChangeList();
264 }
265
266 static void ProcessText(ElementReader page_reader) throws PDFNetException {
267 // Begin text element
268 mOutputListener.println("Begin Text Block:");
269
270 Element element;
271 while ((element = page_reader.next()) != null) {
272 switch (element.getType()) {
273 case Element.e_text_end:
274 // Finish the text block
275 mOutputListener.println("End Text Block.");
276 return;
277
278 case Element.e_text: {
279 GState gs = element.getGState();
280
281 ColorSpace cs_fill = gs.getFillColorSpace();
282 ColorPt fill = gs.getFillColor();
283
284 ColorPt out;
285 out = cs_fill.convert2RGB(fill);
286
287 ColorSpace cs_stroke = gs.getStrokeColorSpace();
288 ColorPt stroke = gs.getStrokeColor();
289
290 Font font = gs.getFont();
291
292 mOutputListener.println("Font Name: " + font.getName());
293 //font.isFixedWidth();
294 //font.isSerif();
295 //font.isSymbolic();
296 //font.isItalic();
297 // ...
298
299 //double font_size = gs.getFontSize();
300 //double word_spacing = gs.getWordSpacing();
301 //double char_spacing = gs.getCharSpacing();
302 //String txt = element.getTextString();
303
304 if (font.getType() == Font.e_Type3) {
305 //type 3 font, process its data
306 for (CharIterator itr = element.getCharIterator(); itr.hasNext(); ) {
307 page_reader.type3FontBegin(itr.next(), null);
308 ProcessElements(page_reader);
309 page_reader.end();
310 }
311 } else {
312 Matrix2D text_mtx = element.getTextMatrix();
313 double x, y;
314 long char_code;
315
316 for (CharIterator itr = element.getCharIterator(); itr.hasNext(); ) {
317 CharData data = itr.next();
318 char_code = data.getCharCode();
319 //mOutputListener.print("Character code: ");
320
321 mOutputListener.print(String.valueOf(char_code));
322
323 x = data.getGlyphX(); // character positioning information
324 y = data.getGlyphY();
325
326 // Use element.getCTM() if you are interested in the CTM
327 // (current transformation matrix).
328 Matrix2D ctm = element.getCTM();
329
330 // To get the exact character positioning information you need to
331 // concatenate current text matrix with CTM and then multiply
332 // relative positioning coordinates with the resulting matrix.
333 //
334 Matrix2D mtx = ctm.multiply(text_mtx);
335 com.pdftron.pdf.Point t = mtx.multPoint(x, y);
336 x = t.x;
337 y = t.y;
338 //mOutputListener.println(" Position: x=" + x + " y=" + y );
339 }
340
341 mOutputListener.println();
342 }
343 }
344 break;
345 }
346 }
347 }
348
349 static void ProcessImage(Element image) throws PDFNetException {
350 boolean image_mask = image.isImageMask();
351 boolean interpolate = image.isImageInterpolate();
352 int width = image.getImageWidth();
353 int height = image.getImageHeight();
354 int out_data_sz = width * height * 3;
355
356 mOutputListener.println("Image: " +
357 " width=\"" + width + "\""
358 + " height=\"" + height);
359
360 // Matrix2D& mtx = image->GetCTM(); // image matrix (page positioning info)
361
362 // You can use GetImageData to read the raw (decoded) image data
363 //image->GetBitsPerComponent();
364 //image->GetImageData(); // get raw image data
365 // .... or use Image2RGB filter that converts every image to RGB format,
366 // This should save you time since you don't need to deal with color conversions,
367 // image up-sampling, decoding etc.
368
369 Image2RGB img_conv = new Image2RGB(image); // Extract and convert image to RGB 8-bpc format
370 FilterReader reader = new FilterReader(img_conv);
371
372 // A buffer used to keep image data.
373 byte[] buf = new byte[out_data_sz];
374 long image_data_out = reader.read(buf);
375 // &image_data_out.front() contains RGB image data.
376
377 // Note that you don't need to read a whole image at a time. Alternatively
378 // you can read a chunk at a time by repeatedly calling reader.Read(buf)
379 // until the function returns 0.
380 }
381
382 static void ProcessElements(ElementReader reader) throws PDFNetException {
383 Element element;
384 while ((element = reader.next()) != null) // Read page contents
385 {
386 switch (element.getType()) {
387 case Element.e_path: // Process path data...
388 {
389 ProcessPath(reader, element);
390 }
391 break;
392 case Element.e_text_begin: // Process text block...
393 {
394 ProcessText(reader);
395 }
396 break;
397 case Element.e_form: // Process form XObjects
398 {
399 reader.formBegin();
400 ProcessElements(reader);
401 reader.end();
402 }
403 break;
404 case Element.e_image: // Process Images
405 {
406 ProcessImage(element);
407 }
408 break;
409 }
410 }
411 }
412
413}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample
10import com.pdftron.android.pdfnetsdksamples.R
11import com.pdftron.android.pdfnetsdksamples.util.Utils
12import com.pdftron.common.PDFNetException
13import com.pdftron.filters.FilterReader
14import com.pdftron.pdf.*
15import java.util.*
16
/**
 * Sample that walks the content of every page of "newsletter.pdf" with an
 * [ElementReader] and reports what it finds through the sample's [OutputListener]:
 * path geometry and paint state, text runs with their font, and image dimensions.
 * Nested content (form XObjects, non-shading fill patterns and Type3 glyphs) is
 * entered recursively.
 */
class ElementReaderAdvTest : PDFNetSample() {
    init {
        setTitle(R.string.sample_elementreaderadv_title)
        setDescription(R.string.sample_elementreaderadv_description)
    }

    /**
     * Opens the input document, feeds each page to [ProcessElements] and prints a
     * banner per page. Any exception raised while reading is reported through
     * `printError` rather than propagated.
     *
     * @param outputListener sink for all sample output; must not be null
     */
    override fun run(outputListener: OutputListener?) {
        super.run(outputListener)
        mOutputListener = outputListener
        mFileList.clear()
        // Checked once here; the same instance was just stored in mOutputListener.
        val listener = outputListener!!
        printHeader(listener)

        listener.println("__________________________________________________")
        listener.println("Extract page element information from all ")
        listener.println("pages in the document.")
        // Extract text data from all pages in the document
        try {
            PDFDoc(Utils.getAssetTempFile(PDFNetSample.INPUT_PATH + "newsletter.pdf")!!.absolutePath).use { doc ->
                doc.initSecurityHandler()

                val page_reader = ElementReader()

                // Read every page
                val itr = doc.pageIterator
                while (itr.hasNext()) {
                    val nextPage = itr.next()!!
                    listener.println("Page " + nextPage.getIndex() +
                            "----------------------------------------")
                    page_reader.begin(nextPage)
                    ProcessElements(page_reader)
                    page_reader.end()
                }

                listener.println("Done")
            }
        } catch (e: Exception) {
            listener.printError(e.stackTrace)
        }

        for (file in mFileList) {
            addToFileList(file)
        }
        printFooter(listener)
    }

    companion object {

        // Listener the Process* helpers write to; assigned at the start of every run().
        private var mOutputListener: OutputListener? = null

        // Cleared at the start of run() and handed to addToFileList() at the end.
        private val mFileList = ArrayList<String>()

        internal var m_buf: String? = null

        /**
         * Reports a single path element: whether it is a clipping path, its geometry,
         * its stroke/fill state, and then handles the graphics-state changes recorded
         * by the reader for this element. The reader's change list is cleared on return.
         *
         * @param reader reader the element came from
         * @param path the path element to describe
         * @throws PDFNetException if the path contains an operator this sample does not
         * handle, or if an SDK call fails
         */
        @Throws(PDFNetException::class)
        internal fun ProcessPath(reader: ElementReader, path: Element) {
            val out = mOutputListener!!
            if (path.isClippingPath) {
                out.println("This is a clipping path")
            }

            printPathData(out, path)

            val gs = path.gState
            reportStroke(out, path, gs)
            reportFill(out, path, gs)
            processGStateChanges(reader, gs)

            reader.clearChangeList()
        }

        /** Prints the path's operators and coordinates in an SVG-like "M/L/C/Z" notation. */
        @Throws(PDFNetException::class)
        private fun printPathData(out: OutputListener, path: Element) {
            val pathData = path.pathData
            val data = pathData.points
            val opr = pathData.operators
            // Use path.getCTM() if you are interested in CTM (current transformation matrix).

            out.print(" Path Data Points := \"")
            var data_index = 0 // cursor into data; each operator consumes a fixed number of values
            for (op in opr) {
                when (op) {
                    PathData.e_moveto.toByte() -> {
                        val x1 = data[data_index++]
                        val y1 = data[data_index++]
                        out.print("M$x1 $y1")
                    }
                    PathData.e_lineto.toByte() -> {
                        val x1 = data[data_index++]
                        val y1 = data[data_index++]
                        out.print(" L$x1 $y1")
                    }
                    PathData.e_cubicto.toByte() -> {
                        val x1 = data[data_index++]
                        val y1 = data[data_index++]
                        val x2 = data[data_index++]
                        val y2 = data[data_index++]
                        val x3 = data[data_index++]
                        val y3 = data[data_index++]
                        out.print(" C$x1 $y1 $x2 $y2 $x3 $y3")
                    }
                    PathData.e_rect.toByte() -> {
                        // A rectangle is stored as x, y, width, height; print it as its four corners.
                        val x1 = data[data_index++]
                        val y1 = data[data_index++]
                        val w = data[data_index++]
                        val h = data[data_index++]
                        val x2 = x1 + w
                        val y2 = y1
                        val x3 = x2
                        val y3 = y1 + h
                        out.print("M$x1 $y1 L$x2 $y2 L$x3 $y3 L$x1 $y3 Z")
                    }
                    PathData.e_closepath.toByte() -> out.println(" Close Path")
                    else -> throw PDFNetException("Invalid Element Type", 0, "", "", "")
                }
            }

            out.print("\" ")
        }

        /** Reports whether the path is stroked and, for non-pattern colors, converts the stroke color to RGB. */
        @Throws(PDFNetException::class)
        private fun reportStroke(out: OutputListener, path: Element, gs: GState) {
            if (!path.isStroked) {
                return // Do not stroke path
            }
            out.println("Stroke path")

            if (gs.strokeColorSpace.type == ColorSpace.e_pattern) {
                out.println("Path has associated pattern")
            } else {
                // Get stroke color (you can use PDFNet color conversion facilities).
                // Individual components are available through ColorPt.get(index).
                val strokeColor = gs.strokeColor
                val strokeRgb = gs.strokeColorSpace.convert2RGB(strokeColor)
            }
        }

        /** Reports whether the path is filled and names the pattern/shading kind, or converts the fill color to RGB. */
        @Throws(PDFNetException::class)
        private fun reportFill(out: OutputListener, path: Element, gs: GState) {
            if (!path.isFilled) {
                return // Do not fill path
            }
            out.println("Fill path")

            if (gs.fillColorSpace.type != ColorSpace.e_pattern) {
                // Individual components are available through ColorPt.get(index).
                val fillColor = gs.fillColor
                val fillRgb = gs.fillColorSpace.convert2RGB(fillColor)
                return
            }

            out.println("Path has associated pattern")
            val pat = gs.fillPattern
            val type = pat.type
            if (type == PatternColor.e_shading) {
                out.println("Shading")
                val shading = pat.shading
                if (shading.type == Shading.e_function_shading) {
                    out.println("FUNCT")
                } else if (shading.type == Shading.e_axial_shading) {
                    out.println("AXIAL")
                } else if (shading.type == Shading.e_radial_shading) {
                    out.println("RADIAL")
                }
            } else if (type == PatternColor.e_colored_tiling_pattern) {
                out.println("e_colored_tiling_pattern")
            } else if (type == PatternColor.e_uncolored_tiling_pattern) {
                out.println("e_uncolored_tiling_pattern")
            } else {
                out.println("?")
            }
        }

        /**
         * Iterates the graphics-state attributes the reader reports as changed. Only a
         * fill-color change to a non-shading pattern triggers work: the pattern's own
         * content is read recursively. The other branches are placeholders showing which
         * GState getter corresponds to each attribute.
         */
        @Throws(PDFNetException::class)
        private fun processGStateChanges(reader: ElementReader, gs: GState) {
            val gs_itr = reader.changesIterator
            while (gs_itr.hasNext()) {
                when (gs_itr.next()!!.toInt()) {
                    GState.e_transform -> {
                        // Get transform matrix for this element. Unlike path.getCTM(),
                        // which returns the full transformation matrix, gs.getTransform()
                        // returns only the matrix that was installed for this element.
                        //
                        //gs.getTransform();
                    }
                    GState.e_line_width -> {
                        //gs.getLineWidth();
                    }
                    GState.e_line_cap -> {
                        //gs.getLineCap();
                    }
                    GState.e_line_join -> {
                        //gs.getLineJoin();
                    }
                    GState.e_flatness -> {
                    }
                    GState.e_miter_limit -> {
                        //gs.getMiterLimit();
                    }
                    GState.e_dash_pattern -> {
                        //double[] dashes;
                        //dashes=gs.getDashes();
                        //gs.getPhase();
                    }
                    GState.e_fill_color -> {
                        if (gs.fillColorSpace.type == ColorSpace.e_pattern && gs.fillPattern.type != PatternColor.e_shading) {
                            // Process the pattern data.
                            reader.patternBegin(true)
                            ProcessElements(reader)
                            reader.end()
                        }
                    }
                }
            }
        }

        /**
         * Consumes elements from the reader until the end of the current text block,
         * printing the font name of every text run followed by its character codes.
         * Runs set in a Type3 font have each glyph's content processed recursively instead.
         *
         * @param page_reader reader positioned just after a text-begin element
         * @throws PDFNetException if an SDK call fails
         */
        @Throws(PDFNetException::class)
        internal fun ProcessText(page_reader: ElementReader) {
            val out = mOutputListener!!
            // Begin text element
            out.println("Begin Text Block:")

            while (true) {
                val element = page_reader.next() ?: break
                when (element.type) {
                    Element.e_text_end -> {
                        // Finish the text block
                        out.println("End Text Block.")
                        return
                    }

                    Element.e_text -> {
                        val gs = element.gState

                        // Fill and stroke colors are fetched (and the fill converted to RGB)
                        // to demonstrate the API; this sample does not print them.
                        val cs_fill = gs.fillColorSpace
                        val fill = gs.fillColor
                        val fillRgb = cs_fill.convert2RGB(fill)

                        val cs_stroke = gs.strokeColorSpace
                        val stroke = gs.strokeColor

                        val font = gs.font

                        out.println("Font Name: " + font.name)
                        //font.isFixedWidth();
                        //font.isSerif();
                        //font.isSymbolic();
                        //font.isItalic();
                        // ...

                        //double font_size = gs.getFontSize();
                        //double word_spacing = gs.getWordSpacing();
                        //double char_spacing = gs.getCharSpacing();
                        //String txt = element.getTextString();

                        if (font.type == Font.e_Type3) {
                            // Type 3 font: each glyph is itself page content, so process its data.
                            val itr = element.charIterator
                            while (itr.hasNext()) {
                                page_reader.type3FontBegin(itr.next(), null)
                                ProcessElements(page_reader)
                                page_reader.end()
                            }
                        } else {
                            // To get the exact character positioning information you need to
                            // concatenate the current text matrix with the CTM (current
                            // transformation matrix) and then multiply the relative positioning
                            // coordinates with the resulting matrix. Both matrices belong to the
                            // element, so the product is computed once rather than per glyph.
                            val text_mtx = element.textMatrix
                            val mtx = element.ctm.multiply(text_mtx)

                            val itr = element.charIterator
                            while (itr.hasNext()) {
                                val data = itr.next()!!
                                val char_code = data.getCharCode()
                                //mOutputListener.print("Character code: ");

                                out.print(char_code.toString())

                                // Character positioning information, mapped through mtx.
                                val t = mtx.multPoint(data.getGlyphX(), data.getGlyphY())
                                val x = t.x
                                val y = t.y
                                //mOutputListener.println(" Position: x=" + x + " y=" + y );
                            }

                            out.println()
                        }
                    }
                }
            }
        }

        /**
         * Prints an image element's pixel dimensions and reads its pixels through an
         * [Image2RGB] filter into a buffer sized at three bytes per pixel. The buffer
         * is not used further by this sample.
         *
         * @param image the image element to read
         * @throws PDFNetException if an SDK call fails
         */
        @Throws(PDFNetException::class)
        internal fun ProcessImage(image: Element) {
            val out = mOutputListener!!
            // Read only to demonstrate the accessors; the values are not printed.
            val image_mask = image.isImageMask
            val interpolate = image.isImageInterpolate
            val width = image.imageWidth
            val height = image.imageHeight

            out.println("Image: " +
                    " width=\"" + width + "\""
                    + " height=\"" + height)

            // Computed as Long: width * height * 3 can exceed the Int range for large
            // images, which would yield a negative or truncated array length.
            val out_data_sz = 3L * width * height
            if (out_data_sz > Int.MAX_VALUE) {
                out.println("Image is too large to buffer in a single array; skipping pixel data.")
                return
            }

            // Use image.getCTM() for the image matrix (page positioning info).

            // You can use getImageData() to read the raw (decoded) image data
            // (see also getBitsPerComponent()), or use the Image2RGB filter that converts
            // every image to RGB format. This should save you time since you don't need to
            // deal with color conversions, image up-sampling, decoding etc.

            val img_conv = Image2RGB(image) // Extract and convert image to RGB 8-bpc format
            val reader = FilterReader(img_conv)

            // A buffer used to keep image data.
            val buf = ByteArray(out_data_sz.toInt())
            val image_data_out = reader.read(buf)
            // buf now holds the RGB image data; image_data_out is the value returned by read().

            // Note that you don't need to read a whole image at a time. Alternatively
            // you can read a chunk at a time by repeatedly calling reader.read(buf)
            // until the function returns 0.
        }

        /**
         * Reads elements from the reader until it is exhausted and dispatches each one by
         * type: paths, text blocks, form XObjects (entered recursively) and images.
         * Other element types are ignored.
         *
         * @param reader reader that has been begun on a page, form, pattern or glyph
         * @throws PDFNetException if an SDK call fails
         */
        @Throws(PDFNetException::class)
        internal fun ProcessElements(reader: ElementReader) {
            // Read page contents
            while (true) {
                val element = reader.next() ?: break
                when (element.type) {
                    // Process path data...
                    Element.e_path -> ProcessPath(reader, element)
                    // Process text block...
                    Element.e_text_begin -> ProcessText(reader)
                    // Process form XObjects
                    Element.e_form -> {
                        reader.formBegin()
                        ProcessElements(reader)
                        reader.end()
                    }
                    // Process Images
                    Element.e_image -> ProcessImage(element)
                }
            }
        }
    }

}
Did you find this helpful?
Trial setup questions?
Ask experts on Discord. Need other help?
Contact Support. Pricing or product questions?
Contact Sales