Sample Java and Kotlin code for using the Apryse SDK to extract text, paths, and images from a PDF. The sample also shows how to do color conversion, image normalization, and how to process changes in the graphics state. Learn more about our Android SDK and PDF Data Extraction SDK Capabilities.
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener;
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
10import com.pdftron.android.pdfnetsdksamples.R;
11import com.pdftron.android.pdfnetsdksamples.util.Utils;
12import com.pdftron.common.Matrix2D;
13import com.pdftron.common.PDFNetException;
14import com.pdftron.filters.FilterReader;
15import com.pdftron.pdf.CharData;
16import com.pdftron.pdf.CharIterator;
17import com.pdftron.pdf.ColorPt;
18import com.pdftron.pdf.ColorSpace;
19import com.pdftron.pdf.Element;
20import com.pdftron.pdf.ElementReader;
21import com.pdftron.pdf.Font;
22import com.pdftron.pdf.GSChangesIterator;
23import com.pdftron.pdf.GState;
24import com.pdftron.pdf.Image2RGB;
25import com.pdftron.pdf.PDFDoc;
26import com.pdftron.pdf.Page;
27import com.pdftron.pdf.PageIterator;
28import com.pdftron.pdf.PathData;
29import com.pdftron.pdf.PatternColor;
30import com.pdftron.pdf.Shading;
31
32import java.util.ArrayList;
33
34public class ElementReaderAdvTest extends PDFNetSample {
35
36 private static OutputListener mOutputListener;
37
38 private static ArrayList<String> mFileList = new ArrayList<>();
39
40 public ElementReaderAdvTest() {
41 setTitle(R.string.sample_elementreaderadv_title);
42 setDescription(R.string.sample_elementreaderadv_description);
43 }
44
45 @Override
46 public void run(OutputListener outputListener) {
47 super.run(outputListener);
48 mOutputListener = outputListener;
49 mFileList.clear();
50 printHeader(outputListener);
51
52 // string output_path = "../../TestFiles/Output/";
53
54 mOutputListener.println("__________________________________________________");
55 mOutputListener.println("Extract page element information from all ");
56 mOutputListener.println("pages in the document.");
57 try (PDFDoc doc = new PDFDoc((Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath()))) // Extract text data from all pages in the document
58 {
59 doc.initSecurityHandler();
60
61 int pgnum = doc.getPageCount();
62 PageIterator page_begin = doc.getPageIterator();
63
64 ElementReader page_reader = new ElementReader();
65
66 PageIterator itr;
67
68 for (itr = page_begin; itr.hasNext(); ) // Read every page
69 {
70 Page nextPage = itr.next();
71 mOutputListener.println("Page " + nextPage.getIndex() +
72 "----------------------------------------");
73 page_reader.begin(nextPage);
74 ProcessElements(page_reader);
75 page_reader.end();
76 }
77 mOutputListener.println("Done");
78 } catch (Exception e) {
79 mOutputListener.printError(e.getStackTrace());
80 }
81
82 for (String file : mFileList) {
83 addToFileList(file);
84 }
85 printFooter(outputListener);
86 }
87
88 static String m_buf;
89
90 static void ProcessPath(ElementReader reader, Element path) throws PDFNetException {
91 if (path.isClippingPath()) {
92 mOutputListener.println("This is a clipping path");
93 }
94
95 PathData pathData = path.getPathData();
96 double[] data = pathData.getPoints();
97 byte[] opr = pathData.getOperators();
98
99 double x1, y1, x2, y2, x3, y3;
100 // Use path.getCTM() if you are interested in CTM (current transformation matrix).
101
102 mOutputListener.print(" Path Data Points := \"");
103 int data_index = 0;
104 for (int opr_index = 0; opr_index < opr.length; ++opr_index) {
105 switch (opr[opr_index]) {
106 case PathData.e_moveto:
107 x1 = data[data_index];
108 ++data_index;
109 y1 = data[data_index];
110 ++data_index;
111 mOutputListener.print("M" + x1 + " " + y1);
112 break;
113 case PathData.e_lineto:
114 x1 = data[data_index];
115 ++data_index;
116 y1 = data[data_index];
117 ++data_index;
118 mOutputListener.print(" L" + x1 + " " + y1);
119
120 break;
121 case PathData.e_cubicto:
122 x1 = data[data_index];
123 ++data_index;
124 y1 = data[data_index];
125 ++data_index;
126 x2 = data[data_index];
127 ++data_index;
128 y2 = data[data_index];
129 ++data_index;
130 x3 = data[data_index];
131 ++data_index;
132 y3 = data[data_index];
133 ++data_index;
134 mOutputListener.print(" C" + x1 + " " + y1 + " " + x2 + " " + y2 + " " + x3 + " " + y3);
135 break;
136 case PathData.e_rect: {
137 x1 = data[data_index];
138 ++data_index;
139 y1 = data[data_index];
140 ++data_index;
141 double w = data[data_index];
142 ++data_index;
143 double h = data[data_index];
144 ++data_index;
145 x2 = x1 + w;
146 y2 = y1;
147 x3 = x2;
148 y3 = y1 + h;
149 double x4 = x1;
150 double y4 = y3;
151 mOutputListener.print("M" + x1 + " " + y1 + " L" + x2 + " " + y2 + " L" + x3 + " " + y3 + " L" + x4 + " " + y4 + " Z");
152 }
153 break;
154 case PathData.e_closepath:
155 mOutputListener.println(" Close Path");
156 break;
157 default:
158 throw new PDFNetException("Invalid Element Type", 0, "", "", "");
159 }
160 }
161
162 mOutputListener.print("\" ");
163
164 GState gs = path.getGState();
165
166 // Set Path State 0 (stroke, fill, fill-rule) -----------------------------------
167 if (path.isStroked()) {
168 mOutputListener.println("Stroke path");
169
170 if (gs.getStrokeColorSpace().getType() == ColorSpace.e_pattern) {
171 mOutputListener.println("Path has associated pattern");
172 } else {
173 // Get stroke color (you can use PDFNet color conversion facilities)
174 ColorPt rgb = new ColorPt();
175 rgb = gs.getStrokeColor();
176 double v = rgb.get(0);
177 rgb = gs.getStrokeColorSpace().convert2RGB(rgb);
178 v = rgb.get(0);
179 }
180 } else {
181 // Do not stroke path
182 }
183
184 if (path.isFilled()) {
185 mOutputListener.println("Fill path");
186
187 if (gs.getFillColorSpace().getType() == ColorSpace.e_pattern) {
188 mOutputListener.println("Path has associated pattern");
189 PatternColor pat = gs.getFillPattern();
190 int type = pat.getType();
191 if (type == PatternColor.e_shading) {
192 mOutputListener.println("Shading");
193 Shading shading = pat.getShading();
194 if (shading.getType() == Shading.e_function_shading) {
195 mOutputListener.println("FUNCT");
196 } else if (shading.getType() == Shading.e_axial_shading) {
197 mOutputListener.println("AXIAL");
198 } else if (shading.getType() == Shading.e_radial_shading) {
199 mOutputListener.println("RADIAL");
200 }
201 } else if (type == PatternColor.e_colored_tiling_pattern) {
202 mOutputListener.println("e_colored_tiling_pattern");
203 } else if (type == PatternColor.e_uncolored_tiling_pattern) {
204 mOutputListener.println("e_uncolored_tiling_pattern");
205 } else {
206 mOutputListener.println("?");
207 }
208 } else {
209 ColorPt rgb = new ColorPt();
210 rgb = gs.getFillColor();
211 double v = rgb.get(0);
212 rgb = gs.getFillColorSpace().convert2RGB(rgb);
213 v = rgb.get(0);
214 }
215 } else {
216 // Do not fill path
217 }
218
219 // Process any changes in graphics state ---------------------------------
220
221 GSChangesIterator gs_itr = reader.getChangesIterator();
222 while (gs_itr.hasNext()) {
223 switch (gs_itr.next().intValue()) {
224 case GState.e_transform:
225 // Get transform matrix for this element. Unlike path.GetCTM()
226 // that return full transformation matrix gs.GetTransform() return
227 // only the transformation matrix that was installed for this element.
228 //
229 //gs.getTransform();
230 break;
231 case GState.e_line_width:
232 //gs.getLineWidth();
233 break;
234 case GState.e_line_cap:
235 //gs.getLineCap();
236 break;
237 case GState.e_line_join:
238 //gs.getLineJoin();
239 break;
240 case GState.e_flatness:
241 break;
242 case GState.e_miter_limit:
243 //gs.getMiterLimit();
244 break;
245 case GState.e_dash_pattern: {
246 //double[] dashes;
247 //dashes=gs.getDashes();
248 //gs.getPhase();
249 }
250 break;
251 case GState.e_fill_color: {
252 if (gs.getFillColorSpace().getType() == ColorSpace.e_pattern &&
253 gs.getFillPattern().getType() != PatternColor.e_shading) {
254 //process the pattern data
255 reader.patternBegin(true);
256 ProcessElements(reader);
257 reader.end();
258 }
259 }
260 break;
261 }
262 }
263 reader.clearChangeList();
264 }
265
266 static void ProcessText(ElementReader page_reader) throws PDFNetException {
267 // Begin text element
268 mOutputListener.println("Begin Text Block:");
269
270 Element element;
271 while ((element = page_reader.next()) != null) {
272 switch (element.getType()) {
273 case Element.e_text_end:
274 // Finish the text block
275 mOutputListener.println("End Text Block.");
276 return;
277
278 case Element.e_text: {
279 GState gs = element.getGState();
280
281 ColorSpace cs_fill = gs.getFillColorSpace();
282 ColorPt fill = gs.getFillColor();
283
284 ColorPt out;
285 out = cs_fill.convert2RGB(fill);
286
287 ColorSpace cs_stroke = gs.getStrokeColorSpace();
288 ColorPt stroke = gs.getStrokeColor();
289
290 Font font = gs.getFont();
291
292 mOutputListener.println("Font Name: " + font.getName());
293 //font.isFixedWidth();
294 //font.isSerif();
295 //font.isSymbolic();
296 //font.isItalic();
297 // ...
298
299 //double font_size = gs.getFontSize();
300 //double word_spacing = gs.getWordSpacing();
301 //double char_spacing = gs.getCharSpacing();
302 //String txt = element.getTextString();
303
304 if (font.getType() == Font.e_Type3) {
305 //type 3 font, process its data
306 for (CharIterator itr = element.getCharIterator(); itr.hasNext(); ) {
307 page_reader.type3FontBegin(itr.next(), null);
308 ProcessElements(page_reader);
309 page_reader.end();
310 }
311 } else {
312 Matrix2D text_mtx = element.getTextMatrix();
313 double x, y;
314 long char_code;
315
316 for (CharIterator itr = element.getCharIterator(); itr.hasNext(); ) {
317 CharData data = itr.next();
318 char_code = data.getCharCode();
319 //mOutputListener.print("Character code: ");
320
321 mOutputListener.print(String.valueOf(char_code));
322
323 x = data.getGlyphX(); // character positioning information
324 y = data.getGlyphY();
325
326 // Use element.getCTM() if you are interested in the CTM
327 // (current transformation matrix).
328 Matrix2D ctm = element.getCTM();
329
330 // To get the exact character positioning information you need to
331 // concatenate current text matrix with CTM and then multiply
332 // relative positioning coordinates with the resulting matrix.
333 //
334 Matrix2D mtx = ctm.multiply(text_mtx);
335 com.pdftron.pdf.Point t = mtx.multPoint(x, y);
336 x = t.x;
337 y = t.y;
338 //mOutputListener.println(" Position: x=" + x + " y=" + y );
339 }
340
341 mOutputListener.println();
342 }
343 }
344 break;
345 }
346 }
347 }
348
349 static void ProcessImage(Element image) throws PDFNetException {
350 boolean image_mask = image.isImageMask();
351 boolean interpolate = image.isImageInterpolate();
352 int width = image.getImageWidth();
353 int height = image.getImageHeight();
354 int out_data_sz = width * height * 3;
355
356 mOutputListener.println("Image: " +
357 " width=\"" + width + "\""
358 + " height=\"" + height);
359
360 // Matrix2D& mtx = image->GetCTM(); // image matrix (page positioning info)
361
362 // You can use GetImageData to read the raw (decoded) image data
363 //image->GetBitsPerComponent();
364 //image->GetImageData(); // get raw image data
365 // .... or use Image2RGB filter that converts every image to RGB format,
366 // This should save you time since you don't need to deal with color conversions,
367 // image up-sampling, decoding etc.
368
369 Image2RGB img_conv = new Image2RGB(image); // Extract and convert image to RGB 8-bpc format
370 FilterReader reader = new FilterReader(img_conv);
371
372 // A buffer used to keep image data.
373 byte[] buf = new byte[out_data_sz];
374 long image_data_out = reader.read(buf);
375 // &image_data_out.front() contains RGB image data.
376
377 // Note that you don't need to read a whole image at a time. Alternatively
378 // you can read a chunk at a time by repeatedly calling reader.Read(buf)
379 // until the function returns 0.
380 }
381
382 static void ProcessElements(ElementReader reader) throws PDFNetException {
383 Element element;
384 while ((element = reader.next()) != null) // Read page contents
385 {
386 switch (element.getType()) {
387 case Element.e_path: // Process path data...
388 {
389 ProcessPath(reader, element);
390 }
391 break;
392 case Element.e_text_begin: // Process text block...
393 {
394 ProcessText(reader);
395 }
396 break;
397 case Element.e_form: // Process form XObjects
398 {
399 reader.formBegin();
400 ProcessElements(reader);
401 reader.end();
402 }
403 break;
404 case Element.e_image: // Process Images
405 {
406 ProcessImage(element);
407 }
408 break;
409 }
410 }
411 }
412
413}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample
10import com.pdftron.android.pdfnetsdksamples.R
11import com.pdftron.android.pdfnetsdksamples.util.Utils
12import com.pdftron.common.PDFNetException
13import com.pdftron.filters.FilterReader
14import com.pdftron.pdf.*
15import java.util.*
16
/**
 * Sample that opens `newsletter.pdf`, walks every page with an [ElementReader] and reports
 * the paths, text runs and images it finds to the sample's [OutputListener].
 *
 * Several values are computed purely to demonstrate the API (colour conversion, glyph
 * positions, decoded image bytes); they are not printed.
 */
class ElementReaderAdvTest : PDFNetSample() {
    init {
        setTitle(R.string.sample_elementreaderadv_title)
        setDescription(R.string.sample_elementreaderadv_description)
    }

    /**
     * Runs the sample: prints a banner, processes every page of the input document and
     * reports any exception through `printError` instead of propagating it.
     *
     * @param outputListener receiver of the sample's textual output; must not be null
     */
    override fun run(outputListener: OutputListener?) {
        super.run(outputListener)
        mOutputListener = outputListener
        mFileList.clear()
        val out = outputListener!!
        printHeader(out)

        out.println("__________________________________________________")
        out.println("Extract page element information from all ")
        out.println("pages in the document.")
        try {
            val inputFile = Utils.getAssetTempFile(PDFNetSample.INPUT_PATH + "newsletter.pdf")!!.absolutePath
            PDFDoc(inputFile).use { doc ->
                doc.initSecurityHandler()

                val pageReader = ElementReader()
                val pages = doc.pageIterator
                while (pages.hasNext()) {
                    val page = pages.next()!!
                    out.println("Page " + page.getIndex() +
                            "----------------------------------------")
                    pageReader.begin(page)
                    ProcessElements(pageReader)
                    pageReader.end()
                }

                out.println("Done")
            }
        } catch (e: Exception) {
            out.printError(e.stackTrace)
        }

        for (file in mFileList) {
            addToFileList(file)
        }
        printFooter(out)
    }

    companion object {

        /** Sink for all sample output; assigned at the start of every [run]. */
        private var mOutputListener: OutputListener? = null

        /** Files to hand to `addToFileList` once the run is over (this sample adds none). */
        private val mFileList = ArrayList<String>()

        /** Not used by this sample; kept because it is module-visible. */
        internal var m_buf: String? = null

        /**
         * Reports a path element: its geometry, whether it is stroked and/or filled, and
         * then handles the graphics-state changes recorded by the reader for this element.
         *
         * @param reader the reader that produced [path]; its change list is cleared on return
         * @param path the path element to describe
         * @throws PDFNetException if the SDK fails or the path contains an unknown operator
         */
        @Throws(PDFNetException::class)
        internal fun ProcessPath(reader: ElementReader, path: Element) {
            val out = mOutputListener!!
            if (path.isClippingPath) {
                out.println("This is a clipping path")
            }

            // Use path.getCTM() if you are interested in CTM (current transformation matrix).
            printPathData(out, path.pathData)

            val gs = path.gState
            if (path.isStroked) {
                reportStroke(out, gs)
            }
            if (path.isFilled) {
                reportFill(out, gs)
            }

            processGStateChanges(reader, gs)
            reader.clearChangeList()
        }

        /** Prints the path operators and their coordinates between a pair of double quotes. */
        @Throws(PDFNetException::class)
        private fun printPathData(out: OutputListener, pathData: PathData) {
            val pts = pathData.points
            val ops = pathData.operators

            out.print(" Path Data Points := \"")
            var i = 0 // read cursor into pts; each operator consumes a fixed number of values
            for (op in ops) {
                when (op) {
                    PathData.e_moveto.toByte() -> {
                        val x = pts[i++]
                        val y = pts[i++]
                        out.print("M$x $y")
                    }
                    PathData.e_lineto.toByte() -> {
                        val x = pts[i++]
                        val y = pts[i++]
                        out.print(" L$x $y")
                    }
                    PathData.e_cubicto.toByte() -> {
                        val x1 = pts[i++]
                        val y1 = pts[i++]
                        val x2 = pts[i++]
                        val y2 = pts[i++]
                        val x3 = pts[i++]
                        val y3 = pts[i++]
                        out.print(" C$x1 $y1 $x2 $y2 $x3 $y3")
                    }
                    PathData.e_rect.toByte() -> {
                        // A rectangle is stored as x, y, width, height; print it as a closed outline.
                        val x = pts[i++]
                        val y = pts[i++]
                        val w = pts[i++]
                        val h = pts[i++]
                        val right = x + w
                        val top = y + h
                        out.print("M$x $y L$right $y L$right $top L$x $top Z")
                    }
                    PathData.e_closepath.toByte() -> out.println(" Close Path")
                    else -> throw PDFNetException("Invalid Element Type", 0, "", "", "")
                }
            }
            out.print("\" ")
        }

        /** Reports that the path is stroked and converts a non-pattern stroke colour to RGB. */
        @Throws(PDFNetException::class)
        private fun reportStroke(out: OutputListener, gs: GState) {
            out.println("Stroke path")

            if (gs.strokeColorSpace.type == ColorSpace.e_pattern) {
                out.println("Path has associated pattern")
            } else {
                // Demonstrates the colour conversion facilities; the converted value is not printed.
                val strokeRgb = gs.strokeColorSpace.convert2RGB(gs.strokeColor)
                strokeRgb.get(0)
            }
        }

        /** Reports that the path is filled, naming the pattern kind or converting the colour to RGB. */
        @Throws(PDFNetException::class)
        private fun reportFill(out: OutputListener, gs: GState) {
            out.println("Fill path")

            if (gs.fillColorSpace.type != ColorSpace.e_pattern) {
                // Demonstrates the colour conversion facilities; the converted value is not printed.
                val fillRgb = gs.fillColorSpace.convert2RGB(gs.fillColor)
                fillRgb.get(0)
                return
            }

            out.println("Path has associated pattern")
            val pattern = gs.fillPattern
            val patternType = pattern.type
            if (patternType == PatternColor.e_shading) {
                out.println("Shading")
                val shadingType = pattern.shading.type
                if (shadingType == Shading.e_function_shading) {
                    out.println("FUNCT")
                } else if (shadingType == Shading.e_axial_shading) {
                    out.println("AXIAL")
                } else if (shadingType == Shading.e_radial_shading) {
                    out.println("RADIAL")
                }
            } else if (patternType == PatternColor.e_colored_tiling_pattern) {
                out.println("e_colored_tiling_pattern")
            } else if (patternType == PatternColor.e_uncolored_tiling_pattern) {
                out.println("e_uncolored_tiling_pattern")
            } else {
                out.println("?")
            }
        }

        /**
         * Iterates the graphics-state attributes the reader reports as changed. Only a fill
         * colour change to a non-shading pattern triggers work: the pattern's content is
         * read recursively through [ProcessElements].
         */
        @Throws(PDFNetException::class)
        private fun processGStateChanges(reader: ElementReader, gs: GState) {
            val changes = reader.changesIterator
            while (changes.hasNext()) {
                when (changes.next()!!.toInt()) {
                    GState.e_transform -> {
                        // gs.getTransform() returns only the matrix installed for this element,
                        // unlike path.getCTM() which returns the full transformation matrix.
                    }
                    GState.e_line_width -> {
                        // gs.getLineWidth()
                    }
                    GState.e_line_cap -> {
                        // gs.getLineCap()
                    }
                    GState.e_line_join -> {
                        // gs.getLineJoin()
                    }
                    GState.e_flatness -> {
                    }
                    GState.e_miter_limit -> {
                        // gs.getMiterLimit()
                    }
                    GState.e_dash_pattern -> {
                        // gs.getDashes(), gs.getPhase()
                    }
                    GState.e_fill_color -> {
                        if (gs.fillColorSpace.type == ColorSpace.e_pattern &&
                                gs.fillPattern.type != PatternColor.e_shading) {
                            // process the pattern data
                            reader.patternBegin(true)
                            ProcessElements(reader)
                            reader.end()
                        }
                    }
                }
            }
        }

        /**
         * Consumes elements up to and including the next `e_text_end`, printing the font
         * name of each text run followed by its character codes. Type 3 glyphs are read
         * recursively as content streams instead.
         *
         * @param page_reader reader positioned just after an `e_text_begin` element
         * @throws PDFNetException if the SDK reports an error
         */
        @Throws(PDFNetException::class)
        internal fun ProcessText(page_reader: ElementReader) {
            val out = mOutputListener!!
            out.println("Begin Text Block:")

            while (true) {
                val element = page_reader.next() ?: break
                when (element.type) {
                    Element.e_text_end -> {
                        out.println("End Text Block.")
                        return
                    }

                    Element.e_text -> {
                        val gs = element.gState

                        // Demonstrates colour conversion; gs.strokeColorSpace / gs.strokeColor
                        // give the stroke equivalents.
                        gs.fillColorSpace.convert2RGB(gs.fillColor)

                        val font = gs.font
                        out.println("Font Name: " + font.name)
                        // Also available: font.isFixedWidth(), isSerif(), isSymbolic(), isItalic(),
                        // gs.getFontSize(), getWordSpacing(), getCharSpacing(), element.getTextString().

                        if (font.type == Font.e_Type3) {
                            // type 3 font, process its data
                            val glyphs = element.charIterator
                            while (glyphs.hasNext()) {
                                page_reader.type3FontBegin(glyphs.next(), null)
                                ProcessElements(page_reader)
                                page_reader.end()
                            }
                        } else {
                            // To get exact character positions, concatenate the text matrix with
                            // the CTM and map the relative glyph coordinates through the result.
                            // Both matrices belong to the element, so the product is computed
                            // once rather than per character.
                            val glyphToPage = element.ctm.multiply(element.textMatrix)

                            val chars = element.charIterator
                            while (chars.hasNext()) {
                                val data = chars.next()!!
                                out.print(data.getCharCode().toString())

                                // Position of the glyph; available as pos.x / pos.y but not printed.
                                val pos = glyphToPage.multPoint(data.getGlyphX(), data.getGlyphY())
                            }

                            out.println()
                        }
                    }
                }
            }
        }

        /**
         * Prints the pixel dimensions of an image element and decodes it to 8-bit RGB
         * through [Image2RGB]. The decoded bytes are read into a local buffer and discarded.
         *
         * @param image the image element to describe
         * @throws PDFNetException if the SDK reports an error, or if the RGB data would not
         * fit in a single array
         */
        @Throws(PDFNetException::class)
        internal fun ProcessImage(image: Element) {
            // Also available: image.isImageMask, image.isImageInterpolate, image.ctm
            // (page positioning), image.bitsPerComponent, image.imageData (raw data).
            val width = image.imageWidth
            val height = image.imageHeight

            mOutputListener!!.println("Image: " +
                    " width=\"" + width + "\""
                    + " height=\"" + height + "\"")

            // Three bytes per pixel; computed as Long so large images cannot wrap around Int.
            val rgbSize = width.toLong() * height * 3
            if (rgbSize > Int.MAX_VALUE) {
                throw PDFNetException("Image too large to buffer in memory", 0, "", "", "")
            }

            // Image2RGB converts every image to RGB, which avoids dealing with colour
            // conversions, up-sampling and decoding by hand.
            val reader = FilterReader(Image2RGB(image))

            val buf = ByteArray(rgbSize.toInt())
            reader.read(buf)
            // buf now holds RGB image data. The image could also be read a chunk at a time
            // by calling reader.read(buf) repeatedly until it returns 0.
        }

        /**
         * Reads elements from [reader] until it is exhausted, dispatching paths, text blocks,
         * form XObjects (recursively) and images to their handlers. Other element types are
         * skipped.
         *
         * @throws PDFNetException if the SDK reports an error
         */
        @Throws(PDFNetException::class)
        internal fun ProcessElements(reader: ElementReader) {
            while (true) {
                val element = reader.next() ?: break
                when (element.type) {
                    Element.e_path -> ProcessPath(reader, element)
                    Element.e_text_begin -> ProcessText(reader)
                    Element.e_form -> {
                        reader.formBegin()
                        ProcessElements(reader)
                        reader.end()
                    }
                    Element.e_image -> ProcessImage(element)
                }
            }
        }
    }

}
Did you find this helpful?
Trial setup questions? Ask experts on Discord
Need other help? Contact Support
Pricing or product questions? Contact Sales