ImageExtract

Sample Java and Kotlin code that uses the Apryse Android SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed or compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with Android SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener;
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
10import com.pdftron.android.pdfnetsdksamples.R;
11import com.pdftron.android.pdfnetsdksamples.util.Utils;
12import com.pdftron.common.Matrix2D;
13import com.pdftron.common.PDFNetException;
14import com.pdftron.pdf.Element;
15import com.pdftron.pdf.ElementReader;
16import com.pdftron.pdf.Image;
17import com.pdftron.pdf.PDFDoc;
18import com.pdftron.pdf.PageIterator;
19import com.pdftron.sdf.DictIterator;
20import com.pdftron.sdf.Obj;
21import com.pdftron.sdf.SDFDoc;
22
23import java.util.ArrayList;
24
25///-----------------------------------------------------------------------------------
26/// This sample illustrates one approach to PDF image extraction
27/// using PDFNet.
28///
29/// Note: Besides direct image export, you can also convert PDF images
30/// to Java image, or extract uncompressed/compressed image data directly
31/// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
32/// sample project).
33///-----------------------------------------------------------------------------------
34
35public class ImageExtractTest extends PDFNetSample {
36
37 private static OutputListener mOutputListener;
38
39 private static ArrayList<String> mFileList = new ArrayList<>();
40
41 public ImageExtractTest() {
42 setTitle(R.string.sample_imageextract_title);
43 setDescription(R.string.sample_imageextract_description);
44
45 // The standard library does not support exporting to
46 // PNG/TIFF formats, thus trying to export the PDF to
47 // PNG or TIFF will fail. Please, comment out this call
48 // if using the full library.
49 // DisableRun();
50 }
51
52 @Override
53 public void run(OutputListener outputListener) {
54 super.run(outputListener);
55 mOutputListener = outputListener;
56 mFileList.clear();
57 printHeader(outputListener);
58 // Initialize PDFNet
59
60 // Example 1:
61 // Extract images by traversing the display list for
62 // every page. With this approach it is possible to obtain
63 // image positioning information and DPI.
64 try (PDFDoc doc = new PDFDoc((Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath()))) {
65 doc.initSecurityHandler();
66 ElementReader reader = new ElementReader();
67 // Read every page
68 for (PageIterator itr = doc.getPageIterator(); itr.hasNext(); ) {
69 reader.begin(itr.next());
70 ImageExtract(reader);
71 reader.end();
72 }
73 mOutputListener.println("Done.");
74 } catch (Exception e) {
75 mOutputListener.printError(e.getStackTrace());
76 }
77
78 mOutputListener.println("----------------------------------------------------------------");
79
80 // Example 2:
81 // Extract images by scanning the low-level document.
82 try (PDFDoc doc = new PDFDoc((Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath()))) {
83 doc.initSecurityHandler();
84 image_counter = 0;
85 SDFDoc cos_doc = doc.getSDFDoc();
86 long num_objs = cos_doc.xRefSize();
87 for (int i = 1; i < num_objs; ++i) {
88 Obj obj = cos_doc.getObj(i);
89 if (obj != null && !obj.isFree() && obj.isStream()) {
90 // Process only images
91 DictIterator itr = obj.find("Type");
92 if (!itr.hasNext() || !itr.value().getName().equals("XObject"))
93 continue;
94
95 itr = obj.find("Subtype");
96 if (!itr.hasNext() || !itr.value().getName().equals("Image"))
97 continue;
98
99 Image image = new Image(obj);
100
101 mOutputListener.println("--> Image: " + (++image_counter));
102 mOutputListener.println(" Width: " + image.getImageWidth());
103 mOutputListener.println(" Height: " + image.getImageHeight());
104 mOutputListener.println(" BPC: " + image.getBitsPerComponent());
105
106 String fname = "image_extract2_" + image_counter;
107 String path = Utils.createExternalFile(fname, mFileList).getAbsolutePath();
108 image.export(path);
109
110 //String path= Utils.createExternalFile(fname + ".tif", mFileList).getAbsolutePath();
111 //image.exportAsTiff(path);
112
113 //String path = Utils.createExternalFile(fname + ".png", mFileList).getAbsolutePath();
114 //image.exportAsPng(path);
115 }
116 }
117
118 mOutputListener.println("Done.");
119 } catch (Exception e) {
120 mOutputListener.printError(e.getStackTrace());
121 }
122
123 for (String file : mFileList) {
124 addToFileList(file);
125 }
126 printFooter(outputListener);
127 }
128
129 // Relative paths to folders containing test files.
130
131 static int image_counter = 0;
132
133 static void ImageExtract(ElementReader reader) throws PDFNetException {
134 Element element;
135 while ((element = reader.next()) != null) {
136 switch (element.getType()) {
137 case Element.e_image:
138 case Element.e_inline_image: {
139 mOutputListener.println("--> Image: " + (++image_counter));
140 mOutputListener.println(" Width: " + element.getImageWidth());
141 mOutputListener.println(" Height: " + element.getImageHeight());
142 mOutputListener.println(" BPC: " + element.getBitsPerComponent());
143
144 Matrix2D ctm = element.getCTM();
145 double x2 = 1, y2 = 1;
146 com.pdftron.pdf.Point p = ctm.multPoint(x2, y2);
147 mOutputListener.println(String.format(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.getH(), ctm.getV(), p.x, p.y));
148
149 if (element.getType() == Element.e_image) {
150 Image image = new Image(element.getXObject());
151
152 String fname = "image_extract1_" + image_counter;
153
154 String path = Utils.createExternalFile(fname, mFileList).getAbsolutePath();
155 image.export(path);
156
157 //String path2 = Utils.createExternalFile(fname + ".tif", mFileList).getAbsolutePath();
158 //image.exportAsTiff(path2);
159
160 //String path3 = Utils.createExternalFile(fname + ".png", mFileList).getAbsolutePath();
161 //image.exportAsPng(path3);
162 }
163 }
164 break;
165 case Element.e_form: // Process form XObjects
166 reader.formBegin();
167 ImageExtract(reader);
168 reader.end();
169 break;
170 }
171 }
172 }
173
174}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales