ImageExtract

Sample Java code for using the Apryse SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.getImageData() (described in the PDF Data Extraction code sample). Learn more about our Android SDK and PDF Data Extraction SDK Capabilities.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener;
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
10import com.pdftron.android.pdfnetsdksamples.R;
11import com.pdftron.android.pdfnetsdksamples.util.Utils;
12import com.pdftron.common.Matrix2D;
13import com.pdftron.common.PDFNetException;
14import com.pdftron.pdf.Element;
15import com.pdftron.pdf.ElementReader;
16import com.pdftron.pdf.Image;
17import com.pdftron.pdf.PDFDoc;
18import com.pdftron.pdf.PageIterator;
19import com.pdftron.sdf.DictIterator;
20import com.pdftron.sdf.Obj;
21import com.pdftron.sdf.SDFDoc;
22
23import java.util.ArrayList;
24
25///-----------------------------------------------------------------------------------
26/// This sample illustrates one approach to PDF image extraction
27/// using PDFNet.
28///
29/// Note: Besides direct image export, you can also convert PDF images
30/// to a Java image, or extract uncompressed/compressed image data directly
31/// using element.getImageData() (e.g. as illustrated in the ElementReaderAdv
32/// sample project).
33///-----------------------------------------------------------------------------------
34
35public class ImageExtractTest extends PDFNetSample {
36
37 private static OutputListener mOutputListener;
38
39 private static ArrayList<String> mFileList = new ArrayList<>();
40
41 public ImageExtractTest() {
42 setTitle(R.string.sample_imageextract_title);
43 setDescription(R.string.sample_imageextract_description);
44
45 // The standard library does not support exporting to
46 // PNG/TIFF formats, thus trying to export the PDF to
47 // PNG or TIFF will fail. Please, comment out this call
48 // if using the full library.
49 // DisableRun();
50 }
51
52 @Override
53 public void run(OutputListener outputListener) {
54 super.run(outputListener);
55 mOutputListener = outputListener;
56 mFileList.clear();
57 printHeader(outputListener);
58 // Initialize PDFNet
59
60 // Example 1:
61 // Extract images by traversing the display list for
62 // every page. With this approach it is possible to obtain
63 // image positioning information and DPI.
64 try (PDFDoc doc = new PDFDoc((Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath()))) {
65 doc.initSecurityHandler();
66 ElementReader reader = new ElementReader();
67 // Read every page
68 for (PageIterator itr = doc.getPageIterator(); itr.hasNext(); ) {
69 reader.begin(itr.next());
70 ImageExtract(reader);
71 reader.end();
72 }
73 mOutputListener.println("Done.");
74 } catch (Exception e) {
75 mOutputListener.printError(e.getStackTrace());
76 }
77
78 mOutputListener.println("----------------------------------------------------------------");
79
80 // Example 2:
81 // Extract images by scanning the low-level document.
82 try (PDFDoc doc = new PDFDoc((Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath()))) {
83 doc.initSecurityHandler();
84 image_counter = 0;
85 SDFDoc cos_doc = doc.getSDFDoc();
86 long num_objs = cos_doc.xRefSize();
87 for (int i = 1; i < num_objs; ++i) {
88 Obj obj = cos_doc.getObj(i);
89 if (obj != null && !obj.isFree() && obj.isStream()) {
90 // Process only images
91 DictIterator itr = obj.find("Type");
92 if (!itr.hasNext() || !itr.value().getName().equals("XObject"))
93 continue;
94
95 itr = obj.find("Subtype");
96 if (!itr.hasNext() || !itr.value().getName().equals("Image"))
97 continue;
98
99 Image image = new Image(obj);
100
101 mOutputListener.println("--> Image: " + (++image_counter));
102 mOutputListener.println(" Width: " + image.getImageWidth());
103 mOutputListener.println(" Height: " + image.getImageHeight());
104 mOutputListener.println(" BPC: " + image.getBitsPerComponent());
105
106 String fname = "image_extract2_" + image_counter;
107 String path = Utils.createExternalFile(fname, mFileList).getAbsolutePath();
108 image.export(path);
109
110 //String path= Utils.createExternalFile(fname + ".tif", mFileList).getAbsolutePath();
111 //image.exportAsTiff(path);
112
113 //String path = Utils.createExternalFile(fname + ".png", mFileList).getAbsolutePath();
114 //image.exportAsPng(path);
115 }
116 }
117
118 mOutputListener.println("Done.");
119 } catch (Exception e) {
120 mOutputListener.printError(e.getStackTrace());
121 }
122
123 for (String file : mFileList) {
124 addToFileList(file);
125 }
126 printFooter(outputListener);
127 }
128
129 // Relative paths to folders containing test files.
130
131 static int image_counter = 0;
132
133 static void ImageExtract(ElementReader reader) throws PDFNetException {
134 Element element;
135 while ((element = reader.next()) != null) {
136 switch (element.getType()) {
137 case Element.e_image:
138 case Element.e_inline_image: {
139 mOutputListener.println("--> Image: " + (++image_counter));
140 mOutputListener.println(" Width: " + element.getImageWidth());
141 mOutputListener.println(" Height: " + element.getImageHeight());
142 mOutputListener.println(" BPC: " + element.getBitsPerComponent());
143
144 Matrix2D ctm = element.getCTM();
145 double x2 = 1, y2 = 1;
146 com.pdftron.pdf.Point p = ctm.multPoint(x2, y2);
147 mOutputListener.println(String.format(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.getH(), ctm.getV(), p.x, p.y));
148
149 if (element.getType() == Element.e_image) {
150 Image image = new Image(element.getXObject());
151
152 String fname = "image_extract1_" + image_counter;
153
154 String path = Utils.createExternalFile(fname, mFileList).getAbsolutePath();
155 image.export(path);
156
157 //String path2 = Utils.createExternalFile(fname + ".tif", mFileList).getAbsolutePath();
158 //image.exportAsTiff(path2);
159
160 //String path3 = Utils.createExternalFile(fname + ".png", mFileList).getAbsolutePath();
161 //image.exportAsPng(path3);
162 }
163 }
164 break;
165 case Element.e_form: // Process form XObjects
166 reader.formBegin();
167 ImageExtract(reader);
168 reader.end();
169 break;
170 }
171 }
172 }
173
174}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales