ImageExtract

Sample Java and Kotlin code that uses the Apryse Android SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract the uncompressed or compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with Android SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples;
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener;
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample;
10import com.pdftron.android.pdfnetsdksamples.R;
11import com.pdftron.android.pdfnetsdksamples.util.Utils;
12import com.pdftron.common.Matrix2D;
13import com.pdftron.common.PDFNetException;
14import com.pdftron.pdf.Element;
15import com.pdftron.pdf.ElementReader;
16import com.pdftron.pdf.Image;
17import com.pdftron.pdf.PDFDoc;
18import com.pdftron.pdf.PageIterator;
19import com.pdftron.sdf.DictIterator;
20import com.pdftron.sdf.Obj;
21import com.pdftron.sdf.SDFDoc;
22
23import java.util.ArrayList;
24
25///-----------------------------------------------------------------------------------
26/// This sample illustrates one approach to PDF image extraction
27/// using PDFNet.
28///
29/// Note: Besides direct image export, you can also convert PDF images
30/// to Java image, or extract uncompressed/compressed image data directly
31/// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
32/// sample project).
33///-----------------------------------------------------------------------------------
34
35public class ImageExtractTest extends PDFNetSample {
36
37	private static OutputListener mOutputListener;
38
39	private static ArrayList<String> mFileList = new ArrayList<>();
40
41    public ImageExtractTest() {
42        setTitle(R.string.sample_imageextract_title);
43        setDescription(R.string.sample_imageextract_description);
44
45        // The standard library does not support exporting to
46        // PNG/TIFF formats, thus trying to export the PDF to
47        // PNG or TIFF will fail. Please, comment out this call
48        // if using the full library.
49        // DisableRun();
50    }
51
52	@Override
53	public void run(OutputListener outputListener) {
54		super.run(outputListener);
55		mOutputListener = outputListener;
56		mFileList.clear();
57		printHeader(outputListener);
58        // Initialize PDFNet
59
60        // Example 1:
61        // Extract images by traversing the display list for
62        // every page. With this approach it is possible to obtain
63        // image positioning information and DPI.
64        try (PDFDoc doc = new PDFDoc((Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath()))) {
65            doc.initSecurityHandler();
66            ElementReader reader = new ElementReader();
67            //  Read every page
68            for (PageIterator itr = doc.getPageIterator(); itr.hasNext(); ) {
69                reader.begin(itr.next());
70                ImageExtract(reader);
71                reader.end();
72            }
73            mOutputListener.println("Done.");
74        } catch (Exception e) {
75            mOutputListener.printError(e.getStackTrace());
76        }
77
78        mOutputListener.println("----------------------------------------------------------------");
79
80        // Example 2:
81        // Extract images by scanning the low-level document.
82        try (PDFDoc doc = new PDFDoc((Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf").getAbsolutePath()))) {
83            doc.initSecurityHandler();
84            image_counter = 0;
85            SDFDoc cos_doc = doc.getSDFDoc();
86            long num_objs = cos_doc.xRefSize();
87            for (int i = 1; i < num_objs; ++i) {
88                Obj obj = cos_doc.getObj(i);
89                if (obj != null && !obj.isFree() && obj.isStream()) {
90                    // Process only images
91                    DictIterator itr = obj.find("Type");
92                    if (!itr.hasNext() || !itr.value().getName().equals("XObject"))
93                        continue;
94
95                    itr = obj.find("Subtype");
96                    if (!itr.hasNext() || !itr.value().getName().equals("Image"))
97                        continue;
98
99                    Image image = new Image(obj);
100
101                    mOutputListener.println("--> Image: " + (++image_counter));
102                    mOutputListener.println("    Width: " + image.getImageWidth());
103                    mOutputListener.println("    Height: " + image.getImageHeight());
104                    mOutputListener.println("    BPC: " + image.getBitsPerComponent());
105
106                    String fname = "image_extract2_" + image_counter;
107                    String path = Utils.createExternalFile(fname, mFileList).getAbsolutePath();
108                    image.export(path);
109
110                    //String path= Utils.createExternalFile(fname + ".tif", mFileList).getAbsolutePath();
111                    //image.exportAsTiff(path);
112
113                    //String path = Utils.createExternalFile(fname + ".png", mFileList).getAbsolutePath();
114                    //image.exportAsPng(path);
115                }
116            }
117            
118            mOutputListener.println("Done.");
119        } catch (Exception e) {
120            mOutputListener.printError(e.getStackTrace());
121        }
122
123		for (String file : mFileList) {
124			addToFileList(file);
125		}
126		printFooter(outputListener);
127	}
128
129    // Relative paths to folders containing test files.
130
131    static int image_counter = 0;
132
133    static void ImageExtract(ElementReader reader) throws PDFNetException {
134        Element element;
135        while ((element = reader.next()) != null) {
136            switch (element.getType()) {
137                case Element.e_image:
138                case Element.e_inline_image: {
139                    mOutputListener.println("--> Image: " + (++image_counter));
140                    mOutputListener.println("    Width: " + element.getImageWidth());
141                    mOutputListener.println("    Height: " + element.getImageHeight());
142                    mOutputListener.println("    BPC: " + element.getBitsPerComponent());
143
144                    Matrix2D ctm = element.getCTM();
145                    double x2 = 1, y2 = 1;
146                    com.pdftron.pdf.Point p = ctm.multPoint(x2, y2);
147                    mOutputListener.println(String.format("    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.getH(), ctm.getV(), p.x, p.y));
148
149                    if (element.getType() == Element.e_image) {
150                        Image image = new Image(element.getXObject());
151
152                        String fname = "image_extract1_" + image_counter;
153
154                        String path = Utils.createExternalFile(fname, mFileList).getAbsolutePath();
155                        image.export(path);
156
157                        //String path2 = Utils.createExternalFile(fname + ".tif", mFileList).getAbsolutePath();
158                        //image.exportAsTiff(path2);
159
160                        //String path3 = Utils.createExternalFile(fname + ".png", mFileList).getAbsolutePath();
161                        //image.exportAsPng(path3);
162                    }
163                }
164                break;
165                case Element.e_form:        // Process form XObjects
166                    reader.formBegin();
167                    ImageExtract(reader);
168                    reader.end();
169                    break;
170            }
171        }
172    }
173
174}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6package com.pdftron.android.pdfnetsdksamples.samples
7
8import com.pdftron.android.pdfnetsdksamples.OutputListener
9import com.pdftron.android.pdfnetsdksamples.PDFNetSample
10import com.pdftron.android.pdfnetsdksamples.R
11import com.pdftron.android.pdfnetsdksamples.util.Utils
12import com.pdftron.common.PDFNetException
13import com.pdftron.pdf.Element
14import com.pdftron.pdf.ElementReader
15import com.pdftron.pdf.Image
16import com.pdftron.pdf.PDFDoc
17import java.util.*
18
/**
 * Sample illustrating two approaches to PDF image extraction using PDFNet.
 *
 * 1. Example 1 walks the element display list of every page, printing each
 *    image's dimensions, bits per component and coordinates, and exporting
 *    every non-inline image.
 * 2. Example 2 scans the low-level (SDF/COS) object table and exports every
 *    stream whose `Type` is `XObject` and whose `Subtype` is `Image`.
 *
 * Note: besides direct image export, you can also convert PDF images to a
 * Java image, or extract uncompressed/compressed image data directly using
 * element.GetImageData() (e.g. as illustrated in the ElementReaderAdv sample
 * project).
 */
class ImageExtractTest : PDFNetSample() {
    init {
        setTitle(R.string.sample_imageextract_title)
        setDescription(R.string.sample_imageextract_description)

        // The standard library does not support exporting to
        // PNG/TIFF formats, thus trying to export the PDF to
        // PNG or TIFF will fail. Please, comment out this call
        // if using the full library.
        // DisableRun();
    }

    /**
     * Runs both extraction examples against the sample document and registers
     * every exported file with the sample's file list.
     *
     * @param outputListener receiver of the sample's console output; must not be null
     */
    override fun run(outputListener: OutputListener?) {
        super.run(outputListener)
        mOutputListener = outputListener
        mFileList.clear()
        val listener: OutputListener = outputListener!!
        printHeader(listener)

        // Example 1:
        // Extract images by traversing the display list for
        // every page. With this approach it is possible to obtain
        // image positioning information and DPI.
        try {
            PDFDoc(Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf")!!.absolutePath).use { doc ->
                doc.initSecurityHandler()

                // The counter lives in the companion object and is left at
                // Example 2's final value by a previous run; restart it so
                // numbering and the "image_extract1_N" file names are the
                // same on every run.
                image_counter = 0

                val reader = ElementReader()
                //  Read every page
                val itr = doc.pageIterator
                while (itr.hasNext()) {
                    reader.begin(itr.next())
                    ImageExtract(reader)
                    reader.end()
                }
                listener.println("Done.")
            }
        } catch (e: Exception) {
            listener.printError(e.stackTrace)
        }

        listener.println("----------------------------------------------------------------")

        // Example 2:
        // Extract images by scanning the low-level document.
        try {
            PDFDoc(Utils.getAssetTempFile(INPUT_PATH + "newsletter.pdf")!!.absolutePath).use { doc ->
                doc.initSecurityHandler()
                image_counter = 0
                val cosDoc = doc.sdfDoc
                val numObjs = cosDoc.xRefSize()
                for (objNum in 1 until numObjs) {
                    val obj = cosDoc.getObj(objNum)
                    if (obj == null || obj.isFree || !obj.isStream) continue

                    // Process only images
                    val type = obj.find("Type")
                    if (!type.hasNext() || type.value().name != "XObject") continue
                    val subtype = obj.find("Subtype")
                    if (!subtype.hasNext() || subtype.value().name != "Image") continue

                    val image = Image(obj)
                    listener.println("--> Image: " + ++image_counter)
                    listener.println("    Width: " + image.imageWidth)
                    listener.println("    Height: " + image.imageHeight)
                    listener.println("    BPC: " + image.bitsPerComponent)
                    val fname = "image_extract2_$image_counter"
                    val path = Utils.createExternalFile(fname, mFileList).absolutePath
                    image.export(path)

                    //String path= Utils.createExternalFile(fname + ".tif", mFileList).getAbsolutePath();
                    //image.exportAsTiff(path);

                    //String path = Utils.createExternalFile(fname + ".png", mFileList).getAbsolutePath();
                    //image.exportAsPng(path);
                }
                listener.println("Done.")
            }
        } catch (e: Exception) {
            listener.printError(e.stackTrace)
        }

        for (file in mFileList) {
            addToFileList(file)
        }
        printFooter(listener)
    }

    companion object {

        /** Listener that receives all console output; assigned at the start of every [run]. */
        private var mOutputListener: OutputListener? = null

        /** Paths of the files created during the current run. */
        private val mFileList = ArrayList<String>()

        /** Running number of the image being reported; reset at the start of each example. */
        internal var image_counter = 0

        /**
         * Reads elements from [reader] until it is exhausted, reporting every
         * image and inline image and exporting every (non-inline) image. Form
         * XObjects are entered and processed recursively.
         *
         * The coordinates printed are the CTM's translation components
         * (`h`, `v`) as `x1`/`y1` and the CTM-transformed point (1, 1) as
         * `x2`/`y2`.
         *
         * @param reader reader positioned on a page or form; output goes to
         * the listener set by the most recent [run]
         * @throws PDFNetException if reading an element or exporting an image fails
         */
        @Throws(PDFNetException::class)
        fun ImageExtract(reader: ElementReader) {
            while (true) {
                // next() comes from Java and yields null once the reader is
                // exhausted; stop there instead of routing the null through a
                // variable declared as non-null.
                val element: Element = reader.next() ?: break
                when (element.type) {
                    Element.e_image, Element.e_inline_image -> {
                        val out = mOutputListener!!
                        out.println("--> Image: " + ++image_counter)
                        out.println("    Width: " + element.imageWidth)
                        out.println("    Height: " + element.imageHeight)
                        out.println("    BPC: " + element.bitsPerComponent)
                        val ctm = element.ctm
                        val p = ctm.multPoint(1.0, 1.0)
                        out.println(String.format("    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.h, ctm.v, p.x, p.y))

                        // Inline images are reported above but not exported; only
                        // e_image elements are wrapped in an Image via their XObject.
                        if (element.type == Element.e_image) {
                            val image = Image(element.xObject)
                            val fname = "image_extract1_$image_counter"
                            val path = Utils.createExternalFile(fname, mFileList).absolutePath
                            image.export(path)

                            //String path2 = Utils.createExternalFile(fname + ".tif", mFileList).getAbsolutePath();
                            //image.exportAsTiff(path2);

                            //String path3 = Utils.createExternalFile(fname + ".png", mFileList).getAbsolutePath();
                            //image.exportAsPng(path3);
                        }
                    }
                    Element.e_form -> {
                        // Process form XObjects
                        reader.formBegin()
                        ImageExtract(reader)
                        reader.end()
                    }
                }
            }
        }
    }

}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

ImageExtract