Extract Image from PDFs - Java Sample Code

Sample code for using Apryse SDK to extract images from PDF files, along with their positioning information and DPI; provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby and VB. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with the Server SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.Matrix2D;
7import com.pdftron.common.PDFNetException;
8import com.pdftron.pdf.*;
9import com.pdftron.sdf.DictIterator;
10import com.pdftron.sdf.Obj;
11import com.pdftron.sdf.SDFDoc;
12
13///-----------------------------------------------------------------------------------
14/// This sample illustrates one approach to PDF image extraction
15/// using PDFNet.
16///
17/// Note: Besides direct image export, you can also convert PDF images
18/// to Java image, or extract uncompressed/compressed image data directly
19/// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
20/// sample project).
21///-----------------------------------------------------------------------------------
22public class ImageExtractTest {
23
24 // Relative paths to folders containing test files.
25 static String input_path = "../../TestFiles/";
26 static String output_path = "../../TestFiles/Output/";
27
28 static int image_counter = 0;
29
30 static void ImageExtract(ElementReader reader) throws PDFNetException {
31 Element element;
32 while ((element = reader.next()) != null) {
33 switch (element.getType()) {
34 case Element.e_image:
35 case Element.e_inline_image: {
36 System.out.println("--> Image: " + (++image_counter));
37 System.out.println(" Width: " + element.getImageWidth());
38 System.out.println(" Height: " + element.getImageHeight());
39 System.out.println(" BPC: " + element.getBitsPerComponent());
40
41 Matrix2D ctm = element.getCTM();
42 double x2 = 1, y2 = 1;
43 java.awt.geom.Point2D.Double p = ctm.multPoint(x2, y2);
44 System.out.println(String.format(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.getH(), ctm.getV(), p.getX(), p.getY()));
45
46 if (element.getType() == Element.e_image) {
47 Image image = new Image(element.getXObject());
48
49 String fname = "image_extract1_" + image_counter;
50
51 String path = output_path + fname;
52 image.export(path);
53
54 //String path2 = output_path + fname + ".tif";
55 //image.exportAsTiff(path2);
56
57 //String path3 = output_path + fname + ".png";
58 //image.exportAsPng(path3);
59 }
60 }
61 break;
62 case Element.e_form: // Process form XObjects
63 reader.formBegin();
64 ImageExtract(reader);
65 reader.end();
66 break;
67 }
68 }
69 }
70
71 public static void main(String[] args) {
72 // Initialize PDFNet
73 PDFNet.initialize(PDFTronLicense.Key());
74
75 // Example 1:
76 // Extract images by traversing the display list for
77 // every page. With this approach it is possible to obtain
78 // image positioning information and DPI.
79 try (PDFDoc doc = new PDFDoc((input_path + "newsletter.pdf"))) {
80 doc.initSecurityHandler();
81 ElementReader reader = new ElementReader();
82 // Read every page
83 for (PageIterator itr = doc.getPageIterator(); itr.hasNext(); ) {
84 reader.begin(itr.next());
85 ImageExtract(reader);
86 reader.end();
87 }
88 System.out.println("Done.");
89 } catch (Exception e) {
90 e.printStackTrace();
91 }
92
93
94 System.out.println("----------------------------------------------------------------");
95
96 // Example 2:
97 // Extract images by scanning the low-level document.
98 try (PDFDoc doc = new PDFDoc((input_path + "newsletter.pdf"))) {
99 doc.initSecurityHandler();
100 image_counter = 0;
101 SDFDoc cos_doc = doc.getSDFDoc();
102 long num_objs = cos_doc.xRefSize();
103 for (int i = 1; i < num_objs; ++i) {
104 Obj obj = cos_doc.getObj(i);
105 if (obj != null && !obj.isFree() && obj.isStream()) {
106 // Process only images
107 DictIterator itr = obj.find("Type");
108 if (!itr.hasNext() || !itr.value().getName().equals("XObject"))
109 continue;
110
111 itr = obj.find("Subtype");
112 if (!itr.hasNext() || !itr.value().getName().equals("Image"))
113 continue;
114
115 Image image = new Image(obj);
116
117 System.out.println("--> Image: " + (++image_counter));
118 System.out.println(" Width: " + image.getImageWidth());
119 System.out.println(" Height: " + image.getImageHeight());
120 System.out.println(" BPC: " + image.getBitsPerComponent());
121
122 String fname = "image_extract2_" + image_counter;
123 String path = output_path + fname;
124 image.export(path);
125
126 //String path= output_path + fname + ".tif";
127 //image.exportAsTiff(path);
128
129 //String path = output_path + fname + ".png";
130 //image.exportAsPng(path);
131 }
132 }
133
134 System.out.println("Done.");
135 } catch (Exception e) {
136 e.printStackTrace();
137 }
138
139 PDFNet.terminate();
140 }
141}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales