Extract Image from PDFs - C++ Sample Code

Sample code for using Apryse SDK to extract images from PDF files, along with their positioning information and DPI; provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby and VB. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with Server SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#include <PDF/PDFNet.h>
7#include <PDF/PDFDoc.h>
8#include <PDF/ElementReader.h>
9#include <PDF/Image.h>
10#include "../../LicenseKey/CPP/LicenseKey.h"
11
12//-----------------------------------------------------------------------------------
13// This sample illustrates one approach to PDF image extraction
14// using PDFNet.
15//
16// Note: Besides direct image export, you can also convert PDF images
17// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
18// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
19// sample project).
20//-----------------------------------------------------------------------------------
21
#include <iomanip>
#include <iostream>
#include <string>
24
25using namespace std;
26
27using namespace pdftron;
28using namespace Common;
29using namespace SDF;
30using namespace PDF;
31
32// Relative paths to folders containing test files.
33string input_path = "../../TestFiles/";
34string output_path = "../../TestFiles/Output/";
35
36int image_counter = 0;
37
38void ImageExtract(ElementReader& reader)
39{
40 // Set the precision for printing doubles on cout to 3 decimal places.
41 ios iostate(NULL);
42 iostate.copyfmt(cout);
43 cout << fixed << showpoint << setprecision(3);
44
45 Element element;
46 while ((element = reader.Next()) != 0)
47 {
48 switch (element.GetType())
49 {
50 case Element::e_image:
51 case Element::e_inline_image:
52 {
53 cout << "--> Image: " << ++image_counter << endl;
54 cout << " Width: " << element.GetImageWidth() << endl;
55 cout << " Height: " << element.GetImageHeight() << endl;
56 cout << " BPC: " << element.GetBitsPerComponent() << endl;
57
58 Common::Matrix2D ctm = element.GetCTM();
59 double x2=1, y2=1;
60 ctm.Mult(x2, y2);
61 printf(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f\n", ctm.m_h, ctm.m_v, x2, y2);
62
63 if (element.GetType() == Element::e_image)
64 {
65 Image image(element.GetXObject());
66
67 char fname[256];
68 sprintf(fname, "image_extract1_%d", image_counter);
69
70 string path(output_path + fname);
71 image.Export(path.c_str());
72
73 //string path(output_path + fname + ".tif");
74 //image.ExportAsTiff(path.c_str());
75
76 //string path(output_path + fname + ".png");
77 //image.ExportAsPng(path.c_str());
78 }
79 }
80 break;
81 case Element::e_form: // Process form XObjects
82 reader.FormBegin();
83 ImageExtract(reader);
84 reader.End();
85 break;
86 }
87 }
88
89 // Reset cout's state.
90 cout.copyfmt(iostate);
91}
92
93int main(int argc, char *argv[])
94{
95 int ret = 0;
96
97 // Initialize PDFNet
98 PDFNet::Initialize(LicenseKey);
99
100 // Example 1:
101 // Extract images by traversing the display list for
102 // every page. With this approach it is possible to obtain
103 // image positioning information and DPI.
104 try
105 {
106 PDFDoc doc((input_path + "newsletter.pdf").c_str());
107 doc.InitSecurityHandler();
108 ElementReader reader;
109 // Read every page
110 for (PageIterator itr=doc.GetPageIterator(); itr.HasNext(); itr.Next())
111 {
112 reader.Begin(itr.Current());
113 ImageExtract(reader);
114 reader.End();
115 }
116
117 cout << "Done." << endl;
118 }
119 catch(Common::Exception& e)
120 {
121 cout << e << endl;
122 ret = 1;
123 }
124 catch(...)
125 {
126 cout << "Unknown Exception" << endl;
127 ret = 1;
128 }
129
130 cout << "----------------------------------------------------------------" << endl;
131
132 // Example 2:
133 // Extract images by scanning the low-level document.
134 try
135 {
136 PDFDoc doc((input_path + "newsletter.pdf").c_str());
137
138 doc.InitSecurityHandler();
139 image_counter = 0;
140
141 SDFDoc& cos_doc=doc.GetSDFDoc();
142 int num_objs = cos_doc.XRefSize();
143 for(int i=1; i<num_objs; ++i)
144 {
145 Obj obj = cos_doc.GetObj(i);
146 if(obj && !obj.IsFree() && obj.IsStream())
147 {
148 // Process only images
149 DictIterator itr = obj.Find("Type");
150 if(!itr.HasNext() || strcmp(itr.Value().GetName(), "XObject"))
151 continue;
152
153 itr = obj.Find("Subtype");
154 if(!itr.HasNext() || strcmp(itr.Value().GetName(), "Image"))
155 continue;
156
157 PDF::Image image(obj);
158 cout << "--> Image: " << ++image_counter << endl;
159 cout << " Width: " << image.GetImageWidth() << endl;
160 cout << " Height: " << image.GetImageHeight() << endl;
161 cout << " BPC: " << image.GetBitsPerComponent() << endl;
162
163 char fname[256];
164 sprintf(fname, "image_extract2_%d", image_counter);
165 string path(output_path + fname);
166 image.Export(path.c_str());
167
168 //string path(output_path + fname + ".tif");
169 //image.ExportAsTiff(path.c_str());
170
171 //string path(output_path + fname + ".png");
172 //image.ExportAsPng(path.c_str());
173 }
174 }
175
176 cout << "Done." << endl;
177 }
178 catch(Common::Exception& e)
179 {
180 cout << e << endl;
181 ret = 1;
182 }
183 catch(...)
184 {
185 cout << "Unknown Exception" << endl;
186 ret = 1;
187 }
188
189 PDFNet::Terminate();
190 return ret;
191}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales