ImageExtract

Sample code in Swift and Obj-C for using Apryse iOS SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with the iOS SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#import <OBJC/PDFNetOBJC.h>
7#import <Foundation/Foundation.h>
8
9//-----------------------------------------------------------------------------------
10// This sample illustrates one approach to PDF image extraction
11// using PDFNet.
12//
13// Note: Besides direct image export, you can also convert PDF images
14// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
15// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
16// sample project).
17//-----------------------------------------------------------------------------------
18
// Running count of images reported so far. ImageExtract() increments it for
// every image it logs; main() also resets and increments it for its second
// example.
int image_counter = 0;

// Logs the index, pixel size, bits per component and page coordinates of an
// image (or inline image) element, bumping image_counter first.
static void ReportImageElement(PTElement *element)
{
    NSLog(@"--> Image: %d", ++image_counter);
    NSLog(@" Width: %d", [element GetImageWidth]);
    NSLog(@" Height: %d", [element GetImageHeight]);
    NSLog(@" BPC: %d", [element GetBitsPerComponent]);

    // x1/y1 are the h and v entries of the element's CTM; x2/y2 are the
    // point (1, 1) multiplied by that same matrix.
    PTMatrix2D *matrix = [element GetCTM];
    PTPDFPoint *unitCorner = [[PTPDFPoint alloc] initWithPx: 1.0 py: 1.0];
    PTPDFPoint *mappedCorner = [matrix Mult: unitCorner];
    NSLog(@" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", [matrix getM_h], [matrix getM_v], [mappedCorner getX], [mappedCorner getY]);
}

// Wraps the element's XObject in a PTImage and exports it under the output
// folder, using the current image_counter value in the file name.
static void ExportImageElement(PTElement *element)
{
    PTImage *picture = [[PTImage alloc] initWithImage_xobject: [element GetXObject]];

    NSString *fileName = [NSString stringWithFormat:@"image_extract1_%d", image_counter];
    NSString *destination = [@"../../TestFiles/Output/" stringByAppendingPathComponent:fileName];
    [picture ExportToFile: destination];
}

// Walks every element the reader yields. Images and inline images are logged;
// images backed by an XObject are additionally exported to a file. Form
// XObjects are entered and processed recursively, so images nested inside
// forms are handled too. All other element types are ignored.
void ImageExtract(PTElementReader* reader)
{
    for (PTElement *current = [reader Next]; current != NULL; current = [reader Next])
    {
        switch ([current GetType])
        {
            case e_ptinline_image:
                // Inline images are only reported, never exported.
                ReportImageElement(current);
                break;

            case e_ptimage:
                ReportImageElement(current);
                ExportImageElement(current);
                break;

            case e_ptform: // Process form XObjects
                [reader FormBegin];
                ImageExtract(reader);
                [reader End];
                break;

            default:
                break;
        }
    }
}
60
// Entry point. Runs two image-extraction passes over the same sample PDF and
// returns 0 when both complete, or 1 if either pass raised an NSException.
int main(int argc, char *argv[])
{
    @autoreleasepool {
        int exitStatus = 0;

        // Initialize PDFNet
        [PTPDFNet Initialize: 0];

        // Example 1:
        // Extract images by traversing the display list for
        // every page. With this approach it is possible to obtain
        // image positioning information and DPI.
        @try
        {
            PTPDFDoc *document = [[PTPDFDoc alloc] initWithFilepath: @"../../TestFiles/newsletter.pdf"];
            [document InitSecurityHandler];
            PTElementReader *elementReader = [[PTElementReader alloc] init];

            // Read every page, starting from page 1.
            PTPageIterator *pages = [document GetPageIterator: 1];
            while ([pages HasNext])
            {
                [elementReader Begin: [pages Current]];
                ImageExtract(elementReader);
                [elementReader End];
                [pages Next];
            }

            NSLog(@"Done.");
        }
        @catch (NSException *exception)
        {
            NSLog(@"%@", exception.reason);
            exitStatus = 1;
        }

        NSLog(@"----------------------------------------------------------------");

        // Example 2:
        // Extract images by scanning the low-level document.
        @try
        {
            PTPDFDoc *document = [[PTPDFDoc alloc] initWithFilepath: @"../../TestFiles/newsletter.pdf"];

            [document InitSecurityHandler];
            // Restart numbering so this pass counts its images from 1.
            image_counter = 0;

            PTSDFDoc *cosDocument = [document GetSDFDoc];
            int objectCount = [cosDocument XRefSize];
            for (int objectNumber = 1; objectNumber < objectCount; ++objectNumber)
            {
                PTObj *candidate = [cosDocument GetObj: objectNumber];

                // Only live stream objects can be images.
                if ((candidate == NULL) || [candidate IsFree] || (![candidate IsStream]))
                    continue;

                // Process only images: Type must be XObject...
                PTDictIterator *entry = [candidate Find: @"Type"];
                BOOL isXObject = [entry HasNext] && [[[entry Value] GetName] isEqualToString: @"XObject"];
                if (!isXObject)
                    continue;

                // ...and Subtype must be Image.
                entry = [candidate Find: @"Subtype"];
                BOOL isImage = [entry HasNext] && [[[entry Value] GetName] isEqualToString: @"Image"];
                if (!isImage)
                    continue;

                PTImage *picture = [[PTImage alloc] initWithImage_xobject: candidate];
                NSLog(@"--> Image: %d", ++image_counter);
                NSLog(@" Width: %d", [picture GetImageWidth]);
                NSLog(@" Height: %d", [picture GetImageHeight]);
                NSLog(@" BPC: %d", [picture GetBitsPerComponent]);

                NSString *fileName = [NSString stringWithFormat:@"image_extract2_%d", image_counter];
                NSString *destination = [@"../../TestFiles/Output/" stringByAppendingPathComponent:fileName];
                [picture ExportToFile: destination];
            }
            NSLog(@"Done.");
        }
        @catch (NSException *exception)
        {
            NSLog(@"%@", exception.reason);
            exitStatus = 1;
        }

        [PTPDFNet Terminate: 0];
        return exitStatus;
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales