ImageExtract

Sample code in Swift and Obj-C for using Apryse iOS SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with the iOS SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#import <OBJC/PDFNetOBJC.h>
7#import <Foundation/Foundation.h>
8
9//-----------------------------------------------------------------------------------
10// This sample illustrates one approach to PDF image extraction 
11// using PDFNet.
12// 
13// Note: Besides direct image export, you can also convert PDF images 
14// to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
15// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
16// sample project).
17//-----------------------------------------------------------------------------------
18
// Running count of the images reported so far. ImageExtract() increments it
// for every image element it visits and also uses it to number exported files.
int image_counter = 0;

// Logs the sequence number, width, height and bits per component of one image
// element, followed by its placement: x1/y1 are the h and v components of the
// element's CTM, x2/y2 are the point (1, 1) mapped through that CTM.
static void LogImageElement(PTElement *imageElement)
{
    image_counter += 1;
    NSLog(@"--> Image: %d", image_counter);
    NSLog(@"    Width: %d", [imageElement GetImageWidth]);
    NSLog(@"    Height: %d", [imageElement GetImageHeight]);
    NSLog(@"    BPC: %d", [imageElement GetBitsPerComponent]);

    PTMatrix2D *matrix = [imageElement GetCTM];
    PTPDFPoint *unitCorner = [[PTPDFPoint alloc] initWithPx: 1.0 py: 1.0];
    PTPDFPoint *mappedCorner = [matrix Mult: unitCorner];
    NSLog(@"    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", [matrix getM_h], [matrix getM_v], [mappedCorner getX], [mappedCorner getY]);
}

// Wraps the element's XObject in a PTImage and exports it under
// ../../TestFiles/Output/ using the current value of image_counter in the name.
static void ExportImageElement(PTElement *imageElement)
{
    PTImage *xobjectImage = [[PTImage alloc] initWithImage_xobject: [imageElement GetXObject]];
    NSString *fileName = [NSString stringWithFormat:@"image_extract1_%d", image_counter];
    NSString *destination = [@"../../TestFiles/Output/" stringByAppendingPathComponent: fileName];
    [xobjectImage ExportToFile: destination];
}

// Drains `reader`, reporting every image element it yields.
//
// - Image XObjects (e_ptimage) are logged and exported to a file.
// - Inline images (e_ptinline_image) are logged only.
// - Form XObjects (e_ptform) are entered with FormBegin, processed by a
//   recursive call, and left again with End.
// - Every other element type is ignored.
//
// The caller is expected to have called Begin on the reader beforehand, as
// main() does for each page.
void ImageExtract(PTElementReader* reader)
{
    for (PTElement *current = [reader Next]; current != nil; current = [reader Next])
    {
        switch ([current GetType])
        {
        case e_ptimage:
            LogImageElement(current);
            ExportImageElement(current);
            break;

        case e_ptinline_image:
            // Inline images are reported but not exported.
            LogImageElement(current);
            break;

        case e_ptform:
            // Descend into the form XObject's own content stream.
            [reader FormBegin];
            ImageExtract(reader);
            [reader End];
            break;

        default:
            break;
        }
    }
}
60
// Example 1: extracts images by traversing the display list of every page.
// With this approach it is possible to obtain image positioning information
// and DPI. Returns NO if an NSException was raised (its reason is logged).
static BOOL ExtractImagesFromDisplayList(void)
{
    BOOL succeeded = YES;
    @try
    {
        PTPDFDoc *document = [[PTPDFDoc alloc] initWithFilepath: @"../../TestFiles/newsletter.pdf"];
        [document InitSecurityHandler];

        PTElementReader *pageReader = [[PTElementReader alloc] init];

        // Visit every page, starting from page 1.
        PTPageIterator *pages = [document GetPageIterator: 1];
        while ([pages HasNext])
        {
            [pageReader Begin: [pages Current]];
            ImageExtract(pageReader);
            [pageReader End];
            [pages Next];
        }

        NSLog(@"Done.");
    }
    @catch (NSException *exception)
    {
        NSLog(@"%@", exception.reason);
        succeeded = NO;
    }
    return succeeded;
}

// Returns YES when the object's dictionary has Type == "XObject" and
// Subtype == "Image". Subtype is only looked up once Type has matched.
static BOOL IsImageXObject(PTObj *candidate)
{
    PTDictIterator *entry = [candidate Find: @"Type"];
    if (![entry HasNext] || ![[[entry Value] GetName] isEqualToString: @"XObject"])
        return NO;

    entry = [candidate Find: @"Subtype"];
    return [entry HasNext] && [[[entry Value] GetName] isEqualToString: @"Image"];
}

// Example 2: extracts images by scanning the low-level (SDF) document and
// exporting every stream object that is an image XObject. Resets
// image_counter so numbering restarts at 1. Returns NO if an NSException was
// raised (its reason is logged).
static BOOL ExtractImagesFromObjectTable(void)
{
    BOOL succeeded = YES;
    @try
    {
        PTPDFDoc *document = [[PTPDFDoc alloc] initWithFilepath: @"../../TestFiles/newsletter.pdf"];
        [document InitSecurityHandler];
        image_counter = 0;

        PTSDFDoc *sdfDocument = [document GetSDFDoc];
        const int objectCount = [sdfDocument XRefSize];
        for (int objectNumber = 1; objectNumber < objectCount; ++objectNumber)
        {
            PTObj *candidate = [sdfDocument GetObj: objectNumber];

            // Only live stream objects can be images.
            if (candidate == nil || [candidate IsFree] || ![candidate IsStream])
                continue;
            if (!IsImageXObject(candidate))
                continue;

            PTImage *image = [[PTImage alloc] initWithImage_xobject: candidate];
            image_counter += 1;
            NSLog(@"--> Image: %d", image_counter);
            NSLog(@"    Width: %d", [image GetImageWidth]);
            NSLog(@"    Height: %d", [image GetImageHeight]);
            NSLog(@"    BPC: %d", [image GetBitsPerComponent]);

            NSString *fileName = [NSString stringWithFormat:@"image_extract2_%d", image_counter];
            NSString *destination = [@"../../TestFiles/Output/" stringByAppendingPathComponent: fileName];
            [image ExportToFile: destination];
        }
        NSLog(@"Done.");
    }
    @catch (NSException *exception)
    {
        NSLog(@"%@", exception.reason);
        succeeded = NO;
    }
    return succeeded;
}

// Entry point: initializes PDFNet, runs both extraction examples (the second
// runs even if the first failed), terminates PDFNet, and returns 0 when both
// examples completed without an exception, 1 otherwise.
int main(int argc, char *argv[])
{
    @autoreleasepool {
        // Initialize PDFNet
        [PTPDFNet Initialize: 0];

        const BOOL displayListOK = ExtractImagesFromDisplayList();

        NSLog(@"----------------------------------------------------------------");

        const BOOL objectTableOK = ExtractImagesFromObjectTable();

        [PTPDFNet Terminate: 0];
        return (displayListOK && objectTableOK) ? 0 : 1;
    }
}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import PDFNet
7import Foundation
8
9//-----------------------------------------------------------------------------------
10// This sample illustrates one approach to PDF image extraction
11// using PDFNet.
12//
13// Note: Besides direct image export, you can also convert PDF images
14// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
15// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
16// sample project).
17//-----------------------------------------------------------------------------------
18
// Running count of the images reported so far. ImageExtract(reader:)
// increments it for every image element and uses it to number exported files.
var image_counter: Int = 0

// Drains `reader`, reporting every image element it yields.
//
// - Image XObjects (e_ptimage) are logged and exported into the user's
//   Documents directory as "image_extract1_<n>".
// - Inline images (e_ptinline_image) are logged only.
// - Form XObjects (e_ptform) are entered with formBegin(), processed by a
//   recursive call, and left again with end().
// - Every other element type is ignored.
//
// - Parameter reader: an element reader on which begin(_:) has already been
//   called by the caller.
func ImageExtract(reader: PTElementReader) {
    while let element = reader.next() {
        switch element.getType() {
        case e_ptimage, e_ptinline_image:
            image_counter += 1
            print("--> Image: \(image_counter)")
            print("    Width: \(element.getImageWidth())")
            print("    Height: \(element.getImageHeight())")
            print("    BPC: \(element.getBitsPerComponent())")

            // x1/y1 are the CTM's h and v components; x2/y2 are the point
            // (1, 1) mapped through the CTM. The mapped point must be taken
            // from mult's return value; the input point is not what is reported.
            let ctm: PTMatrix2D = element.getCTM()
            let corner: PTPDFPoint = ctm.mult(PTPDFPoint(px: 1, py: 1))
            print("    Coords: x1=\(ctm.getM_h()), y1=\(ctm.getM_v()), x2=\(corner.getX()), y2=\(corner.getY())")

            if element.getType() == e_ptimage {
                let image: PTImage = PTImage(image_xobject: element.getXObject())
                // Build the destination from the Documents directory path directly.
                // Round-tripping through absoluteString would feed a "file://..."
                // string back into URL(fileURLWithPath:) and produce a bogus path.
                let documentsPath: String = NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0]
                let path: String = URL(fileURLWithPath: documentsPath).appendingPathComponent("image_extract1_\(image_counter)").path
                image.export(toFile: path)
            }
        case e_ptform:
            // Process form XObjects
            reader.formBegin()
            ImageExtract(reader: reader)
            reader.end()
        default:
            break
        }
    }
}
52
// Runs both image-extraction examples against the bundled "newsletter.pdf".
//
// Example 1 walks each page's display list via ImageExtract(reader:);
// Example 2 scans the low-level (SDF) object table for image XObjects and
// exports each one into the user's Documents directory. The second example
// runs even if the first one failed.
//
// - Returns: 0 when both examples completed without an error, 1 otherwise.
func runImageExtractTest() -> Int {
    return autoreleasepool {
        var ret: Int = 0

        // Initialize PDFNet
        // NOTE(review): no initialization call is made here; presumably the host
        // app initializes PDFNet before calling this function. Confirm.

        // Example 1:
        // Extract images by traversing the display list for
        // every page. With this approach it is possible to obtain
        // image positioning information and DPI.
        do {
            try PTPDFNet.catchException {
                let doc: PTPDFDoc = PTPDFDoc(filepath: Bundle.main.path(forResource: "newsletter", ofType: "pdf"))
                doc.initSecurityHandler()

                let reader: PTElementReader = PTElementReader()
                //  Read every page
                let itr: PTPageIterator = doc.getPageIterator(1)
                while itr.hasNext() {
                    reader.begin(itr.current())
                    ImageExtract(reader: reader)
                    reader.end()
                    itr.next()
                }

                print("Done...")
            }
        } catch let e as NSError {
            print("\(e)")
            ret = 1
        }

        print("----------------------------------------------------------------")

        // Example 2:
        // Extract images by scanning the low-level document.
        do {
            try PTPDFNet.catchException {
                let doc: PTPDFDoc = PTPDFDoc(filepath: Bundle.main.path(forResource: "newsletter", ofType: "pdf"))
                doc.initSecurityHandler()

                // Restart numbering so this example's output begins at 1.
                image_counter = 0

                let cos_doc: PTSDFDoc = doc.getSDFDoc()
                let num_objs = cos_doc.xRefSize()
                for i in 1..<num_objs {
                    guard let obj: PTObj = cos_doc.getObj(i) else {
                        continue
                    }
                    if !obj.isFree() && obj.isStream() {
                        // Process only images
                        var itr: PTDictIterator = obj.find("Type")
                        if !itr.hasNext() || !(itr.value().getName() == "XObject") {
                            continue
                        }

                        itr = obj.find("Subtype")
                        if !itr.hasNext() || !(itr.value().getName() == "Image") {
                            continue
                        }

                        let image: PTImage = PTImage(image_xobject: obj)
                        image_counter += 1
                        // Same "-->" marker as Example 1, so both outputs read alike.
                        print("--> Image: \(image_counter)")
                        // NOTE(review): the Obj-C sample calls GetImageWidth/GetImageHeight
                        // on PTImage; confirm getWidth()/getHeight() are the right names here.
                        print("    Width: \(image.getWidth())")
                        print("    Height: \(image.getHeight())")
                        print("    BPC: \(image.getBitsPerComponent())")

                        let path: String = URL(fileURLWithPath: NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0]).appendingPathComponent("image_extract2_\(image_counter)").path
                        image.export(toFile: path)
                    }
                }

                print("Done...")
            }
        } catch let e as NSError {
            print("\(e)")
            ret = 1
        }

        return ret
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

ImageExtract