ImageExtract

Sample Obj-C and Swift code for using the Apryse SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our iOS SDK and PDF Data Extraction SDK Capabilities.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#import <OBJC/PDFNetOBJC.h>
7#import <Foundation/Foundation.h>
8
9//-----------------------------------------------------------------------------------
10// This sample illustrates one approach to PDF image extraction 
11// using PDFNet.
12// 
13// Note: Besides direct image export, you can also convert PDF images 
14// to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
15// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
16// sample project).
17//-----------------------------------------------------------------------------------
18
// Running count of the images reported so far. It is incremented for every
// image element logged and is also used to number the exported files.
int image_counter = 0;

// Logs the sequence number, pixel width/height and bits per component of an
// image element, followed by two coordinate pairs: the h/v entries of the
// element's CTM (x1, y1) and the point (1, 1) multiplied by that CTM (x2, y2).
static void LogImageDetails(PTElement *element)
{
    NSLog(@"--> Image: %d", ++image_counter);
    NSLog(@"    Width: %d", [element GetImageWidth]);
    NSLog(@"    Height: %d", [element GetImageHeight]);
    NSLog(@"    BPC: %d", [element GetBitsPerComponent]);

    PTMatrix2D *ctm = [element GetCTM];
    PTPDFPoint *unitPoint = [[PTPDFPoint alloc] initWithPx: 1.0 py: 1.0];
    PTPDFPoint *corner = [ctm Mult: unitPoint];
    NSLog(@"    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", [ctm getM_h], [ctm getM_v], [corner getX], [corner getY]);
}

// Wraps the element's XObject in a PTImage and exports it to
// ../../TestFiles/Output/image_extract1_<n>, where <n> is the current value
// of image_counter. No file extension is appended here.
static void ExportImageXObject(PTElement *element)
{
    PTImage *image = [[PTImage alloc] initWithImage_xobject: [element GetXObject]];

    NSString *fileName = [NSString stringWithFormat:@"image_extract1_%d", image_counter];
    NSString *path = [@"../../TestFiles/Output/" stringByAppendingPathComponent: fileName];
    [image ExportToFile: path];
}

// Walks every element the reader yields. Image and inline-image elements are
// logged; only e_ptimage elements are additionally exported to disk. Form
// XObjects are entered with FormBegin and processed recursively, then closed
// with End. All other element types are ignored.
void ImageExtract(PTElementReader* reader)
{
    for (PTElement *element = [reader Next]; element != nil; element = [reader Next])
    {
        switch ([element GetType])
        {
        case e_ptimage:
            LogImageDetails(element);
            ExportImageXObject(element);
            break;

        case e_ptinline_image:
            // Inline images are reported but not exported.
            LogImageDetails(element);
            break;

        case e_ptform:
            // Descend into the form XObject's own content.
            [reader FormBegin];
            ImageExtract(reader);
            [reader End];
            break;

        default:
            break;
        }
    }
}
60
// Returns YES when looking up `key` in `obj` yields an entry whose value is
// a name equal to `expected`; returns NO when the lookup yields nothing or
// the name differs.
static BOOL HasNameEntry(PTObj *obj, NSString *key, NSString *expected)
{
    PTDictIterator *entry = [obj Find: key];
    return [entry HasNext] && [[[entry Value] GetName] isEqualToString: expected];
}

// Runs both extraction examples against ../../TestFiles/newsletter.pdf.
// Returns 0 when both examples complete, or 1 if either one raised an
// NSException (the exception's reason is logged).
int main(int argc, char *argv[])
{
    @autoreleasepool {
        int status = 0;

        // Initialize PDFNet
        [PTPDFNet Initialize: 0];

        // Example 1:
        // Traverse the display list of every page. This route gives access
        // to image positioning information and DPI.
        @try
        {
            PTPDFDoc *document = [[PTPDFDoc alloc] initWithFilepath: @"../../TestFiles/newsletter.pdf"];
            [document InitSecurityHandler];

            PTElementReader *pageReader = [[PTElementReader alloc] init];

            // Visit each page, starting from page 1.
            PTPageIterator *pages = [document GetPageIterator: 1];
            while ([pages HasNext])
            {
                [pageReader Begin: [pages Current]];
                ImageExtract(pageReader);
                [pageReader End];
                [pages Next];
            }

            NSLog(@"Done.");
        }
        @catch (NSException *exception)
        {
            NSLog(@"%@", exception.reason);
            status = 1;
        }

        NSLog(@"----------------------------------------------------------------");

        // Example 2:
        // Scan the low-level (SDF) document for image XObject streams.
        @try
        {
            PTPDFDoc *document = [[PTPDFDoc alloc] initWithFilepath: @"../../TestFiles/newsletter.pdf"];
            [document InitSecurityHandler];

            // Restart numbering so the second pass counts from 1 again.
            image_counter = 0;

            PTSDFDoc *sdfDoc = [document GetSDFDoc];
            const int objectCount = [sdfDoc XRefSize];

            for (int objNum = 1; objNum < objectCount; ++objNum)
            {
                PTObj *candidate = [sdfDoc GetObj: objNum];

                // Only live stream objects are of interest.
                if (candidate == nil || [candidate IsFree] || ![candidate IsStream])
                    continue;

                // Process only images: Type must be XObject and Subtype Image.
                if (!HasNameEntry(candidate, @"Type", @"XObject"))
                    continue;
                if (!HasNameEntry(candidate, @"Subtype", @"Image"))
                    continue;

                PTImage *image = [[PTImage alloc] initWithImage_xobject: candidate];
                NSLog(@"--> Image: %d", ++image_counter);
                NSLog(@"    Width: %d", [image GetImageWidth]);
                NSLog(@"    Height: %d", [image GetImageHeight]);
                NSLog(@"    BPC: %d", [image GetBitsPerComponent]);

                NSString *fileName = [NSString stringWithFormat:@"image_extract2_%d", image_counter];
                NSString *path = [@"../../TestFiles/Output/" stringByAppendingPathComponent: fileName];
                [image ExportToFile: path];
            }

            NSLog(@"Done.");
        }
        @catch (NSException *exception)
        {
            NSLog(@"%@", exception.reason);
            status = 1;
        }

        [PTPDFNet Terminate: 0];
        return status;
    }
}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import PDFNet
7import Foundation
8
9//-----------------------------------------------------------------------------------
10// This sample illustrates one approach to PDF image extraction
11// using PDFNet.
12//
13// Note: Besides direct image export, you can also convert PDF images
14// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
15// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
16// sample project).
17//-----------------------------------------------------------------------------------
18
/// Running count of the images reported so far. Incremented for every image
/// element printed and used to number the exported files.
var image_counter: Int = 0

/// Walks every element produced by `reader`, printing details for image and
/// inline-image elements and exporting `e_ptimage` elements to the user's
/// Documents directory as `image_extract1_<n>`.
///
/// Form XObjects are entered with `formBegin()`, processed recursively and
/// closed with `end()`. All other element types are ignored.
///
/// - Parameter reader: An element reader on which `begin(...)` has already
///   been called by the caller.
func ImageExtract(reader: PTElementReader) {
    while let element = reader.next() {
        switch element.getType() {
        case e_ptimage, e_ptinline_image:
            image_counter += 1
            print("--> Image: \(image_counter)")
            print("    Width: \(element.getImageWidth())")
            print("    Height: \(element.getImageHeight())")
            print("    BPC: \(element.getBitsPerComponent())")

            let ctm: PTMatrix2D = element.getCTM()
            // Transform the point (1, 1) by the CTM and report the *result*.
            // Previously the product was discarded and the untransformed
            // constants were printed as x2/y2.
            let corner: PTPDFPoint = ctm.mult(PTPDFPoint(px: 1, py: 1))
            print("    Coords: x1=\(ctm.getM_h()), y1=\(ctm.getM_v()), x2=\(corner.getX()), y2=\(corner.getY())")

            // Inline images are reported only; image XObjects are exported.
            if element.getType() == e_ptimage {
                let image: PTImage = PTImage(image_xobject: element.getXObject())
                // Build the path directly from the Documents directory.
                // Round-tripping through `absoluteString` ("file://...") and
                // back into `URL(fileURLWithPath:)` yields an invalid path.
                let documentsDir = NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0]
                let path: String = URL(fileURLWithPath: documentsDir).appendingPathComponent("image_extract1_\(image_counter)").path
                image.export(toFile: path)
            }
        case e_ptform:
            // Process form XObjects
            reader.formBegin()
            ImageExtract(reader: reader)
            reader.end()
        default:
            break
        }
    }
}
52
/// Runs both image-extraction examples against the bundled `newsletter.pdf`.
///
/// Example 1 traverses each page's display list via `ImageExtract(reader:)`.
/// Example 2 scans the low-level (SDF) document for image XObject streams and
/// exports each one to the Documents directory as `image_extract2_<n>`.
///
/// - Returns: `0` when both examples complete, or `1` if either one reported
///   an error (the error is printed).
func runImageExtractTest() -> Int {
    return autoreleasepool {
        var ret: Int = 0

        // PDFNet is not initialized in this function.
        // NOTE(review): presumably the host app initializes PDFNet before
        // calling this — confirm.

        // Example 1:
        // Extract images by traversing the display list for
        // every page. With this approach it is possible to obtain
        // image positioning information and DPI.
        do {
            try PTPDFNet.catchException {
                let doc: PTPDFDoc = PTPDFDoc(filepath: Bundle.main.path(forResource: "newsletter", ofType: "pdf"))
                doc.initSecurityHandler()

                let reader: PTElementReader = PTElementReader()
                //  Read every page
                let itr: PTPageIterator = doc.getPageIterator(1)
                while itr.hasNext() {
                    reader.begin(itr.current())
                    ImageExtract(reader: reader)
                    reader.end()
                    itr.next()
                }

                print("Done...")
            }
        } catch let e as NSError {
            print("\(e)")
            ret = 1
        }

        print("----------------------------------------------------------------")

        // Example 2:
        // Extract images by scanning the low-level document.
        do {
            try PTPDFNet.catchException {
                let doc: PTPDFDoc = PTPDFDoc(filepath: Bundle.main.path(forResource: "newsletter", ofType: "pdf"))
                doc.initSecurityHandler()

                // Restart numbering so this pass counts from 1 again.
                image_counter = 0

                let cos_doc: PTSDFDoc = doc.getSDFDoc()
                let num_objs = cos_doc.xRefSize()
                // `stride` yields an empty sequence when num_objs < 1, whereas
                // the range `1..<num_objs` would trap in that case.
                for i in stride(from: 1, to: num_objs, by: 1) {
                    guard let obj: PTObj = cos_doc.getObj(i) else {
                        continue
                    }
                    if !obj.isFree() && obj.isStream() {
                        // Process only images
                        var itr: PTDictIterator = obj.find("Type")
                        if !itr.hasNext() || !(itr.value().getName() == "XObject") {
                            continue
                        }

                        itr = obj.find("Subtype")
                        if !itr.hasNext() || !(itr.value().getName() == "Image") {
                            continue
                        }

                        let image: PTImage = PTImage(image_xobject: obj)
                        image_counter += 1
                        // Use the same "--> Image:" marker as the other
                        // image listings in this sample (was "-. Image:").
                        print("--> Image: \(image_counter)")
                        print("    Width: \(image.getWidth())")
                        print("    Height: \(image.getHeight())")
                        print("    BPC: \(image.getBitsPerComponent())")

                        let path: String = URL(fileURLWithPath: NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0]).appendingPathComponent("image_extract2_\(image_counter)").path
                        image.export(toFile: path)
                    }
                }

                print("Done...")
            }
        } catch let e as NSError {
            print("\(e)")
            ret = 1
        }

        return ret
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

ImageExtract