Extract Image from PDFs - Python Sample Code

Sample code for using Apryse SDK to extract images from PDF files, along with their positioning information and DPI; provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby and VB. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with Server SDK.

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using System.Drawing;
7using System.Drawing.Imaging;
8
9using pdftron;
10using pdftron.Common;
11using pdftron.PDF;
12using pdftron.SDF;
13using pdftron.Filters;
14
15namespace ImageExtractTestCS
16{
17	class Class1
18	{
19		/// <summary>
20		///-----------------------------------------------------------------------------------
21		/// This sample illustrates one approach to PDF image extraction 
22		/// using PDFNet.
23		/// 
24		/// Note: Besides direct image export, you can also convert PDF images 
25		/// to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
26		/// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
27		/// sample project).
28		///-----------------------------------------------------------------------------------
29		/// </summary>
30
31		private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();
32		static Class1() {}
33
34		static int image_counter = 0;
35
36		// Relative path to the folder containing test files.
37		static string input_path =  "../../../../TestFiles/";
38		static string output_path = "../../../../TestFiles/Output/";
39
40		static void ImageExtract(PDFDoc doc, ElementReader reader) 
41		{
42			Element element; 
43			while ((element = reader.Next()) != null)
44			{
45				switch (element.GetType()) 
46				{
47					case Element.Type.e_image:
48					case Element.Type.e_inline_image:
49					{
50						Console.WriteLine("--> Image: {0}", ++image_counter);
51						Console.WriteLine("    Width: {0}", element.GetImageWidth());
52						Console.WriteLine("    Height: {0}", element.GetImageHeight());
53						Console.WriteLine("    BPC: {0}", element.GetBitsPerComponent());
54
55						Matrix2D ctm = element.GetCTM();
56						double x2=1, y2=1, y1=ctm.m_v;
57						ctm.Mult(ref x2, ref y2);
58						// Write the coords to 3 decimal places.
59						Console.WriteLine("    Coords: x1={0:N2}, y1={1:N2}, x2={2:N2}, y2={3:N2}", ctm.m_h, ctm.m_v, x2, y2);
60						pdftron.PDF.Image image = null;
61						if (element.GetType() == Element.Type.e_image) 
62						{
63							image = new pdftron.PDF.Image(element.GetXObject());
64
65							string fname = output_path + "image_extract1_" + image_counter.ToString();
66							image.Export(fname);  // or ExporAsPng() or ExporAsTiff() ...
67						}
68						break;
69					}
70					case Element.Type.e_form: // Process form XObjects
71					{
72						reader.FormBegin(); 
73						ImageExtract(doc, reader);
74						reader.End(); 
75						break; 
76					}
77				}
78			}
79		}
80
81		static void Main(string[] args)
82		{
83			PDFNet.Initialize(PDFTronLicense.Key);
84			
85			// Example 1: 
86			// Extract images by traversing the display list for 
87			// every page. With this approach it is possible to obtain 
88			// image positioning information and DPI.
89			try	
90			{
91				using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
92				using (ElementReader reader = new ElementReader())
93				{
94					doc.InitSecurityHandler();
95					PageIterator itr;
96					for (itr=doc.GetPageIterator(); itr.HasNext(); itr.Next())	
97					{				
98						reader.Begin(itr.Current());
99						ImageExtract(doc, reader);
100						reader.End();
101					}
102
103					Console.WriteLine("Done.");
104				}
105			}
106			catch (PDFNetException e)
107			{
108				Console.WriteLine(e.Message);
109			}
110
111			Console.WriteLine("----------------------------------------------------------------");
112
113			// Example 2: 
114			// Extract images by scanning the low-level document.
115			try	
116			{
117				using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
118				{
119					doc.InitSecurityHandler();
120					image_counter = 0;
121
122					SDFDoc cos_doc = doc.GetSDFDoc();
123					int num_objs = cos_doc.XRefSize();
124					for (int i=1; i<num_objs; ++i)
125					{
126						Obj obj = cos_doc.GetObj(i);
127						if (obj!=null && !obj.IsFree()&& obj.IsStream()) 
128						{
129							// Process only images
130							DictIterator itr = obj.Find("Subtype");
131							if (!itr.HasNext() || itr.Value().GetName() != "Image") 
132								continue; 
133
134							itr = obj.Find("Type");
135							if (!itr.HasNext() || itr.Value().GetName() != "XObject") 
136								continue;
137
138							pdftron.PDF.Image image = new pdftron.PDF.Image(obj);
139
140							Console.WriteLine("--> Image: {0}", ++image_counter);
141							Console.WriteLine("    Width: {0}", image.GetImageWidth());
142							Console.WriteLine("    Height: {0}", image.GetImageHeight());
143							Console.WriteLine("    BPC: {0}", image.GetBitsPerComponent());
144
145							string fname = output_path + "image_extract2_" + image_counter.ToString();
146							image.Export(fname);  // or ExporAsPng() or ExporAsTiff() ...
147
148							// Convert PDF bitmap to GDI+ Bitmap...
149							//Bitmap bmp = image.GetBitmap();
150							//bmp.Save(fname, ImageFormat.Png);
151							//bmp.Dispose();
152
153							// Instead of converting PDF images to a Bitmap, you can also extract 
154							// uncompressed/compressed image data directly using element.GetImageData() 
155							// as illustrated in ElementReaderAdv sample project.
156						}
157					}
158					Console.WriteLine("Done.");
159				}
160			}
161			catch (PDFNetException e)
162			{
163				Console.WriteLine(e.Message);
164			}
165			PDFNet.Terminate();
166
167		}
168	}
169}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8    "fmt"
9    "strconv"
10    . "pdftron"
11)
12
13import  "pdftron/Samples/LicenseKey/GO"
14
15//-----------------------------------------------------------------------------------
16// This sample illustrates one approach to PDF image extraction 
17// using PDFNet.
18// 
19// Note: Besides direct image export, you can also convert PDF images 
20// to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
21// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
22// sample project).
23//-----------------------------------------------------------------------------------
24
25var imageCounter = 0
26
27// Relative path to the folder containing the test files.
28var inputPath = "../../TestFiles/"
29var outputPath = "../../TestFiles/Output/"
30
31func ImageExtract(reader ElementReader){
32    element := reader.Next()
33
34    for element.GetMp_elem().Swigcptr() != 0{
35        if (element.GetType() == ElementE_image ||
36            element.GetType() == ElementE_inline_image){
37            imageCounter += 1
38            fmt.Println("--> Image: " + strconv.Itoa(imageCounter))
39            fmt.Println("    Width: " + strconv.Itoa(element.GetImageWidth()))
40            fmt.Println("    Height: " + strconv.Itoa(element.GetImageHeight()))
41            fmt.Println("    BPC: " + strconv.Itoa(element.GetBitsPerComponent()))
42            
43            ctm := element.GetCTM()
44            x2 := 1
45            y2 := 1
46            pt := NewPoint(float64(x2), float64(y2))
47            point := ctm.Mult(pt)
48            fmt.Println("    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.GetM_h(), ctm.GetM_v(), point.GetX(), point.GetY())
49            
50            if element.GetType() == ElementE_image{
51                image := NewImage(element.GetXObject())
52                
53                fname := "image_extract1_" + strconv.Itoa(imageCounter)
54                
55                path := outputPath + fname
56                image.Export(path)
57                
58                //path = outputPath + fname + ".tif"
59                //image.ExportAsTiff(path)
60                
61                //path = outputPath + fname + ".png"
62                //image.ExportAsPng(path)
63            }
64        }else if element.GetType() == ElementE_form{
65            reader.FormBegin()
66            ImageExtract(reader)
67            reader.End() 
68        }
69        element = reader.Next()
70    }
71}
72
73func main(){
74    // Initialize PDFNet
75    PDFNetInitialize(PDFTronLicense.Key)    
76    
77    // Example 1: 
78    // Extract images by traversing the display list for 
79    // every page. With this approach it is possible to obtain 
80    // image positioning information and DPI.
81    
82    doc := NewPDFDoc(inputPath + "newsletter.pdf")
83    doc.InitSecurityHandler()
84    
85    reader := NewElementReader()
86    
87    // Read every page
88    itr := doc.GetPageIterator()
89    for itr.HasNext(){
90        reader.Begin(itr.Current())
91        ImageExtract(reader)
92        reader.End()
93        itr.Next()
94    }
95
96    doc.Close()
97    fmt.Println("Done.")
98    
99    fmt.Println("----------------------------------------------------------------")
100    
101    // Example 2: 
102    // Extract images by scanning the low-level document.
103    
104    doc = NewPDFDoc(inputPath + "newsletter.pdf")
105    doc.InitSecurityHandler()
106    imageCounter= 0
107    
108    cosDoc := doc.GetSDFDoc()
109    numObjs := cosDoc.XRefSize()
110    i := uint(1)
111    for i < numObjs{
112        obj := cosDoc.GetObj(i)
113        if(obj != nil && !obj.IsFree() && obj.IsStream()){
114            
115            // Process only images
116            itr := obj.Find("Type")
117            
118            if (!itr.HasNext()) || (itr.Value().GetName() != "XObject"){
119                i = i + 1
120                continue
121            }
122            itr = obj.Find("Subtype")
123            if (!itr.HasNext()) || (itr.Value().GetName() != "Image"){
124                i = i + 1
125                continue
126            }
127            image := NewImage(obj)
128            
129            imageCounter = imageCounter + 1
130            fmt.Println("--> Image: " + strconv.Itoa(imageCounter))
131            fmt.Println("    Width: " + strconv.Itoa(image.GetImageWidth()))
132            fmt.Println("    Height: " + strconv.Itoa(image.GetImageHeight()))
133            fmt.Println("    BPC: " + strconv.Itoa(image.GetBitsPerComponent()))
134            
135            fname := "image_extract2_" + strconv.Itoa(imageCounter)
136                
137            path := outputPath + fname
138            image.Export(path)
139            
140            //path = outputPath + fname + ".tif"
141            //image.ExportAsTiff(path)
142            
143            //path = outputPath + fname + ".png"
144            //image.ExportAsPng(path)
145        }
146        i = i + 1
147    }
148    doc.Close()
149    PDFNetTerminate()
150    fmt.Println("Done.")
151}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.Matrix2D;
7import com.pdftron.common.PDFNetException;
8import com.pdftron.pdf.*;
9import com.pdftron.sdf.DictIterator;
10import com.pdftron.sdf.Obj;
11import com.pdftron.sdf.SDFDoc;
12
13///-----------------------------------------------------------------------------------
14/// This sample illustrates one approach to PDF image extraction 
15/// using PDFNet.
16/// 
17/// Note: Besides direct image export, you can also convert PDF images 
18/// to Java image, or extract uncompressed/compressed image data directly 
19/// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
20/// sample project).
21///-----------------------------------------------------------------------------------
22public class ImageExtractTest {
23
24    // Relative paths to folders containing test files.
25    static String input_path = "../../TestFiles/";
26    static String output_path = "../../TestFiles/Output/";
27
28    static int image_counter = 0;
29
30    static void ImageExtract(ElementReader reader) throws PDFNetException {
31        Element element;
32        while ((element = reader.next()) != null) {
33            switch (element.getType()) {
34                case Element.e_image:
35                case Element.e_inline_image: {
36                    System.out.println("--> Image: " + (++image_counter));
37                    System.out.println("    Width: " + element.getImageWidth());
38                    System.out.println("    Height: " + element.getImageHeight());
39                    System.out.println("    BPC: " + element.getBitsPerComponent());
40
41                    Matrix2D ctm = element.getCTM();
42                    double x2 = 1, y2 = 1;
43                    java.awt.geom.Point2D.Double p = ctm.multPoint(x2, y2);
44                    System.out.println(String.format("    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.getH(), ctm.getV(), p.getX(), p.getY()));
45
46                    if (element.getType() == Element.e_image) {
47                        Image image = new Image(element.getXObject());
48
49                        String fname = "image_extract1_" + image_counter;
50
51                        String path = output_path + fname;
52                        image.export(path);
53
54                        //String path2 = output_path + fname + ".tif";
55                        //image.exportAsTiff(path2);
56
57                        //String path3 = output_path + fname + ".png";
58                        //image.exportAsPng(path3);
59                    }
60                }
61                break;
62                case Element.e_form:        // Process form XObjects
63                    reader.formBegin();
64                    ImageExtract(reader);
65                    reader.end();
66                    break;
67            }
68        }
69    }
70
71    public static void main(String[] args) {
72        // Initialize PDFNet
73        PDFNet.initialize(PDFTronLicense.Key());
74
75        // Example 1:
76        // Extract images by traversing the display list for
77        // every page. With this approach it is possible to obtain
78        // image positioning information and DPI.
79        try (PDFDoc doc = new PDFDoc((input_path + "newsletter.pdf"))) {
80            doc.initSecurityHandler();
81            ElementReader reader = new ElementReader();
82            //  Read every page
83            for (PageIterator itr = doc.getPageIterator(); itr.hasNext(); ) {
84                reader.begin(itr.next());
85                ImageExtract(reader);
86                reader.end();
87            }
88            System.out.println("Done.");
89        } catch (Exception e) {
90            e.printStackTrace();
91        }
92
93
94        System.out.println("----------------------------------------------------------------");
95
96        // Example 2:
97        // Extract images by scanning the low-level document.
98        try (PDFDoc doc = new PDFDoc((input_path + "newsletter.pdf"))) {
99            doc.initSecurityHandler();
100            image_counter = 0;
101            SDFDoc cos_doc = doc.getSDFDoc();
102            long num_objs = cos_doc.xRefSize();
103            for (int i = 1; i < num_objs; ++i) {
104                Obj obj = cos_doc.getObj(i);
105                if (obj != null && !obj.isFree() && obj.isStream()) {
106                    // Process only images
107                    DictIterator itr = obj.find("Type");
108                    if (!itr.hasNext() || !itr.value().getName().equals("XObject"))
109                        continue;
110
111                    itr = obj.find("Subtype");
112                    if (!itr.hasNext() || !itr.value().getName().equals("Image"))
113                        continue;
114
115                    Image image = new Image(obj);
116
117                    System.out.println("--> Image: " + (++image_counter));
118                    System.out.println("    Width: " + image.getImageWidth());
119                    System.out.println("    Height: " + image.getImageHeight());
120                    System.out.println("    BPC: " + image.getBitsPerComponent());
121
122                    String fname = "image_extract2_" + image_counter;
123                    String path = output_path + fname;
124                    image.export(path);
125
126                    //String path= output_path + fname + ".tif";
127                    //image.exportAsTiff(path);
128
129                    //String path = output_path + fname + ".png";
130                    //image.exportAsPng(path);
131                }
132            }
133            
134            System.out.println("Done.");
135        } catch (Exception e) {
136            e.printStackTrace();
137        }
138
139        PDFNet.terminate();
140    }
141}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#include <PDF/PDFNet.h>
7#include <PDF/PDFDoc.h>
8#include <PDF/ElementReader.h>
9#include <PDF/Image.h>
10#include "../../LicenseKey/CPP/LicenseKey.h"
11
12//-----------------------------------------------------------------------------------
13// This sample illustrates one approach to PDF image extraction 
14// using PDFNet.
15// 
16// Note: Besides direct image export, you can also convert PDF images 
17// to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
18// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
19// sample project).
20//-----------------------------------------------------------------------------------
21
22#include <iostream>
23#include <iomanip>
24
25using namespace std;
26
27using namespace pdftron;
28using namespace Common;
29using namespace SDF;
30using namespace PDF;
31
32// Relative paths to folders containing test files.
33string input_path =  "../../TestFiles/";
34string output_path = "../../TestFiles/Output/";
35
36int image_counter = 0;
37
38void ImageExtract(ElementReader& reader) 
39{
40	// Set the precision for printing doubles on cout to 3 decimal places.
41	ios iostate(NULL);
42	iostate.copyfmt(cout);
43	cout << fixed << showpoint << setprecision(3);
44
45	Element element; 
46	while ((element = reader.Next()) != 0)
47	{
48		switch (element.GetType()) 
49		{
50		case Element::e_image: 
51		case Element::e_inline_image: 
52			{
53				cout << "--> Image: " << ++image_counter << endl;
54				cout << "    Width: " << element.GetImageWidth() << endl;
55				cout << "    Height: " << element.GetImageHeight() << endl;
56				cout << "    BPC: " << element.GetBitsPerComponent() << endl;
57
58				Common::Matrix2D ctm = element.GetCTM();
59				double x2=1, y2=1;
60				ctm.Mult(x2, y2);
61				printf("    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f\n", ctm.m_h, ctm.m_v, x2, y2);
62
63				if (element.GetType() == Element::e_image) 
64				{
65					Image image(element.GetXObject());
66
67					char fname[256];
68					sprintf(fname, "image_extract1_%d", image_counter);
69
70					string path(output_path + fname);
71					image.Export(path.c_str());
72
73					//string path(output_path + fname + ".tif");
74					//image.ExportAsTiff(path.c_str());
75
76					//string path(output_path + fname + ".png");
77					//image.ExportAsPng(path.c_str());
78				}
79			}
80			break;
81		case Element::e_form:		// Process form XObjects
82			reader.FormBegin(); 
83			ImageExtract(reader);
84			reader.End(); 
85			break; 
86		}
87	}
88
89	// Reset cout's state.
90	cout.copyfmt(iostate);
91}
92
93int main(int argc, char *argv[])
94{
95	int ret = 0;
96
97	// Initialize PDFNet
98	PDFNet::Initialize(LicenseKey);
99
100	// Example 1: 
101	// Extract images by traversing the display list for 
102	// every page. With this approach it is possible to obtain 
103	// image positioning information and DPI.
104	try  
105	{	 
106		PDFDoc doc((input_path + "newsletter.pdf").c_str());
107		doc.InitSecurityHandler();
108		ElementReader reader;
109		//  Read every page
110		for (PageIterator itr=doc.GetPageIterator(); itr.HasNext(); itr.Next()) 
111		{				
112			reader.Begin(itr.Current());
113			ImageExtract(reader);
114			reader.End();
115		}
116
117		cout << "Done." << endl;
118	}
119	catch(Common::Exception& e)
120	{
121		cout << e << endl;
122		ret = 1;
123	}
124	catch(...)
125	{
126		cout << "Unknown Exception" << endl;
127		ret = 1;
128	}
129
130	cout << "----------------------------------------------------------------" << endl;
131
132	// Example 2: 
133	// Extract images by scanning the low-level document.
134	try  
135	{	 
136		PDFDoc doc((input_path + "newsletter.pdf").c_str());
137
138		doc.InitSecurityHandler();
139		image_counter = 0;
140
141		SDFDoc& cos_doc=doc.GetSDFDoc();
142		int num_objs = cos_doc.XRefSize();
143		for(int i=1; i<num_objs; ++i) 
144		{
145			Obj obj = cos_doc.GetObj(i);
146			if(obj && !obj.IsFree() && obj.IsStream()) 
147			{
148				// Process only images
149				DictIterator itr = obj.Find("Type");
150				if(!itr.HasNext() || strcmp(itr.Value().GetName(), "XObject"))
151					continue;
152
153				itr = obj.Find("Subtype");
154				if(!itr.HasNext() || strcmp(itr.Value().GetName(), "Image"))
155					continue;
156				
157				PDF::Image image(obj);
158				cout << "--> Image: " << ++image_counter << endl;
159				cout << "    Width: " << image.GetImageWidth() << endl;
160				cout << "    Height: " << image.GetImageHeight() << endl;
161				cout << "    BPC: " << image.GetBitsPerComponent() << endl;
162
163				char fname[256];
164				sprintf(fname, "image_extract2_%d", image_counter);
165				string path(output_path + fname);
166				image.Export(path.c_str());
167
168				//string path(output_path + fname + ".tif");
169				//image.ExportAsTiff(path.c_str());
170
171				//string path(output_path + fname + ".png");
172				//image.ExportAsPng(path.c_str());
173			}
174		}
175
176		cout << "Done." << endl;
177	}
178	catch(Common::Exception& e)
179	{
180		cout << e << endl;
181		ret = 1;
182	}
183	catch(...)
184	{
185		cout << "Unknown Exception" << endl;
186		ret = 1;
187	}
188
189	PDFNet::Terminate();
190	return ret;
191}

1<?php
2//---------------------------------------------------------------------------------------
3// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
4// Consult LICENSE.txt regarding license information.
5//---------------------------------------------------------------------------------------
6if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
7include("../../../PDFNetC/Lib/PDFNetPHP.php");
8include("../../LicenseKey/PHP/LicenseKey.php");
9
10// Relative path to the folder containing the test files.
11$input_path = getcwd()."/../../TestFiles/";
12$output_path = $input_path."Output/";
13
14//-----------------------------------------------------------------------------------
15// This sample illustrates one approach to PDF image extraction 
16// using PDFNet.
17// 
18// Note: Besides direct image export, you can also convert PDF images 
19// to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
20// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
21// sample project).
22//-----------------------------------------------------------------------------------
23
24$image_counter = 0;
25
// Reads all remaining elements from $reader. Images and inline images are
// reported; image XObjects are also exported to $output_path. Form XObjects
// are entered and processed recursively.
function ImageExtract($reader) 
{
	global $image_counter;
	global $output_path;

	while (($element = $reader->Next()) != null)
	{
		$type = $element->GetType();

		if ($type == Element::e_form)
		{
			// Process form XObjects
			$reader->FormBegin(); 
			ImageExtract($reader);
			$reader->End(); 
		}
		elseif ($type == Element::e_image || $type == Element::e_inline_image)
		{
			$image_counter++;
			echo nl2br("--> Image: ".$image_counter."\n");
			echo nl2br("    Width: ".$element->GetImageWidth()."\n");
			echo nl2br("    Height: ".$element->GetImageHeight()."\n");
			echo nl2br("    BPC: ".$element->GetBitsPerComponent()."\n");

			// Map the point (1, 1) through the CTM to obtain the second corner.
			$ctm = $element->GetCTM();
			$corner = $ctm->Mult(new Point(1.0, 1.0));
			printf("    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f\n", $ctm->m_h, $ctm->m_v, $corner->x, $corner->y);

			// Only image XObjects are exported; inline images are just reported.
			if ($element->GetType() == Element::e_image) 
			{
				$image = new Image($element->GetXObject());
				$path = $output_path."image_extract1_".$image_counter;
				$image->Export($path);

				//$image->ExportAsTiff($path.".tif");
				//$image->ExportAsPng($path.".png");
			}
		}
	}
}
71
72	// Initialize PDFNet
73	PDFNet::Initialize($LicenseKey);
74	PDFNet::GetSystemFontList();    // Wait for fonts to be loaded if they haven't already. This is done because PHP can run into errors when shutting down if font loading is still in progress.
75
76	// Example 1: 
77	// Extract images by traversing the display list for 
78	// every page. With this approach it is possible to obtain 
79	// image positioning information and DPI.
80	$doc = new PDFDoc($input_path."newsletter.pdf");
81	$doc->InitSecurityHandler();
82
83	$reader = new ElementReader();
84	//  Read every page
85	for ($itr=$doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) 
86	{				
87		$reader->Begin($itr->Current());
88		ImageExtract($reader);
89		$reader->End();
90	}
91
92	$doc->Close();
93	echo nl2br("Done.\n");
94
95	echo nl2br("----------------------------------------------------------------\n");
96
97	// Example 2: 
98	// Extract images by scanning the low-level document.
99	$doc = new PDFDoc($input_path."newsletter.pdf");
100
101	$doc->InitSecurityHandler();
102	$image_counter = 0;
103
104	$cos_doc=$doc->GetSDFDoc();
105	$num_objs = $cos_doc->XRefSize();
106	for($i=1; $i<$num_objs; ++$i) 
107	{
108		$obj = $cos_doc->GetObj($i);
109		if($obj != null && !$obj->IsFree() && $obj->IsStream()) 
110		{
111			// Process only images
112			$itr = $obj->Find("Type");
113			if(!$itr->HasNext() || !($itr->Value()->GetName() == "XObject"))
114			{
115				continue;
116			}
117
118			$itr = $obj->Find("Subtype");
119			if(!$itr->HasNext() || !($itr->Value()->GetName() == "Image"))
120			{
121				continue;
122			}
123				
124			$image = new Image($obj);
125			echo nl2br("--> Image: ".++$image_counter."\n");
126			echo nl2br("    Width: ".$image->GetImageWidth()."\n");
127			echo nl2br("    Height: ".$image->GetImageHeight()."\n");
128			echo nl2br("    BPC: ".$image->GetBitsPerComponent()."\n");
129
130			$fname = "image_extract2_".$image_counter;
131			$path = $output_path.$fname;
132			$image->Export($path);
133
134			//$path = $output_path.$fname.".tif");
135			//$image->ExportAsTiff($path);
136
137			//$path = $output_path.fname.".png");
138			//$image->ExportAsPng($path);
139		}
140	}
141
142	$doc->Close();
143	PDFNet::Terminate();
144	echo nl2br("Done.\n");
145	
146?>

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6//-----------------------------------------------------------------------------------
7// This sample illustrates one approach to PDF image extraction 
8// using PDFNet.
9// 
10// Note: Besides direct image export, you can also convert PDF images 
11// to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
12// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
13// sample project).
14//-----------------------------------------------------------------------------------
15
16const { PDFNet } = require('@pdftron/pdfnet-node');
17const PDFTronLicense = require('../LicenseKey/LicenseKey');
18
19((exports) => {
20  'use strict';
21
22  exports.runImageExtractTest = () => {
23
24    let image_counter = 0;
25    const outputPath = '../TestFiles/Output/';
26
27    const imageExtract = async (reader) => {
28      let element;
29      while ((element = await reader.next()) !== null) {
30        switch (await element.getType()) {
31          case PDFNet.Element.Type.e_image:
32          case PDFNet.Element.Type.e_inline_image:
33            console.log('--> Image: ' + ++image_counter);
34            console.log('    Width: ' + await element.getImageWidth());
35            console.log('    Height: ' + await element.getImageHeight());
36            console.log('    BPC: ' + await element.getBitsPerComponent());
37
38            const ctm = await element.getCTM();
39            let x2 = 1, y2 = 1;
40            const result = await ctm.mult(x2, y2);
41            x2 = result.x;
42            y2 = result.y;
43            console.log('    Coords: x1=' + ctm.m_h.toFixed(2) + ', y1=' + ctm.m_v.toFixed(2)
44             + ', x2=' + x2.toFixed(2) + ', y2=' + y2.toFixed(2));
45
46            if (await element.getType() == PDFNet.Element.Type.e_image) {
47              const image = await PDFNet.Image.createFromObj(await element.getXObject());
48              image.export(outputPath + 'image_extract1_' + image_counter);
49            }
50            break;
51          case PDFNet.Element.Type.e_form: // Process form XObjects
52            reader.formBegin();
53            await imageExtract(reader);
54            reader.end();
55            break;
56        }
57      }
58    }
59
/**
 * Runs the two image-extraction examples against newsletter.pdf.
 *
 * Each example is wrapped in its own try/catch so that a failure in one is
 * logged and does not prevent the other from running.
 *
 * Uses `PDFNet`, `imageExtract`, `image_counter` and `outputPath` from the
 * enclosing scope.
 *
 * Every PDFNet call is awaited so that a rejected call (for example a failed
 * export) is caught by the surrounding try/catch instead of escaping as an
 * unhandled rejection.
 */
const main = async () => {

  // Example 1:
  // Extract images by traversing the display list for
  // every page. With this approach it is possible to obtain
  // image positioning information and DPI.
  try {
    const doc = await PDFNet.PDFDoc.createFromFilePath('../TestFiles/newsletter.pdf');
    await doc.initSecurityHandler();

    const reader = await PDFNet.ElementReader.create();
    const pageItr = await doc.getPageIterator(1);
    // Read every page
    for (; await pageItr.hasNext(); await pageItr.next()) {
      const page = await pageItr.current();
      await reader.beginOnPage(page);
      await imageExtract(reader);
      await reader.end();
    }

    console.log('Done.');
  } catch (err) {
    console.log(err);
  }

  console.log('----------------------------------------------------------------');

  // Example 2:
  // Extract images by scanning the low-level document.
  try {
    const doc = await PDFNet.PDFDoc.createFromFilePath('../TestFiles/newsletter.pdf');
    await doc.initSecurityHandler();
    // Restart the numbering used in the log output and the export file names.
    image_counter = 0;

    const cos_doc = await doc.getSDFDoc();
    const num_objs = await cos_doc.xRefSize();
    for (let i = 0; i < num_objs; i++) {
      const obj = await cos_doc.getObj(i);
      // Only in-use stream objects can be images.
      if (!obj || (await obj.isFree()) || !(await obj.isStream())) {
        continue;
      }

      // Process only images: /Type must be XObject and /Subtype must be Image.
      let entry = await obj.find('Type');
      if (!(await entry.hasNext()) || (await (await entry.value()).getName()) !== 'XObject') {
        continue;
      }

      entry = await obj.find('Subtype');
      if (!(await entry.hasNext()) || (await (await entry.value()).getName()) !== 'Image') {
        continue;
      }

      const image = await PDFNet.Image.createFromObj(obj);
      console.log('--> Image: ' + ++image_counter);
      console.log('    Width: ' + await image.getImageWidth());
      console.log('    Height: ' + await image.getImageHeight());
      console.log('    BPC: ' + await image.getBitsPerComponent());

      await image.export(outputPath + 'image_extract2_' + image_counter);
    }

    console.log('Done.');
  } catch (err) {
    console.log(err);
  }

}
123    PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error) {
124      console.log('Error: ' + JSON.stringify(error));
125    }).then(function(){ return PDFNet.shutdown(); });
126  };
127  exports.runImageExtractTest();
128})(exports);
129// eslint-disable-next-line spaced-comment
130//# sourceURL=ImageExtractTest.js

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11sys.path.append("../../LicenseKey/PYTHON")
12from LicenseKey import *
13
14#-----------------------------------------------------------------------------------
15# This sample illustrates one approach to PDF image extraction 
16# using PDFNet.
17# 
18# Note: Besides direct image export, you can also convert PDF images 
19# to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
20# using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
21# sample project).
22#-----------------------------------------------------------------------------------
23
24image_counter = 0
25
26# Relative path to the folder containing the test files.
27input_path = "../../TestFiles/"
28output_path = "../../TestFiles/Output/"
29
def ImageExtract(reader):
    """Report every image reachable through ``reader`` and export XObject images.

    Elements are pulled from ``reader`` until ``reader.Next()`` returns None.
    For each image or inline image the module-level ``image_counter`` is
    incremented and the image's width, height, bits per component and
    coordinates are printed. Only ``Element.e_image`` elements are exported
    (to ``output_path``); inline images are reported but not written out.
    Form XObjects are entered with ``FormBegin()`` and processed recursively.

    Args:
        reader: the element reader to consume; the caller is expected to have
            started it on a page or form before calling.
    """
    global image_counter

    element = reader.Next()
    while element is not None:
        elem_type = element.GetType()

        if elem_type == Element.e_image or elem_type == Element.e_inline_image:
            image_counter += 1
            print("--> Image: " + str(image_counter))
            print("    Width: " + str(element.GetImageWidth()))
            print("    Height: " + str(element.GetImageHeight()))
            print("    BPC: " + str(element.GetBitsPerComponent()))

            # (x1, y1) is the translation part of the CTM; (x2, y2) is the
            # point (1, 1) after being multiplied by the CTM.
            ctm = element.GetCTM()
            corner = ctm.Mult(Point(1, 1))
            print("    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f" % (ctm.m_h, ctm.m_v, corner.x, corner.y))

            if elem_type == Element.e_image:
                image = Image(element.GetXObject())
                path = output_path + "image_extract1_" + str(image_counter)
                image.Export(path)

                # Alternative export formats:
                #image.ExportAsTiff(path + ".tif")
                #image.ExportAsPng(path + ".png")
        elif elem_type == Element.e_form:
            # Process form XObjects
            reader.FormBegin()
            ImageExtract(reader)
            reader.End()

        element = reader.Next()
67
def main():
    """Run both image-extraction examples on newsletter.pdf.

    Example 1 walks each page's display list through ``ImageExtract``.
    Example 2 scans the low-level (SDF) object table for image XObjects and
    exports each one. PDFNet is initialized at the start and terminated at
    the end.
    """
    # Initialize PDFNet
    PDFNet.Initialize(LicenseKey)

    # Example 1:
    # Extract images by traversing the display list for
    # every page. With this approach it is possible to obtain
    # image positioning information and DPI.
    doc = PDFDoc(input_path + "newsletter.pdf")
    doc.InitSecurityHandler()

    reader = ElementReader()

    # Read every page
    page_itr = doc.GetPageIterator()
    while page_itr.HasNext():
        reader.Begin(page_itr.Current())
        ImageExtract(reader)
        reader.End()
        page_itr.Next()

    doc.Close()
    print("Done.")

    print("----------------------------------------------------------------")

    # Example 2:
    # Extract images by scanning the low-level document.
    doc = PDFDoc(input_path + "newsletter.pdf")
    doc.InitSecurityHandler()

    # Counter local to this example; the module-level image_counter used by
    # ImageExtract is not touched here.
    extracted = 0

    cos_doc = doc.GetSDFDoc()
    for obj_num in range(1, cos_doc.XRefSize()):
        obj = cos_doc.GetObj(obj_num)
        # Only in-use stream objects are candidates.
        if obj is None or obj.IsFree() or not obj.IsStream():
            continue

        # Process only images: /Type must be XObject, /Subtype must be Image.
        entry = obj.Find("Type")
        if not entry.HasNext() or entry.Value().GetName() != "XObject":
            continue

        entry = obj.Find("Subtype")
        if not entry.HasNext() or entry.Value().GetName() != "Image":
            continue

        image = Image(obj)

        extracted += 1
        print("--> Image: " + str(extracted))
        print("    Width: " + str(image.GetImageWidth()))
        print("    Height: " + str(image.GetImageHeight()))
        print("    BPC: " + str(image.GetBitsPerComponent()))

        path = output_path + "image_extract2_" + str(extracted)
        image.Export(path)

        # Alternative export formats:
        #image.ExportAsTiff(path + ".tif")
        #image.ExportAsPng(path + ".png")

    doc.Close()
    PDFNet.Terminate()
    print("Done.")

if __name__ == '__main__':
    main()

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6require '../../../PDFNetC/Lib/PDFNetRuby'
7include PDFNetRuby
8require '../../LicenseKey/RUBY/LicenseKey'
9
10$stdout.sync = true
11
12#-----------------------------------------------------------------------------------
13# This sample illustrates one approach to PDF image extraction 
14# using PDFNet.
15# 
16# Note: Besides direct image export, you can also convert PDF images 
17# to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
18# using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
19# sample project).
20#-----------------------------------------------------------------------------------
21
22$image_counter = 0
23
24# Relative path to the folder containing the test files.
25$input_path = "../../TestFiles/"
26$output_path = "../../TestFiles/Output/"
27
# Reports every image reachable through +reader+ and exports XObject images.
#
# Elements are read until reader.Next() returns nil. For each image or inline
# image, $image_counter is incremented and the width, height, bits per
# component and coordinates are printed. Only Element::E_image elements are
# exported (into $output_path); form XObjects are entered with FormBegin()
# and processed recursively.
def ImageExtract(reader)
	element = reader.Next()
	until element.nil?
		type = element.GetType()

		if type == Element::E_image || type == Element::E_inline_image
			$image_counter += 1
			puts "--> Image: " + $image_counter.to_s
			puts "    Width: " + element.GetImageWidth().to_s
			puts "    Height: " + element.GetImageHeight().to_s
			puts "    BPC: " + element.GetBitsPerComponent().to_s

			# (x1, y1) is the translation part of the CTM; (x2, y2) is the
			# point (1, 1) after being multiplied by the CTM.
			ctm = element.GetCTM()
			corner = ctm.Mult(Point.new(1, 1))
			puts "    Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f" % [ctm.m_h, ctm.m_v, corner.x, corner.y]

			if type == Element::E_image
				image = Image.new(element.GetXObject())
				path = $output_path + "image_extract1_" + $image_counter.to_s
				image.Export(path)

				# Alternative export formats:
				#image.ExportAsTiff(path + ".tif")
				#image.ExportAsPng(path + ".png")
			end
		elsif type == Element::E_form
			# Process form XObjects
			reader.FormBegin()
			ImageExtract(reader)
			reader.End()
		end

		element = reader.Next()
	end
end
69
# Script body: runs both image-extraction examples on newsletter.pdf.

# Initialize PDFNet
PDFNet.Initialize(PDFTronLicense.Key)

# Example 1:
# Extract images by traversing the display list for
# every page. With this approach it is possible to obtain
# image positioning information and DPI.

doc = PDFDoc.new($input_path + "newsletter.pdf")
doc.InitSecurityHandler()

reader = ElementReader.new()

# Read every page
itr = doc.GetPageIterator()
while itr.HasNext()
	reader.Begin(itr.Current())
	ImageExtract(reader)
	reader.End()
	itr.Next()
end

doc.Close()

puts "Done."
puts "----------------------------------------------------------------"

# Example 2:
# Extract images by scanning the low-level document.

doc = PDFDoc.new($input_path + "newsletter.pdf")
doc.InitSecurityHandler()
# Restart the numbering used in the log output and the export file names.
$image_counter = 0

cos_doc = doc.GetSDFDoc()
num_objs = cos_doc.XRefSize()
(1...num_objs).each do |i|
	obj = cos_doc.GetObj(i)

	# Only in-use stream objects are candidates.
	next if obj.nil? || obj.IsFree() || !obj.IsStream()

	# Process only images: /Type must be XObject, /Subtype must be Image.
	itr = obj.Find("Type")
	next if !itr.HasNext() || itr.Value().GetName() != "XObject"

	itr = obj.Find("Subtype")
	next if !itr.HasNext() || itr.Value().GetName() != "Image"

	image = Image.new(obj)
	$image_counter += 1
	puts "--> Image: " + $image_counter.to_s
	puts "    Width: " + image.GetImageWidth().to_s
	puts "    Height: " + image.GetImageHeight().to_s
	puts "    BPC: " + image.GetBitsPerComponent().to_s

	path = $output_path + "image_extract2_" + $image_counter.to_s
	image.Export(path)

	# Alternative export formats:
	#image.ExportAsTiff(path + ".tif")
	#image.ExportAsPng(path + ".png")
end
doc.Close()
PDFNet.Terminate
puts "Done."

1'
2' Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3'
4
5Imports System
6Imports System.Drawing
7Imports System.Drawing.Imaging
8
9Imports pdftron
10Imports pdftron.Common
11Imports PDFTRON.SDF
12Imports pdftron.PDF
13
14Module ImageExtractTestVB
15	Dim pdfNetLoader As PDFNetLoader
16	Sub New()
17		pdfNetLoader = pdftron.PDFNetLoader.Instance()
18	End Sub
19
20	'-----------------------------------------------------------------------------------
21	' This sample illustrates one approach to PDF image extraction 
22	' using PDFNet.
23	' 
24	' Note: Besides direct image export, you can also convert PDF images 
25	' to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
26	' using element.GetImageData() (as illustrated in ElementReaderAdv 
27	' sample project).
28	'-----------------------------------------------------------------------------------
29
30	Dim image_counter As Integer = 0
31
32	' Relative path to the folder containing test files.
33	Dim input_path As String = "../../../../TestFiles/"
34	Dim output_path As String = "../../../../TestFiles/Output/"
35
36
' Reports every image reachable through the reader and exports XObject images.
'
' Elements are read until reader.Next() returns Nothing. For each image or
' inline image, image_counter is incremented and the width, height, bits per
' component and coordinates are written to the console. Only e_image elements
' are exported (into output_path); form XObjects are entered with FormBegin()
' and processed recursively.
Sub ImageExtract(ByRef reader As ElementReader)
	Dim current As Element = reader.Next()
	Do Until current Is Nothing		 ' Read page contents
		Dim kind As Element.Type = current.GetType()

		Select Case kind
			Case Element.Type.e_image, Element.Type.e_inline_image
				image_counter += 1
				Console.WriteLine("--> Image: {0}", image_counter)
				Console.WriteLine("    Width: {0}", current.GetImageWidth())
				Console.WriteLine("    Height: {0}", current.GetImageHeight())
				Console.WriteLine("    BPC: {0}", current.GetBitsPerComponent())

				' (x1, y1) is the translation part of the CTM. x2/y2 start as (1, 1)
				' and are passed to Mult; presumably Mult transforms them in place,
				' since they are printed afterwards as the second corner - confirm.
				Dim ctm As Matrix2D = current.GetCTM()
				Dim x2 As Double = 1
				Dim y2 As Double = 1
				ctm.Mult(x2, y2)
				Console.WriteLine("    Coords: x1=" + String.Format("{0:N2}", ctm.m_h) _
					+ ", y1=" + String.Format("{0:N2}", ctm.m_v) _
					+ ", x2=" + String.Format("{0:N2}", x2) _
					+ ", y2=" + String.Format("{0:N2}", y2))

				If kind = Element.Type.e_image Then
					Dim image As pdftron.PDF.Image = New pdftron.PDF.Image(current.GetXObject())
					Dim fname As String = output_path + "image_extract1_" + image_counter.ToString()
					image.Export(fname)					' or ExportAsPng() or ExportAsTiff() ...

					' Convert PDF bitmap to GDI+ Bitmap...
					' Dim bmp As Bitmap = element.GetBitmap()
					' bmp.Save(fname, ImageFormat.Png)
					' bmp.Dispose()

					' Instead of converting PDF images to a Bitmap, you can also extract
					' uncompressed/compressed image data directly using element.GetImageData()
					' as illustrated in ElementReaderAdv sample project.
				End If

			Case Element.Type.e_form
				reader.FormBegin()				   ' Process form XObjects
				ImageExtract(reader)
				reader.End()
		End Select

		current = reader.Next()
	Loop
End Sub
78
' Entry point: runs both image-extraction examples on newsletter.pdf.
'
' Example 1 walks each page's display list through ImageExtract.
' Example 2 scans the low-level (SDF) object table for image XObjects and
' exports each one. Each example has its own Try block, so a failure in one
' is reported and the other still runs. PDFNet is initialized at the start
' and terminated at the end.
Sub Main()

	PDFNet.Initialize(PDFTronLicense.Key)

	' Example 1:
	' Extract images by traversing the display list for
	' every page. With this approach it is possible to obtain
	' image positioning information and DPI.
	Try
		Using doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf")
			doc.InitSecurityHandler()
			Using reader As ElementReader = New ElementReader

				Dim itr As PageIterator = doc.GetPageIterator()
				While itr.HasNext()
					reader.Begin(itr.Current())
					ImageExtract(reader)
					reader.End()
					itr.Next()
				End While

			End Using
		End Using
		Console.WriteLine("Done.")
	Catch ex As PDFNetException
		Console.WriteLine(ex.Message)
	Catch ex As Exception
		MsgBox(ex.Message)
	End Try
	Console.WriteLine("----------------------------------------------------------------")

	' Example 2:
	' Extract images by scanning the low-level document.
	Try
		Using doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf")
			doc.InitSecurityHandler()
			' Restart the numbering used in the log output and the export file names.
			image_counter = 0

			Dim cos_doc As SDFDoc = doc.GetSDFDoc()
			Dim num_objs As Integer = cos_doc.XRefSize()

			For i As Integer = 1 To num_objs - 1
				Dim obj As Obj = cos_doc.GetObj(i)

				' OrElse short-circuits: with plain Or, obj.IsFree() would still be
				' evaluated when obj is Nothing and throw a NullReferenceException.
				If obj Is Nothing OrElse obj.IsFree() OrElse Not obj.IsStream() Then
					Continue For
				End If

				' Process only images: /Type must be XObject, /Subtype must be Image.
				Dim itr As DictIterator = obj.Find("Type")
				If Not itr.HasNext() OrElse itr.Value().GetName() <> "XObject" Then
					Continue For
				End If

				itr = obj.Find("Subtype")
				If Not itr.HasNext() OrElse itr.Value().GetName() <> "Image" Then
					Continue For
				End If

				Dim image As pdftron.PDF.Image = New pdftron.PDF.Image(obj)

				image_counter = image_counter + 1
				Console.WriteLine("--> Image: {0}", image_counter)
				Console.WriteLine("    Width: {0}", image.GetImageWidth())
				Console.WriteLine("    Height: {0}", image.GetImageHeight())
				Console.WriteLine("    BPC: {0}", image.GetBitsPerComponent())

				Dim fname As String = output_path + "image_extract2_" + image_counter.ToString()
				image.Export(fname)		   ' or ExportAsPng() or ExportAsTiff() ...

				' Convert PDF bitmap to GDI+ Bitmap...
				' Dim bmp As Bitmap = element.GetBitmap()
				' bmp.Save(fname, ImageFormat.Png)
				' bmp.Dispose()

				' Instead of converting PDF images to a Bitmap, you can also extract
				' uncompressed/compressed image data directly using element.GetImageData()
				' as illustrated in ElementReaderAdv sample project.
			Next
		End Using
		Console.WriteLine("Done.")
	Catch ex As PDFNetException
		Console.WriteLine(ex.Message)
	Catch ex As Exception
		MsgBox(ex.Message)
	End Try
	PDFNet.Terminate()
End Sub
166End Module

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

Extract Image from PDFs - Python Sample Code