Sample C# code for using Apryse SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our Server SDK and PDF Data Extraction SDK Capabilities.
1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using System.Drawing;
7using System.Drawing.Imaging;
8
9using pdftron;
10using pdftron.Common;
11using pdftron.PDF;
12using pdftron.SDF;
13using pdftron.Filters;
14
15namespace ImageExtractTestCS
16{
17 class Class1
18 {
19 /// <summary>
20 ///-----------------------------------------------------------------------------------
21 /// This sample illustrates one approach to PDF image extraction
22 /// using PDFNet.
23 ///
24 /// Note: Besides direct image export, you can also convert PDF images
25 /// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
26 /// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
27 /// sample project).
28 ///-----------------------------------------------------------------------------------
29 /// </summary>
30
31 private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();
32 static Class1() {}
33
34 static int image_counter = 0;
35
36 // Relative path to the folder containing test files.
37 static string input_path = "../../../../TestFiles/";
38 static string output_path = "../../../../TestFiles/Output/";
39
/// <summary>
/// Walks every element returned by <paramref name="reader"/>, prints the size,
/// bits-per-component and placement of each image, and exports every image
/// XObject to a file under <c>output_path</c>. Form XObjects are entered
/// recursively so images nested inside them are reported as well.
/// </summary>
/// <param name="doc">The document being read. Not used by this method; kept so existing callers compile unchanged.</param>
/// <param name="reader">A reader the caller has already started with Begin() or FormBegin().</param>
static void ImageExtract(PDFDoc doc, ElementReader reader)
{
    Element element;
    while ((element = reader.Next()) != null)
    {
        switch (element.GetType())
        {
            case Element.Type.e_image:
            case Element.Type.e_inline_image:
            {
                Console.WriteLine("--> Image: {0}", ++image_counter);
                Console.WriteLine(" Width: {0}", element.GetImageWidth());
                Console.WriteLine(" Height: {0}", element.GetImageHeight());
                Console.WriteLine(" BPC: {0}", element.GetBitsPerComponent());

                // (x1, y1) is the translation part of the CTM; (x2, y2) is the
                // point (1, 1) after being transformed by the CTM.
                Matrix2D ctm = element.GetCTM();
                double x2 = 1, y2 = 1;
                ctm.Mult(ref x2, ref y2);
                // Write the coords to 2 decimal places.
                Console.WriteLine(" Coords: x1={0:N2}, y1={1:N2}, x2={2:N2}, y2={3:N2}", ctm.m_h, ctm.m_v, x2, y2);

                // Only e_image elements are exported; inline images are reported
                // above but skipped here.
                if (element.GetType() == Element.Type.e_image)
                {
                    pdftron.PDF.Image image = new pdftron.PDF.Image(element.GetXObject());

                    string fname = output_path + "image_extract1_" + image_counter.ToString();
                    image.Export(fname); // or ExportAsPng() or ExportAsTiff() ...
                }
                break;
            }
            case Element.Type.e_form: // Process form XObjects
            {
                reader.FormBegin();
                ImageExtract(doc, reader);
                reader.End();
                break;
            }
        }
    }
}
80
/// <summary>
/// Entry point. Runs two independent image-extraction passes over
/// newsletter.pdf: the first walks each page's element list via
/// <c>ImageExtract</c>; the second scans the low-level object table for
/// image XObject streams. PDFNet is initialized first and terminated last.
/// </summary>
/// <param name="args">Command-line arguments (not read).</param>
static void Main(string[] args)
{
    PDFNet.Initialize(PDFTronLicense.Key);

    // Example 1:
    // Traverse the display list of every page. This route also yields
    // image positioning information and DPI.
    try
    {
        using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
        using (ElementReader reader = new ElementReader())
        {
            doc.InitSecurityHandler();

            PageIterator pages = doc.GetPageIterator();
            while (pages.HasNext())
            {
                reader.Begin(pages.Current());
                ImageExtract(doc, reader);
                reader.End();
                pages.Next();
            }

            Console.WriteLine("Done.");
        }
    }
    catch (PDFNetException e)
    {
        Console.WriteLine(e.Message);
    }

    Console.WriteLine("----------------------------------------------------------------");

    // Example 2:
    // Scan the low-level document object by object.
    try
    {
        using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
        {
            doc.InitSecurityHandler();
            image_counter = 0;

            SDFDoc cos_doc = doc.GetSDFDoc();
            int object_count = cos_doc.XRefSize();
            for (int obj_num = 1; obj_num < object_count; ++obj_num)
            {
                Obj candidate = cos_doc.GetObj(obj_num);
                if (candidate == null || candidate.IsFree() || !candidate.IsStream())
                {
                    continue;
                }

                // Keep only streams tagged /Subtype /Image and /Type /XObject.
                DictIterator entry = candidate.Find("Subtype");
                if (!entry.HasNext() || entry.Value().GetName() != "Image")
                {
                    continue;
                }

                entry = candidate.Find("Type");
                if (!entry.HasNext() || entry.Value().GetName() != "XObject")
                {
                    continue;
                }

                pdftron.PDF.Image image = new pdftron.PDF.Image(candidate);

                image_counter++;
                Console.WriteLine("--> Image: {0}", image_counter);
                Console.WriteLine(" Width: {0}", image.GetImageWidth());
                Console.WriteLine(" Height: {0}", image.GetImageHeight());
                Console.WriteLine(" BPC: {0}", image.GetBitsPerComponent());

                string target = output_path + "image_extract2_" + image_counter.ToString();
                image.Export(target); // or ExportAsPng() or ExportAsTiff() ...

                // Alternative: convert the PDF bitmap to a GDI+ Bitmap...
                //Bitmap bmp = image.GetBitmap();
                //bmp.Save(target, ImageFormat.Png);
                //bmp.Dispose();

                // Raw compressed/uncompressed image data can also be read
                // directly with element.GetImageData(), as shown in the
                // ElementReaderAdv sample project.
            }

            Console.WriteLine("Done.");
        }
    }
    catch (PDFNetException e)
    {
        Console.WriteLine(e.Message);
    }

    PDFNet.Terminate();
}
168 }
169}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8 "fmt"
9 "strconv"
10 . "pdftron"
11)
12
13import "pdftron/Samples/LicenseKey/GO"
14
15//-----------------------------------------------------------------------------------
16// This sample illustrates one approach to PDF image extraction
17// using PDFNet.
18//
19// Note: Besides direct image export, you can also convert PDF images
20// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
21// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
22// sample project).
23//-----------------------------------------------------------------------------------
24
25var imageCounter = 0
26
27// Relative path to the folder containing the test files.
28var inputPath = "../../TestFiles/"
29var outputPath = "../../TestFiles/Output/"
30
31func ImageExtract(reader ElementReader){
32 element := reader.Next()
33
34 for element.GetMp_elem().Swigcptr() != 0{
35 if (element.GetType() == ElementE_image ||
36 element.GetType() == ElementE_inline_image){
37 imageCounter += 1
38 fmt.Println("--> Image: " + strconv.Itoa(imageCounter))
39 fmt.Println(" Width: " + strconv.Itoa(element.GetImageWidth()))
40 fmt.Println(" Height: " + strconv.Itoa(element.GetImageHeight()))
41 fmt.Println(" BPC: " + strconv.Itoa(element.GetBitsPerComponent()))
42
43 ctm := element.GetCTM()
44 x2 := 1
45 y2 := 1
46 pt := NewPoint(float64(x2), float64(y2))
47 point := ctm.Mult(pt)
48 fmt.Println(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.GetM_h(), ctm.GetM_v(), point.GetX(), point.GetY())
49
50 if element.GetType() == ElementE_image{
51 image := NewImage(element.GetXObject())
52
53 fname := "image_extract1_" + strconv.Itoa(imageCounter)
54
55 path := outputPath + fname
56 image.Export(path)
57
58 //path = outputPath + fname + ".tif"
59 //image.ExportAsTiff(path)
60
61 //path = outputPath + fname + ".png"
62 //image.ExportAsPng(path)
63 }
64 }else if element.GetType() == ElementE_form{
65 reader.FormBegin()
66 ImageExtract(reader)
67 reader.End()
68 }
69 element = reader.Next()
70 }
71}
72
73func main(){
74 // Initialize PDFNet
75 PDFNetInitialize(PDFTronLicense.Key)
76
77 // Example 1:
78 // Extract images by traversing the display list for
79 // every page. With this approach it is possible to obtain
80 // image positioning information and DPI.
81
82 doc := NewPDFDoc(inputPath + "newsletter.pdf")
83 doc.InitSecurityHandler()
84
85 reader := NewElementReader()
86
87 // Read every page
88 itr := doc.GetPageIterator()
89 for itr.HasNext(){
90 reader.Begin(itr.Current())
91 ImageExtract(reader)
92 reader.End()
93 itr.Next()
94 }
95
96 doc.Close()
97 fmt.Println("Done.")
98
99 fmt.Println("----------------------------------------------------------------")
100
101 // Example 2:
102 // Extract images by scanning the low-level document.
103
104 doc = NewPDFDoc(inputPath + "newsletter.pdf")
105 doc.InitSecurityHandler()
106 imageCounter= 0
107
108 cosDoc := doc.GetSDFDoc()
109 numObjs := cosDoc.XRefSize()
110 i := uint(1)
111 for i < numObjs{
112 obj := cosDoc.GetObj(i)
113 if(obj != nil && !obj.IsFree() && obj.IsStream()){
114
115 // Process only images
116 itr := obj.Find("Type")
117
118 if (!itr.HasNext()) || (itr.Value().GetName() != "XObject"){
119 i = i + 1
120 continue
121 }
122 itr = obj.Find("Subtype")
123 if (!itr.HasNext()) || (itr.Value().GetName() != "Image"){
124 i = i + 1
125 continue
126 }
127 image := NewImage(obj)
128
129 imageCounter = imageCounter + 1
130 fmt.Println("--> Image: " + strconv.Itoa(imageCounter))
131 fmt.Println(" Width: " + strconv.Itoa(image.GetImageWidth()))
132 fmt.Println(" Height: " + strconv.Itoa(image.GetImageHeight()))
133 fmt.Println(" BPC: " + strconv.Itoa(image.GetBitsPerComponent()))
134
135 fname := "image_extract2_" + strconv.Itoa(imageCounter)
136
137 path := outputPath + fname
138 image.Export(path)
139
140 //path = outputPath + fname + ".tif"
141 //image.ExportAsTiff(path)
142
143 //path = outputPath + fname + ".png"
144 //image.ExportAsPng(path)
145 }
146 i = i + 1
147 }
148 doc.Close()
149 PDFNetTerminate()
150 fmt.Println("Done.")
151}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.Matrix2D;
7import com.pdftron.common.PDFNetException;
8import com.pdftron.pdf.*;
9import com.pdftron.sdf.DictIterator;
10import com.pdftron.sdf.Obj;
11import com.pdftron.sdf.SDFDoc;
12
13///-----------------------------------------------------------------------------------
14/// This sample illustrates one approach to PDF image extraction
15/// using PDFNet.
16///
17/// Note: Besides direct image export, you can also convert PDF images
18/// to Java image, or extract uncompressed/compressed image data directly
19/// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
20/// sample project).
21///-----------------------------------------------------------------------------------
22public class ImageExtractTest {
23
24 // Relative paths to folders containing test files.
25 static String input_path = "../../TestFiles/";
26 static String output_path = "../../TestFiles/Output/";
27
28 static int image_counter = 0;
29
30 static void ImageExtract(ElementReader reader) throws PDFNetException {
31 Element element;
32 while ((element = reader.next()) != null) {
33 switch (element.getType()) {
34 case Element.e_image:
35 case Element.e_inline_image: {
36 System.out.println("--> Image: " + (++image_counter));
37 System.out.println(" Width: " + element.getImageWidth());
38 System.out.println(" Height: " + element.getImageHeight());
39 System.out.println(" BPC: " + element.getBitsPerComponent());
40
41 Matrix2D ctm = element.getCTM();
42 double x2 = 1, y2 = 1;
43 java.awt.geom.Point2D.Double p = ctm.multPoint(x2, y2);
44 System.out.println(String.format(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.getH(), ctm.getV(), p.getX(), p.getY()));
45
46 if (element.getType() == Element.e_image) {
47 Image image = new Image(element.getXObject());
48
49 String fname = "image_extract1_" + image_counter;
50
51 String path = output_path + fname;
52 image.export(path);
53
54 //String path2 = output_path + fname + ".tif";
55 //image.exportAsTiff(path2);
56
57 //String path3 = output_path + fname + ".png";
58 //image.exportAsPng(path3);
59 }
60 }
61 break;
62 case Element.e_form: // Process form XObjects
63 reader.formBegin();
64 ImageExtract(reader);
65 reader.end();
66 break;
67 }
68 }
69 }
70
71 public static void main(String[] args) {
72 // Initialize PDFNet
73 PDFNet.initialize(PDFTronLicense.Key());
74
75 // Example 1:
76 // Extract images by traversing the display list for
77 // every page. With this approach it is possible to obtain
78 // image positioning information and DPI.
79 try (PDFDoc doc = new PDFDoc((input_path + "newsletter.pdf"))) {
80 doc.initSecurityHandler();
81 ElementReader reader = new ElementReader();
82 // Read every page
83 for (PageIterator itr = doc.getPageIterator(); itr.hasNext(); ) {
84 reader.begin(itr.next());
85 ImageExtract(reader);
86 reader.end();
87 }
88 System.out.println("Done.");
89 } catch (Exception e) {
90 e.printStackTrace();
91 }
92
93
94 System.out.println("----------------------------------------------------------------");
95
96 // Example 2:
97 // Extract images by scanning the low-level document.
98 try (PDFDoc doc = new PDFDoc((input_path + "newsletter.pdf"))) {
99 doc.initSecurityHandler();
100 image_counter = 0;
101 SDFDoc cos_doc = doc.getSDFDoc();
102 long num_objs = cos_doc.xRefSize();
103 for (int i = 1; i < num_objs; ++i) {
104 Obj obj = cos_doc.getObj(i);
105 if (obj != null && !obj.isFree() && obj.isStream()) {
106 // Process only images
107 DictIterator itr = obj.find("Type");
108 if (!itr.hasNext() || !itr.value().getName().equals("XObject"))
109 continue;
110
111 itr = obj.find("Subtype");
112 if (!itr.hasNext() || !itr.value().getName().equals("Image"))
113 continue;
114
115 Image image = new Image(obj);
116
117 System.out.println("--> Image: " + (++image_counter));
118 System.out.println(" Width: " + image.getImageWidth());
119 System.out.println(" Height: " + image.getImageHeight());
120 System.out.println(" BPC: " + image.getBitsPerComponent());
121
122 String fname = "image_extract2_" + image_counter;
123 String path = output_path + fname;
124 image.export(path);
125
126 //String path= output_path + fname + ".tif";
127 //image.exportAsTiff(path);
128
129 //String path = output_path + fname + ".png";
130 //image.exportAsPng(path);
131 }
132 }
133
134 System.out.println("Done.");
135 } catch (Exception e) {
136 e.printStackTrace();
137 }
138
139 PDFNet.terminate();
140 }
141}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#include <PDF/PDFNet.h>
7#include <PDF/PDFDoc.h>
8#include <PDF/ElementReader.h>
9#include <PDF/Image.h>
10#include "../../LicenseKey/CPP/LicenseKey.h"
11
12//-----------------------------------------------------------------------------------
13// This sample illustrates one approach to PDF image extraction
14// using PDFNet.
15//
16// Note: Besides direct image export, you can also convert PDF images
17// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
18// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
19// sample project).
20//-----------------------------------------------------------------------------------
21
22#include <iostream>
23#include <iomanip>
24
25using namespace std;
26
27using namespace pdftron;
28using namespace Common;
29using namespace SDF;
30using namespace PDF;
31
32// Relative paths to folders containing test files.
33string input_path = "../../TestFiles/";
34string output_path = "../../TestFiles/Output/";
35
36int image_counter = 0;
37
38void ImageExtract(ElementReader& reader)
39{
40 // Set the precision for printing doubles on cout to 3 decimal places.
41 ios iostate(NULL);
42 iostate.copyfmt(cout);
43 cout << fixed << showpoint << setprecision(3);
44
45 Element element;
46 while ((element = reader.Next()) != 0)
47 {
48 switch (element.GetType())
49 {
50 case Element::e_image:
51 case Element::e_inline_image:
52 {
53 cout << "--> Image: " << ++image_counter << endl;
54 cout << " Width: " << element.GetImageWidth() << endl;
55 cout << " Height: " << element.GetImageHeight() << endl;
56 cout << " BPC: " << element.GetBitsPerComponent() << endl;
57
58 Common::Matrix2D ctm = element.GetCTM();
59 double x2=1, y2=1;
60 ctm.Mult(x2, y2);
61 printf(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f\n", ctm.m_h, ctm.m_v, x2, y2);
62
63 if (element.GetType() == Element::e_image)
64 {
65 Image image(element.GetXObject());
66
67 char fname[256];
68 sprintf(fname, "image_extract1_%d", image_counter);
69
70 string path(output_path + fname);
71 image.Export(path.c_str());
72
73 //string path(output_path + fname + ".tif");
74 //image.ExportAsTiff(path.c_str());
75
76 //string path(output_path + fname + ".png");
77 //image.ExportAsPng(path.c_str());
78 }
79 }
80 break;
81 case Element::e_form: // Process form XObjects
82 reader.FormBegin();
83 ImageExtract(reader);
84 reader.End();
85 break;
86 }
87 }
88
89 // Reset cout's state.
90 cout.copyfmt(iostate);
91}
92
93int main(int argc, char *argv[])
94{
95 int ret = 0;
96
97 // Initialize PDFNet
98 PDFNet::Initialize(LicenseKey);
99
100 // Example 1:
101 // Extract images by traversing the display list for
102 // every page. With this approach it is possible to obtain
103 // image positioning information and DPI.
104 try
105 {
106 PDFDoc doc((input_path + "newsletter.pdf").c_str());
107 doc.InitSecurityHandler();
108 ElementReader reader;
109 // Read every page
110 for (PageIterator itr=doc.GetPageIterator(); itr.HasNext(); itr.Next())
111 {
112 reader.Begin(itr.Current());
113 ImageExtract(reader);
114 reader.End();
115 }
116
117 cout << "Done." << endl;
118 }
119 catch(Common::Exception& e)
120 {
121 cout << e << endl;
122 ret = 1;
123 }
124 catch(...)
125 {
126 cout << "Unknown Exception" << endl;
127 ret = 1;
128 }
129
130 cout << "----------------------------------------------------------------" << endl;
131
132 // Example 2:
133 // Extract images by scanning the low-level document.
134 try
135 {
136 PDFDoc doc((input_path + "newsletter.pdf").c_str());
137
138 doc.InitSecurityHandler();
139 image_counter = 0;
140
141 SDFDoc& cos_doc=doc.GetSDFDoc();
142 int num_objs = cos_doc.XRefSize();
143 for(int i=1; i<num_objs; ++i)
144 {
145 Obj obj = cos_doc.GetObj(i);
146 if(obj && !obj.IsFree() && obj.IsStream())
147 {
148 // Process only images
149 DictIterator itr = obj.Find("Type");
150 if(!itr.HasNext() || strcmp(itr.Value().GetName(), "XObject"))
151 continue;
152
153 itr = obj.Find("Subtype");
154 if(!itr.HasNext() || strcmp(itr.Value().GetName(), "Image"))
155 continue;
156
157 PDF::Image image(obj);
158 cout << "--> Image: " << ++image_counter << endl;
159 cout << " Width: " << image.GetImageWidth() << endl;
160 cout << " Height: " << image.GetImageHeight() << endl;
161 cout << " BPC: " << image.GetBitsPerComponent() << endl;
162
163 char fname[256];
164 sprintf(fname, "image_extract2_%d", image_counter);
165 string path(output_path + fname);
166 image.Export(path.c_str());
167
168 //string path(output_path + fname + ".tif");
169 //image.ExportAsTiff(path.c_str());
170
171 //string path(output_path + fname + ".png");
172 //image.ExportAsPng(path.c_str());
173 }
174 }
175
176 cout << "Done." << endl;
177 }
178 catch(Common::Exception& e)
179 {
180 cout << e << endl;
181 ret = 1;
182 }
183 catch(...)
184 {
185 cout << "Unknown Exception" << endl;
186 ret = 1;
187 }
188
189 PDFNet::Terminate();
190 return ret;
191}
1<?php
2//---------------------------------------------------------------------------------------
3// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
4// Consult LICENSE.txt regarding license information.
5//---------------------------------------------------------------------------------------
6if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
7include("../../../PDFNetC/Lib/PDFNetPHP.php");
8include("../../LicenseKey/PHP/LicenseKey.php");
9
10// Relative path to the folder containing the test files.
11$input_path = getcwd()."/../../TestFiles/";
12$output_path = $input_path."Output/";
13
14//-----------------------------------------------------------------------------------
15// This sample illustrates one approach to PDF image extraction
16// using PDFNet.
17//
18// Note: Besides direct image export, you can also convert PDF images
19// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
20// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
21// sample project).
22//-----------------------------------------------------------------------------------
23
24$image_counter = 0;
25
/**
 * Walks every element returned by $reader, prints the size,
 * bits-per-component and placement of each image, and exports every image
 * XObject to a file under the global $output_path. Form XObjects are entered
 * recursively. Uses and increments the global $image_counter.
 *
 * @param ElementReader $reader reader the caller has already started with Begin() or FormBegin()
 */
function ImageExtract($reader)
{
	global $image_counter, $output_path;

	for ($element = $reader->Next(); $element != null; $element = $reader->Next())
	{
		$kind = $element->GetType();

		if ($kind == Element::e_form)
		{
			// Process form XObjects
			$reader->FormBegin();
			ImageExtract($reader);
			$reader->End();
			continue;
		}

		if ($kind != Element::e_image && $kind != Element::e_inline_image)
		{
			continue;
		}

		++$image_counter;
		echo nl2br("--> Image: ".$image_counter."\n");
		echo nl2br(" Width: ".$element->GetImageWidth()."\n");
		echo nl2br(" Height: ".$element->GetImageHeight()."\n");
		echo nl2br(" BPC: ".$element->GetBitsPerComponent()."\n");

		// (x1, y1) is the translation part of the CTM; (x2, y2) is the
		// point (1, 1) after being transformed by the CTM.
		$ctm = $element->GetCTM();
		$point = $ctm->Mult(new Point(1.0, 1.0));
		printf(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f\n", $ctm->m_h, $ctm->m_v, $point->x, $point->y);

		// Only e_image elements are exported; inline images are reported
		// above but skipped here.
		if ($kind == Element::e_image)
		{
			$image = new Image($element->GetXObject());

			$path = $output_path."image_extract1_".$image_counter;
			$image->Export($path);

			//$image->ExportAsTiff($path.".tif");
			//$image->ExportAsPng($path.".png");
		}
	}
}
71
// Initialize PDFNet
PDFNet::Initialize($LicenseKey);
// Wait for fonts to be loaded if they haven't already: PHP can run into
// errors when shutting down if font loading is still in progress.
PDFNet::GetSystemFontList();

// Example 1:
// Traverse the display list of every page. This route also yields
// image positioning information and DPI.
$doc = new PDFDoc($input_path."newsletter.pdf");
$doc->InitSecurityHandler();

$reader = new ElementReader();
// Read every page
$pages = $doc->GetPageIterator();
while ($pages->HasNext())
{
	$reader->Begin($pages->Current());
	ImageExtract($reader);
	$reader->End();
	$pages->Next();
}

$doc->Close();
echo nl2br("Done.\n");

echo nl2br("----------------------------------------------------------------\n");

// Example 2:
// Scan the low-level document object by object.
$doc = new PDFDoc($input_path."newsletter.pdf");

$doc->InitSecurityHandler();
$image_counter = 0;

$cos_doc = $doc->GetSDFDoc();
$object_count = $cos_doc->XRefSize();
for ($obj_num = 1; $obj_num < $object_count; ++$obj_num)
{
	$candidate = $cos_doc->GetObj($obj_num);
	if ($candidate == null || $candidate->IsFree() || !$candidate->IsStream())
	{
		continue;
	}

	// Keep only streams tagged /Type /XObject and /Subtype /Image.
	$entry = $candidate->Find("Type");
	if (!$entry->HasNext() || $entry->Value()->GetName() != "XObject")
	{
		continue;
	}

	$entry = $candidate->Find("Subtype");
	if (!$entry->HasNext() || $entry->Value()->GetName() != "Image")
	{
		continue;
	}

	$image = new Image($candidate);
	++$image_counter;
	echo nl2br("--> Image: ".$image_counter."\n");
	echo nl2br(" Width: ".$image->GetImageWidth()."\n");
	echo nl2br(" Height: ".$image->GetImageHeight()."\n");
	echo nl2br(" BPC: ".$image->GetBitsPerComponent()."\n");

	$path = $output_path."image_extract2_".$image_counter;
	$image->Export($path);

	//$image->ExportAsTiff($path.".tif");
	//$image->ExportAsPng($path.".png");
}

$doc->Close();
PDFNet::Terminate();
echo nl2br("Done.\n");
145
146?>
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6//-----------------------------------------------------------------------------------
7// This sample illustrates one approach to PDF image extraction
8// using PDFNet.
9//
10// Note: Besides direct image export, you can also convert PDF images
11// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
12// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
13// sample project).
14//-----------------------------------------------------------------------------------
15
16const { PDFNet } = require('@pdftron/pdfnet-node');
17const PDFTronLicense = require('../LicenseKey/LicenseKey');
18
19((exports) => {
20 'use strict';
21
22 exports.runImageExtractTest = () => {
23
24 let image_counter = 0;
25 const outputPath = '../TestFiles/Output/';
26
27 const imageExtract = async (reader) => {
28 let element;
29 while ((element = await reader.next()) !== null) {
30 switch (await element.getType()) {
31 case PDFNet.Element.Type.e_image:
32 case PDFNet.Element.Type.e_inline_image:
33 console.log('--> Image: ' + ++image_counter);
34 console.log(' Width: ' + await element.getImageWidth());
35 console.log(' Height: ' + await element.getImageHeight());
36 console.log(' BPC: ' + await element.getBitsPerComponent());
37
38 const ctm = await element.getCTM();
39 let x2 = 1, y2 = 1;
40 const result = await ctm.mult(x2, y2);
41 x2 = result.x;
42 y2 = result.y;
43 console.log(' Coords: x1=' + ctm.m_h.toFixed(2) + ', y1=' + ctm.m_v.toFixed(2)
44 + ', x2=' + x2.toFixed(2) + ', y2=' + y2.toFixed(2));
45
46 if (await element.getType() == PDFNet.Element.Type.e_image) {
47 const image = await PDFNet.Image.createFromObj(await element.getXObject());
48 image.export(outputPath + 'image_extract1_' + image_counter);
49 }
50 break;
51 case PDFNet.Element.Type.e_form: // Process form XObjects
52 reader.formBegin();
53 await imageExtract(reader);
54 reader.end();
55 break;
56 }
57 }
58 }
59
60 const main = async () => {
61
62 // Example 1:
63 // Extract images by traversing the display list for
64 // every page. With this approach it is possible to obtain
65 // image positioning information and DPI.
66 try {
67 const doc = await PDFNet.PDFDoc.createFromFilePath('../TestFiles/newsletter.pdf');
68 doc.initSecurityHandler();
69
70 const reader = await PDFNet.ElementReader.create();
71 const itr = await doc.getPageIterator(1);
72 // Read every page
73 for (itr; await itr.hasNext(); await itr.next()) {
74 const page = await itr.current();
75 reader.beginOnPage(page);
76 await imageExtract(reader);
77 reader.end();
78 }
79
80 console.log('Done.');
81 } catch (err) {
82 console.log(err);
83 }
84
85 console.log('----------------------------------------------------------------');
86
87 // Example 2:
88 // Extract images by scanning the low-level document.
89 try {
90 const doc = await PDFNet.PDFDoc.createFromFilePath('../TestFiles/newsletter.pdf');
91 doc.initSecurityHandler();
92 image_counter = 0;
93
94 const cos_doc = await doc.getSDFDoc();
95 const num_objs = await cos_doc.xRefSize();
96 for (var i = 0; i < num_objs; i++) {
97 const obj = await cos_doc.getObj(i);
98 if (obj && !(await obj.isFree()) && await obj.isStream()) {
99 // Process only images
100 var itr = await obj.find('Type');
101 if (!(await itr.hasNext()) || await (await itr.value()).getName() !== 'XObject')
102 continue;
103
104 itr = await obj.find('Subtype');
105 if (!(await itr.hasNext()) || await (await itr.value()).getName() !== 'Image')
106 continue;
107 const image = await PDFNet.Image.createFromObj(obj);
108 console.log('--> Image: ' + ++image_counter);
109 console.log(' Width: ' + await image.getImageWidth());
110 console.log(' Height: ' + await image.getImageHeight());
111 console.log(' BPC: ' + await image.getBitsPerComponent());
112
113 image.export(outputPath + 'image_extract2_' + image_counter);
114 }
115 }
116
117 console.log('Done.');
118 } catch (err) {
119 console.log(err);
120 }
121
122 }
123 PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error) {
124 console.log('Error: ' + JSON.stringify(error));
125 }).then(function(){ return PDFNet.shutdown(); });
126 };
127 exports.runImageExtractTest();
128})(exports);
129// eslint-disable-next-line spaced-comment
130//# sourceURL=ImageExtractTest.js
1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10
11sys.path.append("../../LicenseKey/PYTHON")
12from LicenseKey import *
13
14#-----------------------------------------------------------------------------------
15# This sample illustrates one approach to PDF image extraction
16# using PDFNet.
17#
18# Note: Besides direct image export, you can also convert PDF images
19# to GDI+ Bitmap, or extract uncompressed/compressed image data directly
20# using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
21# sample project).
22#-----------------------------------------------------------------------------------
23
24image_counter = 0
25
26# Relative path to the folder containing the test files.
27input_path = "../../TestFiles/"
28output_path = "../../TestFiles/Output/"
29
def ImageExtract(reader):
    """Report and export the images reachable through ``reader``.

    Walks every element returned by ``reader.Next()`` until it returns None.
    For each image or inline-image element the size, bits-per-component and
    placement are printed; image (non-inline) elements are additionally
    exported under ``output_path``. Form XObjects are entered recursively.
    Increments the module-level ``image_counter`` once per reported image.

    ``reader`` must already have been started by the caller with Begin() or
    FormBegin().
    """
    global image_counter

    element = reader.Next()
    while element is not None:
        kind = element.GetType()

        if kind == Element.e_form:
            # Process form XObjects
            reader.FormBegin()
            ImageExtract(reader)
            reader.End()
        elif kind == Element.e_image or kind == Element.e_inline_image:
            image_counter += 1
            print("--> Image: " + str(image_counter))
            print(" Width: " + str(element.GetImageWidth()))
            print(" Height: " + str(element.GetImageHeight()))
            print(" BPC: " + str(element.GetBitsPerComponent()))

            # (x1, y1) is the translation part of the CTM; (x2, y2) is the
            # point (1, 1) after being transformed by the CTM.
            ctm = element.GetCTM()
            point = ctm.Mult(Point(1, 1))
            print(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f" % (ctm.m_h, ctm.m_v, point.x, point.y))

            # Only e_image elements are exported; inline images are
            # reported above but skipped here.
            if kind == Element.e_image:
                image = Image(element.GetXObject())

                path = output_path + "image_extract1_" + str(image_counter)
                image.Export(path)

                #image.ExportAsTiff(path + ".tif")
                #image.ExportAsPng(path + ".png")

        element = reader.Next()
67
def main():
    """Run two independent image-extraction passes over newsletter.pdf.

    Example 1 walks each page's element list through ``ImageExtract``.
    Example 2 scans the low-level object table and exports every stream
    tagged /Type /XObject and /Subtype /Image. PDFNet is initialized first
    and terminated after the second pass.
    """
    # Without this declaration the reset below would create a local variable
    # that merely shadows the module-level counter used by ImageExtract.
    global image_counter

    # Initialize PDFNet
    PDFNet.Initialize(LicenseKey)

    # Example 1:
    # Extract images by traversing the display list for
    # every page. With this approach it is possible to obtain
    # image positioning information and DPI.

    doc = PDFDoc(input_path + "newsletter.pdf")
    doc.InitSecurityHandler()

    reader = ElementReader()

    # Read every page
    itr = doc.GetPageIterator()
    while itr.HasNext():
        reader.Begin(itr.Current())
        ImageExtract(reader)
        reader.End()
        itr.Next()

    doc.Close()
    print("Done.")

    print("----------------------------------------------------------------")

    # Example 2:
    # Extract images by scanning the low-level document.

    doc = PDFDoc(input_path + "newsletter.pdf")
    doc.InitSecurityHandler()
    image_counter = 0

    cos_doc = doc.GetSDFDoc()
    num_objs = cos_doc.XRefSize()
    # Object numbers are scanned from 1 up to, but not including, XRefSize().
    for i in range(1, num_objs):
        obj = cos_doc.GetObj(i)
        if obj is None or obj.IsFree() or not obj.IsStream():
            continue

        # Process only images
        itr = obj.Find("Type")
        if not itr.HasNext() or itr.Value().GetName() != "XObject":
            continue

        itr = obj.Find("Subtype")
        if not itr.HasNext() or itr.Value().GetName() != "Image":
            continue

        image = Image(obj)

        image_counter += 1
        print("--> Image: " + str(image_counter))
        print(" Width: " + str(image.GetImageWidth()))
        print(" Height: " + str(image.GetImageHeight()))
        print(" BPC: " + str(image.GetBitsPerComponent()))

        fname = "image_extract2_" + str(image_counter)

        path = output_path + fname
        image.Export(path)

        #path = output_path + fname + ".tif"
        #image.ExportAsTiff(path)

        #path = output_path + fname + ".png"
        #image.ExportAsPng(path)

    doc.Close()
    PDFNet.Terminate()
    print("Done.")
143
144if __name__ == '__main__':
145 main()
1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6require '../../../PDFNetC/Lib/PDFNetRuby'
7include PDFNetRuby
8require '../../LicenseKey/RUBY/LicenseKey'
9
10$stdout.sync = true
11
12#-----------------------------------------------------------------------------------
13# This sample illustrates one approach to PDF image extraction
14# using PDFNet.
15#
16# Note: Besides direct image export, you can also convert PDF images
17# to GDI+ Bitmap, or extract uncompressed/compressed image data directly
18# using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
19# sample project).
20#-----------------------------------------------------------------------------------
21
# Running index of the images reported so far; read and updated by
# ImageExtract and reset by the main script before Example 2.
$image_counter = 0

# Relative path to the folder containing the test files.
$input_path = '../../TestFiles/'
$output_path = '../../TestFiles/Output/'
27
# Reports every image reachable from the reader's current position.
#
# Pulls elements from reader.Next() until it returns nil. Image and
# inline-image elements are counted in $image_counter and their width,
# height, bits per component and CTM-derived corner coordinates are printed.
# Image elements (not inline images) are additionally exported to a path
# under $output_path. Form elements are entered via reader.FormBegin() and
# scanned recursively.
def ImageExtract(reader)
  element = reader.Next()
  until element.nil?
    kind = element.GetType()

    if kind == Element::E_image || kind == Element::E_inline_image
      $image_counter += 1
      puts "--> Image: #{$image_counter}"
      puts " Width: #{element.GetImageWidth()}"
      puts " Height: #{element.GetImageHeight()}"
      puts " BPC: #{element.GetBitsPerComponent()}"

      # (m_h, m_v) is the CTM translation; the point (1, 1) multiplied by
      # the CTM gives the opposite corner.
      ctm = element.GetCTM()
      corner = ctm.Mult(Point.new(1, 1))
      puts format(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.m_h, ctm.m_v, corner.x, corner.y)

      if kind == Element::E_image
        image = Image.new(element.GetXObject())
        target = $output_path + "image_extract1_" + $image_counter.to_s()
        image.Export(target)

        # Alternatives to Export():
        #   image.ExportAsTiff(target + ".tif")
        #   image.ExportAsPng(target + ".png")
      end
    elsif kind == Element::E_form
      # Descend into the form XObject and scan its content too.
      reader.FormBegin()
      ImageExtract(reader)
      reader.End()
    end

    element = reader.Next()
  end
end
69
# Main script: runs both extraction examples against newsletter.pdf.
# Each document is closed, and PDFNet terminated, even when an error is
# raised part-way through.

# Initialize PDFNet
PDFNet.Initialize(PDFTronLicense.Key)

begin
  # Example 1:
  # Extract images by traversing the display list for
  # every page. With this approach it is possible to obtain
  # image positioning information and DPI.
  doc = PDFDoc.new($input_path + "newsletter.pdf")
  begin
    doc.InitSecurityHandler()

    reader = ElementReader.new()

    # Read every page
    itr = doc.GetPageIterator()
    while itr.HasNext() do
      reader.Begin(itr.Current())
      ImageExtract(reader)
      reader.End()
      itr.Next()
    end
  ensure
    doc.Close()
  end

  puts "Done."
  puts "----------------------------------------------------------------"

  # Example 2:
  # Extract images by scanning the low-level document.
  doc = PDFDoc.new($input_path + "newsletter.pdf")
  begin
    doc.InitSecurityHandler()
    $image_counter = 0

    cos_doc = doc.GetSDFDoc()
    num_objs = cos_doc.XRefSize()

    # Object numbers are scanned from 1 up to XRefSize() - 1.
    (1...num_objs).each do |i|
      obj = cos_doc.GetObj(i)
      next if obj.nil? || obj.IsFree() || !obj.IsStream()

      # Process only images
      itr = obj.Find("Type")
      next if !itr.HasNext() || itr.Value().GetName() != "XObject"

      itr = obj.Find("Subtype")
      next if !itr.HasNext() || itr.Value().GetName() != "Image"

      image = Image.new(obj)
      $image_counter = $image_counter + 1
      puts "--> Image: " + $image_counter.to_s()
      puts " Width: " + image.GetImageWidth().to_s()
      puts " Height: " + image.GetImageHeight().to_s()
      puts " BPC: " + image.GetBitsPerComponent().to_s()

      fname = "image_extract2_" + $image_counter.to_s()
      image.Export($output_path + fname)

      # Alternatives to Export():
      #   image.ExportAsTiff($output_path + fname + ".tif")
      #   image.ExportAsPng($output_path + fname + ".png")
    end
  ensure
    doc.Close()
  end
ensure
  PDFNet.Terminate
end
puts "Done."
1'
2' Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3'
4
5Imports System
6Imports System.Drawing
7Imports System.Drawing.Imaging
8
9Imports pdftron
10Imports pdftron.Common
11Imports PDFTRON.SDF
12Imports pdftron.PDF
13
Module ImageExtractTestVB
    Dim pdfNetLoader As PDFNetLoader
    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub

    '-----------------------------------------------------------------------------------
    ' This sample illustrates one approach to PDF image extraction
    ' using PDFNet.
    '
    ' Note: Besides direct image export, you can also convert PDF images
    ' to GDI+ Bitmap, or extract uncompressed/compressed image data directly
    ' using element.GetImageData() (as illustrated in ElementReaderAdv
    ' sample project).
    '-----------------------------------------------------------------------------------

    ' Running index of the images reported so far; shared by ImageExtract and Main.
    Dim image_counter As Integer = 0

    ' Relative path to the folder containing test files.
    Dim input_path As String = "../../../../TestFiles/"
    Dim output_path As String = "../../../../TestFiles/Output/"


    ' Reports every image reachable from the reader's current position.
    '
    ' Pulls elements from reader.Next() until it returns Nothing. Image and
    ' inline-image elements are counted in image_counter and their width,
    ' height, bits per component and CTM-derived corner coordinates are written
    ' to the console. Image elements (not inline images) are also exported to a
    ' path under output_path. Form elements are entered via reader.FormBegin()
    ' and scanned recursively.
    '
    ' reader: an ElementReader on which Begin() (or FormBegin()) has already
    '         been called by the caller.
    Sub ImageExtract(ByRef reader As ElementReader)
        Dim elem As Element = reader.Next()
        While elem IsNot Nothing ' Read page contents
            Dim elemType As Element.Type = elem.GetType()

            If elemType = Element.Type.e_image OrElse elemType = Element.Type.e_inline_image Then
                image_counter = image_counter + 1
                Console.WriteLine("--> Image: {0}", image_counter)
                Console.WriteLine(" Width: {0}", elem.GetImageWidth())
                Console.WriteLine(" Height: {0}", elem.GetImageHeight())
                Console.WriteLine(" BPC: {0}", elem.GetBitsPerComponent())

                ' (m_h, m_v) is the CTM translation. Mult() receives x2/y2 and the
                ' values are read back afterwards as the opposite corner.
                Dim ctm As Matrix2D = elem.GetCTM()
                Dim x2 As Double = 1
                Dim y2 As Double = 1
                ctm.Mult(x2, y2)
                Console.WriteLine(" Coords: x1={0:N2}, y1={1:N2}, x2={2:N2}, y2={3:N2}", ctm.m_h, ctm.m_v, x2, y2)

                ' Only image elements are exported here; inline images are skipped.
                If elemType = Element.Type.e_image Then
                    Dim fname As String = output_path + "image_extract1_" + image_counter.ToString()
                    Dim image As pdftron.PDF.Image = New pdftron.PDF.Image(elem.GetXObject())
                    image.Export(fname) ' or ExportAsPng() or ExportAsTiff() ...

                    ' Convert PDF bitmap to GDI+ Bitmap...
                    ' Dim bmp As Bitmap = elem.GetBitmap()
                    ' bmp.Save(fname, ImageFormat.Png)
                    ' bmp.Dispose()

                    ' Instead of converting PDF images to a Bitmap, you can also extract
                    ' uncompressed/compressed image data directly using element.GetImageData()
                    ' as illustrated in ElementReaderAdv sample project.
                End If
            ElseIf elemType = Element.Type.e_form Then
                reader.FormBegin() ' Process form XObjects
                ImageExtract(reader)
                reader.End()
            End If

            elem = reader.Next()
        End While
    End Sub

    ' Returns True when obj is a non-free stream whose "Type" entry is
    ' "XObject" and whose "Subtype" entry is "Image".
    ' OrElse/AndAlso short-circuit, so obj is never dereferenced when it is Nothing.
    Private Function IsImageXObject(ByVal obj As Obj) As Boolean
        If obj Is Nothing OrElse obj.IsFree() OrElse Not obj.IsStream() Then
            Return False
        End If

        Dim itr As DictIterator = obj.Find("Type")
        If Not itr.HasNext() OrElse itr.Value().GetName() <> "XObject" Then
            Return False
        End If

        itr = obj.Find("Subtype")
        Return itr.HasNext() AndAlso itr.Value().GetName() = "Image"
    End Function

    ' Entry point: runs both extraction examples against newsletter.pdf and
    ' writes any error message to the console.
    Sub Main()

        PDFNet.Initialize(PDFTronLicense.Key)

        ' Example 1:
        ' Extract images by traversing the display list for
        ' every page. With this approach it is possible to obtain
        ' image positioning information and DPI.
        Try
            Using doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf")
                doc.InitSecurityHandler()
                Using reader As ElementReader = New ElementReader

                    Dim itr As PageIterator = doc.GetPageIterator()
                    While itr.HasNext()
                        reader.Begin(itr.Current())
                        ImageExtract(reader)
                        reader.End()
                        itr.Next()
                    End While

                End Using
            End Using
            Console.WriteLine("Done.")
        Catch ex As PDFNetException
            Console.WriteLine(ex.Message)
        Catch ex As Exception
            ' Console sample: report on the console instead of a modal MsgBox.
            Console.WriteLine(ex.Message)
        End Try
        Console.WriteLine("----------------------------------------------------------------")

        ' Example 2:
        ' Extract images by scanning the low-level document.
        Try
            Using doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf")
                doc.InitSecurityHandler()
                image_counter = 0

                Dim cos_doc As SDFDoc = doc.GetSDFDoc()
                Dim num_objs As Integer = cos_doc.XRefSize()

                ' Object numbers are scanned from 1 up to XRefSize() - 1.
                For i As Integer = 1 To num_objs - 1
                    Dim obj As Obj = cos_doc.GetObj(i)

                    ' Process only images
                    If IsImageXObject(obj) Then
                        Dim image As pdftron.PDF.Image = New pdftron.PDF.Image(obj)

                        image_counter = image_counter + 1
                        Console.WriteLine("--> Image: {0}", image_counter)
                        Console.WriteLine(" Width: {0}", image.GetImageWidth())
                        Console.WriteLine(" Height: {0}", image.GetImageHeight())
                        Console.WriteLine(" BPC: {0}", image.GetBitsPerComponent())

                        Dim fname As String = output_path + "image_extract2_" + image_counter.ToString()
                        image.Export(fname) ' or ExportAsPng() or ExportAsTiff() ...
                    End If
                Next
            End Using
            Console.WriteLine("Done.")
        Catch ex As PDFNetException
            Console.WriteLine(ex.Message)
        Catch ex As Exception
            ' Console sample: report on the console instead of a modal MsgBox.
            Console.WriteLine(ex.Message)
        End Try
        PDFNet.Terminate()
    End Sub
End Module
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales