ImageExtract

Sample C# code for using the Apryse SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our Server SDK and PDF Data Extraction SDK capabilities.

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using System.Drawing;
7using System.Drawing.Imaging;
8
9using pdftron;
10using pdftron.Common;
11using pdftron.PDF;
12using pdftron.SDF;
13using pdftron.Filters;
14
namespace ImageExtractTestCS
{
    /// <summary>
    /// This sample illustrates one approach to PDF image extraction using PDFNet.
    ///
    /// Two strategies are shown:
    ///   1. Traversing the display list of every page with an ElementReader, which
    ///      also gives access to each image's placement on the page.
    ///   2. Scanning the low-level (SDF/COS) object table for image XObject streams.
    ///
    /// Note: Besides direct image export, you can also convert PDF images
    /// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
    /// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
    /// sample project).
    /// </summary>
    class Class1
    {
        // Obtains the PDFNetLoader instance when the type is initialized.
        // NOTE(review): presumably this makes sure the native PDFNet library is
        // loaded before any SDK call - confirm against the PDFNetLoader docs.
        private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();

        // The explicit (empty) static constructor is kept from the original sample;
        // it makes the field initializers above run at a deterministic point
        // (first access to the type) instead of at the runtime's discretion.
        static Class1() {}

        // Running count of images reported so far; also used to build output file names.
        static int image_counter = 0;

        // Relative path to the folder containing test files.
        static readonly string input_path = "../../../../TestFiles/";
        static readonly string output_path = "../../../../TestFiles/Output/";

        /// <summary>
        /// Walks the elements currently exposed by <paramref name="reader"/>, prints
        /// size and placement information for every image (regular and inline) and
        /// exports regular image XObjects to <c>output_path</c>. Form XObjects are
        /// processed recursively.
        /// </summary>
        /// <param name="doc">The document being processed (passed through on recursion).</param>
        /// <param name="reader">
        /// A reader on which Begin()/FormBegin() has already been called; the caller
        /// is responsible for the matching End().
        /// </param>
        static void ImageExtract(PDFDoc doc, ElementReader reader)
        {
            Element element;
            while ((element = reader.Next()) != null)
            {
                switch (element.GetType())
                {
                    case Element.Type.e_image:
                    case Element.Type.e_inline_image:
                    {
                        Console.WriteLine("--> Image: {0}", ++image_counter);
                        Console.WriteLine(" Width: {0}", element.GetImageWidth());
                        Console.WriteLine(" Height: {0}", element.GetImageHeight());
                        Console.WriteLine(" BPC: {0}", element.GetBitsPerComponent());

                        // Transform the point (1, 1) by the element's CTM to get the
                        // second corner; the CTM translation (m_h, m_v) is reported
                        // as the first corner.
                        Matrix2D ctm = element.GetCTM();
                        double x2 = 1, y2 = 1;
                        ctm.Mult(ref x2, ref y2);

                        // Write the coords to 2 decimal places.
                        Console.WriteLine(" Coords: x1={0:N2}, y1={1:N2}, x2={2:N2}, y2={3:N2}", ctm.m_h, ctm.m_v, x2, y2);

                        // Only regular image XObjects are exported; inline images are
                        // reported above but not written to disk.
                        if (element.GetType() == Element.Type.e_image)
                        {
                            pdftron.PDF.Image image = new pdftron.PDF.Image(element.GetXObject());

                            string fname = output_path + "image_extract1_" + image_counter.ToString();
                            image.Export(fname); // or ExportAsPng() or ExportAsTiff() ...
                        }
                        break;
                    }
                    case Element.Type.e_form: // Process form XObjects
                    {
                        // Descend into the form's own content stream, then return
                        // to the parent display list.
                        reader.FormBegin();
                        ImageExtract(doc, reader);
                        reader.End();
                        break;
                    }
                }
            }
        }

        /// <summary>
        /// Entry point: runs both extraction examples against "newsletter.pdf"
        /// located in <c>input_path</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            PDFNet.Initialize(PDFTronLicense.Key);

            try
            {
                // Example 1:
                // Extract images by traversing the display list for
                // every page. With this approach it is possible to obtain
                // image positioning information and DPI.
                try
                {
                    using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                    using (ElementReader reader = new ElementReader())
                    {
                        doc.InitSecurityHandler();
                        PageIterator itr;
                        for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
                        {
                            reader.Begin(itr.Current());
                            ImageExtract(doc, reader);
                            reader.End();
                        }

                        Console.WriteLine("Done.");
                    }
                }
                catch (PDFNetException e)
                {
                    Console.WriteLine(e.Message);
                }

                Console.WriteLine("----------------------------------------------------------------");

                // Example 2:
                // Extract images by scanning the low-level document.
                try
                {
                    using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                    {
                        doc.InitSecurityHandler();
                        image_counter = 0; // restart numbering for the second example

                        SDFDoc cos_doc = doc.GetSDFDoc();
                        int num_objs = cos_doc.XRefSize();

                        // Object numbers start at 1 here; entry 0 is skipped.
                        for (int i = 1; i < num_objs; ++i)
                        {
                            Obj obj = cos_doc.GetObj(i);
                            if (obj != null && !obj.IsFree() && obj.IsStream())
                            {
                                // Process only images
                                DictIterator itr = obj.Find("Subtype");
                                if (!itr.HasNext() || itr.Value().GetName() != "Image")
                                    continue;

                                itr = obj.Find("Type");
                                if (!itr.HasNext() || itr.Value().GetName() != "XObject")
                                    continue;

                                pdftron.PDF.Image image = new pdftron.PDF.Image(obj);

                                Console.WriteLine("--> Image: {0}", ++image_counter);
                                Console.WriteLine(" Width: {0}", image.GetImageWidth());
                                Console.WriteLine(" Height: {0}", image.GetImageHeight());
                                Console.WriteLine(" BPC: {0}", image.GetBitsPerComponent());

                                string fname = output_path + "image_extract2_" + image_counter.ToString();
                                image.Export(fname); // or ExportAsPng() or ExportAsTiff() ...

                                // Convert PDF bitmap to GDI+ Bitmap...
                                //Bitmap bmp = image.GetBitmap();
                                //bmp.Save(fname, ImageFormat.Png);
                                //bmp.Dispose();

                                // Instead of converting PDF images to a Bitmap, you can also extract
                                // uncompressed/compressed image data directly using element.GetImageData()
                                // as illustrated in ElementReaderAdv sample project.
                            }
                        }
                        Console.WriteLine("Done.");
                    }
                }
                catch (PDFNetException e)
                {
                    Console.WriteLine(e.Message);
                }
            }
            finally
            {
                // Always shut the library down, even if an unexpected
                // (non-PDFNet) exception escapes one of the examples.
                PDFNet.Terminate();
            }
        }
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales