Sample C# code for using the Apryse SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our UWP SDK and PDF Data Extraction SDK Capabilities.
1//
2// Copyright (c) 2001-2020 by PDFTron Systems Inc. All Rights Reserved.
3//
4
5using System;
6using System.IO;
7using System.Threading.Tasks;
8using Windows.Foundation;
9
10using pdftron.Common;
11using pdftron.PDF;
12using pdftron.SDF;
13
14using PDFNetUniversalSamples.ViewModels;
15
16namespace PDFNetSamples
17{
18 public sealed class ImageExtractTest : Sample
19 {
/// <summary>
/// Creates the sample, passing its name ("ImageExtract") and a short
/// description to the <c>Sample</c> base-class constructor.
/// </summary>
public ImageExtractTest()
    : base(
        "ImageExtract",
        "This sample illustrates couple of approaches to PDF image extraction.")
{
    // Nothing to initialise beyond what the base constructor receives.
}
24
/// <summary>
/// Runs both image-extraction examples on a background thread and reports
/// progress through <c>WriteLine</c>.
/// </summary>
/// <returns>
/// An <see cref="IAsyncAction"/> that completes only after both examples have
/// finished (including every awaited export), not at the first await.
/// </returns>
public override IAsyncAction RunAsync()
{
    // The lambda is passed as Func<Task>, not Action: an async lambda converted
    // to Action is "async void", which lets Task.Run complete at the first await
    // and report the sample as finished while it is still running.
    return Task.Run(new Func<Task>(async () =>
    {
        WriteLine("--------------------------------");
        WriteLine("Starting ImageExtract Test...");
        WriteLine("--------------------------------\n");

        await RunDisplayListExampleAsync().ConfigureAwait(false);

        WriteLine("----------------------------------------------------------------");

        await RunLowLevelScanExampleAsync().ConfigureAwait(false);

        WriteLine("\n--------------------------------");
        WriteLine("Done ImageExtract Test.");
        WriteLine("--------------------------------\n");
    })).AsAsyncAction();
}

/// <summary>
/// Example 1: extract images by traversing the display list of every page.
/// With this approach it is possible to obtain image positioning information
/// and DPI.
/// </summary>
private async Task RunDisplayListExampleAsync()
{
    try
    {
        String input_file_path = Path.Combine(InputPath, "newsletter.pdf");
        WriteLine("Opening input file " + input_file_path);
        PDFDoc doc = new PDFDoc(input_file_path);
        try
        {
            doc.InitSecurityHandler();

            ElementReader reader = new ElementReader();
            for (PageIterator itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
            {
                reader.Begin(itr.Current());
                string details = await ImageExtract(reader).ConfigureAwait(false);
                reader.End();

                // ImageExtract returns the per-image description (number, width,
                // height, BPC); show it instead of silently dropping it.
                if (!string.IsNullOrEmpty(details))
                {
                    WriteLine(details.TrimEnd('\n'));
                }
            }
        }
        finally
        {
            // Release the document even when processing a page throws.
            doc.Destroy();
        }
        WriteLine("Done.");
    }
    catch (Exception e)
    {
        WriteLine(GetExceptionMessage(e));
    }
}

/// <summary>
/// Example 2: extract images by scanning the low-level document, i.e. every
/// entry of the cross-reference table, and exporting each image XObject as PNG.
/// </summary>
private async Task RunLowLevelScanExampleAsync()
{
    try
    {
        String input_file_path = Path.Combine(InputPath, "newsletter.pdf");
        PDFDoc doc = new PDFDoc(input_file_path);
        try
        {
            doc.InitSecurityHandler();

            // Restart numbering so the exported files of this example start at 1.
            image_counter = 0;

            SDFDoc cos_doc = doc.GetSDFDoc();
            int num_objs = cos_doc.XRefSize();
            for (int i = 1; i < num_objs; ++i)
            {
                Obj obj = cos_doc.GetObj(i);
                if (obj == null || obj.IsFree() || !obj.IsStream())
                {
                    continue;
                }

                // Process only images: streams with /Subtype /Image and /Type /XObject.
                DictIterator itr = obj.Find("Subtype");
                if (!itr.HasNext() || itr.Value().GetName() != "Image")
                {
                    continue;
                }

                itr = obj.Find("Type");
                if (!itr.HasNext() || itr.Value().GetName() != "XObject")
                {
                    continue;
                }

                pdftron.PDF.Image image = new pdftron.PDF.Image(obj);

                WriteLine(string.Format("--> Image: {0}", ++image_counter));
                WriteLine(string.Format(" Width: {0}", image.GetImageWidth()));
                WriteLine(string.Format(" Height: {0}", image.GetImageHeight()));
                WriteLine(string.Format(" BPC: {0}", image.GetBitsPerComponent()));

                string fname = Path.Combine(OutputPath, "image_extract2_" + image_counter.ToString() + ".png");
                image.ExportAsPng(fname); // or Export() to automatically select format
                WriteLine("Image exported to " + fname);
                await AddFileToOutputList(fname).ConfigureAwait(false);

                // Instead of exporting to a file, you can also extract
                // uncompressed/compressed image data directly using element.GetImageData()
                // as illustrated in ElementReaderAdv sample project.
            }
        }
        finally
        {
            // Release the document even when an export throws.
            doc.Destroy();
        }
        WriteLine("Done.");
    }
    catch (Exception e)
    {
        WriteLine("\n" + e.ToString());
    }
}
122
// Running image number used in log output and exported file names.
// RunAsync resets it to 0 before its second example.
int image_counter = 0;

/// <summary>
/// Walks the elements available from <paramref name="reader"/>, exports every
/// image XObject as a TIFF file into <c>OutputPath</c>, and recurses into form
/// XObjects.
/// </summary>
/// <remarks>
/// Inline images are counted and described but not exported. Besides direct
/// image export, you can also extract uncompressed/compressed image data
/// directly using element.GetImageData() (e.g. as illustrated in the
/// ElementReaderAdv sample project). element.GetCTM() is the starting point if
/// positioning information is needed.
/// </remarks>
/// <param name="reader">
/// A reader on which <c>Begin</c> (or <c>FormBegin</c>) has already been called;
/// the caller remains responsible for the matching <c>End</c>.
/// </param>
/// <returns>
/// A text description (number, width, height, bits per component) of every
/// image encountered, one property per line; empty when no image was found.
/// </returns>
async Task<string> ImageExtract(ElementReader reader)
{
    // A builder avoids re-copying the accumulated text for every image.
    var result = new System.Text.StringBuilder();
    Element element;
    while ((element = reader.Next()) != null)
    {
        var kind = element.GetType();
        switch (kind)
        {
            case ElementType.e_image:
            case ElementType.e_inline_image:
                {
                    result.AppendFormat("--> Image: {0}\n", ++image_counter);
                    result.AppendFormat(" Width: {0}\n", element.GetImageWidth());
                    result.AppendFormat(" Height: {0}\n", element.GetImageHeight());
                    result.AppendFormat(" BPC: {0}\n", element.GetBitsPerComponent());

                    // Only image XObjects are exported; inline images are skipped here.
                    if (kind == ElementType.e_image)
                    {
                        string fname = Path.Combine(OutputPath, "image_extract1_" + image_counter.ToString() + ".tif");
                        pdftron.PDF.Image image = new pdftron.PDF.Image(element.GetXObject());
                        image.ExportAsTiff(fname); // or Export() to automatically select format
                        WriteLine("Image exported to " + fname);
                        await AddFileToOutputList(fname).ConfigureAwait(false);
                    }
                    break;
                }
            case ElementType.e_form: // Process form XObjects
                {
                    // Descend into the form's own content, then return to the
                    // enclosing content once the nested pass is finished.
                    reader.FormBegin();
                    result.Append(await ImageExtract(reader).ConfigureAwait(false));
                    reader.End();
                    break;
                }
        }
    }
    return result.ToString();
}
191 }
192}
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales