ImageExtract

Sample code for using Apryse UWP SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with the UWP SDK.

1//
2// Copyright (c) 2001-2020 by PDFTron Systems Inc. All Rights Reserved.
3//
4
5using System;
6using System.IO;
7using System.Threading.Tasks;
8using Windows.Foundation;
9
10using pdftron.Common;
11using pdftron.PDF;
12using pdftron.SDF;
13
14using PDFNetUniversalSamples.ViewModels;
15
namespace PDFNetSamples
{
    /// <summary>
    /// Sample that extracts the images stored in "newsletter.pdf" using two approaches:
    /// walking each page's display list with an <c>ElementReader</c> (Example 1), and
    /// scanning every object in the low-level SDF/COS document (Example 2).
    /// </summary>
    /// <remarks>
    /// Besides direct image export, you can also convert PDF images to a bitmap, or
    /// extract uncompressed/compressed image data directly using element.GetImageData()
    /// (e.g. as illustrated in the ElementReaderAdv sample project).
    /// </remarks>
    public sealed class ImageExtractTest : Sample
    {
        // Name of the input document, resolved against InputPath by both examples.
        const string InputFileName = "newsletter.pdf";

        // Running count of the images seen so far. It numbers both the console entries
        // and the exported file names, and is reset at the start of each example.
        int image_counter = 0;

        public ImageExtractTest() :
            base("ImageExtract", "This sample illustrates couple of approaches to PDF image extraction.")
        {
        }

        /// <summary>
        /// Runs both extraction examples on a thread-pool thread.
        /// </summary>
        /// <returns>An action that completes once both examples have finished.</returns>
        public override IAsyncAction RunAsync()
        {
            // The async lambda must bind to Task.Run(Func<Task>). Wrapping it in a
            // System.Action would turn it into an async-void delegate, and the returned
            // action would then complete at the first await instead of at the end.
            return Task.Run(async () =>
            {
                WriteLine("--------------------------------");
                WriteLine("Starting ImageExtract Test...");
                WriteLine("--------------------------------\n");

                await ExtractFromPageContent().ConfigureAwait(false);

                WriteLine("----------------------------------------------------------------");

                await ExtractFromCosObjects().ConfigureAwait(false);

                WriteLine("\n--------------------------------");
                WriteLine("Done ImageExtract Test.");
                WriteLine("--------------------------------\n");
            }).AsAsyncAction();
        }

        /// <summary>
        /// Example 1: extract images by traversing the display list of every page.
        /// With this approach it is possible to obtain image positioning information
        /// and DPI. Any exception is reported through WriteLine rather than rethrown.
        /// </summary>
        async Task ExtractFromPageContent()
        {
            try
            {
                string input_file_path = Path.Combine(InputPath, InputFileName);
                WriteLine("Opening input file " + input_file_path);
                PDFDoc doc = new PDFDoc(input_file_path);
                try
                {
                    doc.InitSecurityHandler();
                    image_counter = 0;

                    ElementReader reader = new ElementReader();
                    for (PageIterator itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
                    {
                        reader.Begin(itr.Current());
                        string page_report = await ImageExtract(reader).ConfigureAwait(false);
                        reader.End();

                        // Every report line ends with '\n'; trim the last one because
                        // WriteLine terminates the text itself.
                        if (page_report.Length > 0)
                        {
                            WriteLine(page_report.TrimEnd('\n'));
                        }
                    }
                }
                finally
                {
                    // Release the document even when extraction fails part-way.
                    doc.Destroy();
                }
                WriteLine("Done.");
            }
            catch (Exception e)
            {
                WriteLine(GetExceptionMessage(e));
            }
        }

        /// <summary>
        /// Example 2: extract images by scanning the low-level document. Every stream
        /// object whose dictionary has Subtype "Image" and Type "XObject" is exported
        /// as a PNG file. Any exception is reported through WriteLine rather than rethrown.
        /// </summary>
        async Task ExtractFromCosObjects()
        {
            try
            {
                string input_file_path = Path.Combine(InputPath, InputFileName);
                PDFDoc doc = new PDFDoc(input_file_path);
                try
                {
                    doc.InitSecurityHandler();
                    image_counter = 0;

                    SDFDoc cos_doc = doc.GetSDFDoc();
                    int num_objs = cos_doc.XRefSize();
                    for (int i = 1; i < num_objs; ++i)
                    {
                        Obj obj = cos_doc.GetObj(i);
                        if (obj == null || obj.IsFree() || !obj.IsStream())
                        {
                            continue;
                        }

                        // Process only images
                        if (!IsImageXObject(obj))
                        {
                            continue;
                        }

                        pdftron.PDF.Image image = new pdftron.PDF.Image(obj);

                        WriteLine(string.Format("--> Image: {0}", ++image_counter));
                        WriteLine(string.Format(" Width: {0}", image.GetImageWidth()));
                        WriteLine(string.Format(" Height: {0}", image.GetImageHeight()));
                        WriteLine(string.Format(" BPC: {0}", image.GetBitsPerComponent()));

                        string fname = Path.Combine(OutputPath, "image_extract2_" + image_counter.ToString() + ".png");
                        image.ExportAsPng(fname); // or Export() to automatically select format
                        WriteLine("Image exported to " + fname);
                        await AddFileToOutputList(fname).ConfigureAwait(false);
                    }
                }
                finally
                {
                    // Release the document even when extraction fails part-way.
                    doc.Destroy();
                }
                WriteLine("Done.");
            }
            catch (Exception e)
            {
                WriteLine("\n" + e.ToString());
            }
        }

        /// <summary>
        /// Tells whether a stream object's dictionary has Subtype "Image" and Type "XObject".
        /// </summary>
        /// <param name="obj">Stream object whose dictionary entries are inspected.</param>
        /// <returns>True when both entries are present with the expected names.</returns>
        static bool IsImageXObject(Obj obj)
        {
            DictIterator itr = obj.Find("Subtype");
            if (!itr.HasNext() || itr.Value().GetName() != "Image")
            {
                return false;
            }

            itr = obj.Find("Type");
            return itr.HasNext() && itr.Value().GetName() == "XObject";
        }

        /// <summary>
        /// Walks the elements currently exposed by <paramref name="reader"/>, recursing
        /// into form XObjects, and exports every image XObject it meets as a TIFF file.
        /// Inline images are listed in the report but not exported.
        /// </summary>
        /// <param name="reader">Reader already positioned on a page or form.</param>
        /// <returns>
        /// One '\n'-terminated line per reported fact (image number, width, height,
        /// bits per component, export path), in traversal order. Empty when nothing
        /// was found.
        /// </returns>
        async Task<string> ImageExtract(ElementReader reader)
        {
            System.Text.StringBuilder report = new System.Text.StringBuilder();
            Element element;
            while ((element = reader.Next()) != null)
            {
                ElementType element_type = element.GetType();
                switch (element_type)
                {
                    case ElementType.e_image:
                    case ElementType.e_inline_image:
                    {
                        report.AppendFormat("--> Image: {0}\n", ++image_counter);
                        report.AppendFormat(" Width: {0}\n", element.GetImageWidth());
                        report.AppendFormat(" Height: {0}\n", element.GetImageHeight());
                        report.AppendFormat(" BPC: {0}\n", element.GetBitsPerComponent());

                        // Positioning information, if needed, is available from the
                        // element's transformation matrix: element.GetCTM().

                        if (element_type == ElementType.e_image)
                        {
                            string fname = Path.Combine(OutputPath, "image_extract1_" + image_counter.ToString() + ".tif");
                            pdftron.PDF.Image image = new pdftron.PDF.Image(element.GetXObject());
                            image.ExportAsTiff(fname); // or Export() to automatically select format
                            report.Append("Image exported to " + fname + "\n");
                            await AddFileToOutputList(fname).ConfigureAwait(false);
                        }
                        break;
                    }
                    case ElementType.e_form: // Process form XObjects
                    {
                        reader.FormBegin();
                        report.Append(await ImageExtract(reader).ConfigureAwait(false));
                        reader.End();
                        break;
                    }
                }
            }
            return report.ToString();
        }
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales