ImageExtract

Sample C# code that uses the Apryse SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our UWP SDK and PDF Data Extraction SDK Capabilities.
//
// Copyright (c) 2001-2020 by PDFTron Systems Inc. All Rights Reserved.
//
4
5using System;
6using System.IO;
7using System.Threading.Tasks;
8using Windows.Foundation;
9
10using pdftron.Common;
11using pdftron.PDF;
12using pdftron.SDF;
13
14using PDFNetUniversalSamples.ViewModels;
15
namespace PDFNetSamples
{
    /// <summary>
    /// Sample that extracts the images contained in "newsletter.pdf" in two ways:
    /// by walking each page's display list with an <c>ElementReader</c>, and by
    /// scanning the low-level (SDF/COS) object table for image XObjects.
    /// </summary>
    public sealed class ImageExtractTest : Sample
    {
        public ImageExtractTest() :
            base("ImageExtract", "This sample illustrates couple of approaches to PDF image extraction.")
        {
        }

        /// <summary>
        /// Runs both extraction examples on a thread-pool thread.
        /// </summary>
        /// <returns>An action that completes once both examples have finished.</returns>
        public override IAsyncAction RunAsync()
        {
            // Hand Task.Run a Task-returning delegate. Wrapping an async lambda in
            // System.Action makes it "async void": the returned action would then
            // complete at the first await, before the extraction has finished.
            return Task.Run(() => RunSampleAsync()).AsAsyncAction();
        }

        /// <summary>
        /// Writes the sample banners and runs the two extraction examples in order.
        /// </summary>
        private async Task RunSampleAsync()
        {
            WriteLine("--------------------------------");
            WriteLine("Starting ImageExtract Test...");
            WriteLine("--------------------------------\n");

            await ExtractFromDisplayListAsync().ConfigureAwait(false);

            WriteLine("----------------------------------------------------------------");

            await ExtractFromCosObjectsAsync().ConfigureAwait(false);

            WriteLine("\n--------------------------------");
            WriteLine("Done ImageExtract Test.");
            WriteLine("--------------------------------\n");
        }

        /// <summary>
        /// Example 1: extract images by traversing the display list of every page.
        /// With this approach it is possible to obtain image positioning
        /// information and DPI.
        /// </summary>
        private async Task ExtractFromDisplayListAsync()
        {
            try
            {
                String input_file_path = Path.Combine(InputPath, "newsletter.pdf");
                WriteLine("Opening input file " + input_file_path);
                PDFDoc doc = new PDFDoc(input_file_path);
                try
                {
                    doc.InitSecurityHandler();

                    // Start numbering from 1 on every run; otherwise a second run
                    // would continue from the count left behind by Example 2.
                    image_counter = 0;

                    ElementReader reader = new ElementReader();
                    for (PageIterator itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
                    {
                        reader.Begin(itr.Current());
                        string report = await ImageExtract(reader).ConfigureAwait(false);
                        reader.End();

                        // Pages without images produce an empty report; skip those.
                        if (report.Length > 0)
                        {
                            // Report lines already end in '\n'; drop the last one
                            // before handing the text to WriteLine.
                            WriteLine(report.TrimEnd('\n'));
                        }
                    }
                }
                finally
                {
                    // Release the document even when extraction throws.
                    doc.Destroy();
                }
                WriteLine("Done.");
            }
            catch (Exception e)
            {
                WriteLine(GetExceptionMessage(e));
            }
        }

        /// <summary>
        /// Example 2: extract images by scanning the low-level document, i.e. by
        /// visiting every entry of the cross-reference table and exporting each
        /// stream whose Type is XObject and whose Subtype is Image.
        /// </summary>
        private async Task ExtractFromCosObjectsAsync()
        {
            try
            {
                String input_file_path = Path.Combine(InputPath, "newsletter.pdf");
                PDFDoc doc = new PDFDoc(input_file_path);
                try
                {
                    doc.InitSecurityHandler();
                    image_counter = 0;

                    SDFDoc cos_doc = doc.GetSDFDoc();
                    int num_objs = cos_doc.XRefSize();
                    for (int i = 1; i < num_objs; ++i)
                    {
                        Obj obj = cos_doc.GetObj(i);
                        if (obj == null || obj.IsFree() || !obj.IsStream())
                        {
                            continue;
                        }

                        // Process only images
                        DictIterator itr = obj.Find("Subtype");
                        if (!itr.HasNext() || itr.Value().GetName() != "Image")
                        {
                            continue;
                        }

                        itr = obj.Find("Type");
                        if (!itr.HasNext() || itr.Value().GetName() != "XObject")
                        {
                            continue;
                        }

                        pdftron.PDF.Image image = new pdftron.PDF.Image(obj);

                        WriteLine(string.Format("--> Image: {0}", ++image_counter));
                        WriteLine(string.Format("    Width: {0}", image.GetImageWidth()));
                        WriteLine(string.Format("    Height: {0}", image.GetImageHeight()));
                        WriteLine(string.Format("    BPC: {0}", image.GetBitsPerComponent()));

                        string fname = Path.Combine(OutputPath, "image_extract2_" + image_counter.ToString() + ".png");
                        image.ExportAsPng(fname);  // or Export() to automatically select format
                        WriteLine("Image exported to " + fname);
                        await AddFileToOutputList(fname).ConfigureAwait(false);

                        // Instead of exporting to a file, you can also extract
                        // uncompressed/compressed image data directly using
                        // element.GetImageData() as illustrated in the
                        // ElementReaderAdv sample project.
                    }
                }
                finally
                {
                    // Release the document even when extraction throws.
                    doc.Destroy();
                }
                WriteLine("Done.");
            }
            catch (Exception e)
            {
                WriteLine("\n" + e.ToString());
            }
        }

        // Running image number. It is shared by both examples and used both in the
        // printed report and in the exported file names.
        int image_counter = 0;

        /// <summary>
        /// Walks the elements currently exposed by <paramref name="reader"/> and
        /// describes every image and inline image found. Images backed by an
        /// XObject are additionally exported as TIFF files into the output folder.
        /// Form XObjects are entered recursively.
        /// </summary>
        /// <remarks>
        /// Besides direct image export, you can also extract uncompressed/compressed
        /// image data directly using element.GetImageData() (e.g. as illustrated in
        /// the ElementReaderAdv sample project). element.GetCTM() is the starting
        /// point if the placement of the image on the page is needed.
        /// </remarks>
        /// <param name="reader">A reader on which Begin() or FormBegin() has already been called.</param>
        /// <returns>
        /// A '\n'-terminated, multi-line report (number, width, height, bits per
        /// component and, where applicable, the exported file path) for the images
        /// found, or an empty string when there are none.
        /// </returns>
        async Task<string> ImageExtract(ElementReader reader)
        {
            var report = new System.Text.StringBuilder();
            Element element;
            while ((element = reader.Next()) != null)
            {
                switch (element.GetType())
                {
                    case ElementType.e_image:
                    case ElementType.e_inline_image:
                        {
                            report.AppendFormat("--> Image: {0}\n", ++image_counter);
                            report.AppendFormat("    Width: {0}\n", element.GetImageWidth());
                            report.AppendFormat("    Height: {0}\n", element.GetImageHeight());
                            report.AppendFormat("    BPC: {0}\n", element.GetBitsPerComponent());

                            // Only e_image elements are exported here, since the
                            // export below goes through element.GetXObject().
                            if (element.GetType() == ElementType.e_image)
                            {
                                string fname = Path.Combine(OutputPath, "image_extract1_" + image_counter.ToString() + ".tif");
                                pdftron.PDF.Image image = new pdftron.PDF.Image(element.GetXObject());
                                image.ExportAsTiff(fname);  // or Export() to automatically select format

                                // Keep the export notice next to the image details
                                // it belongs to instead of printing it out of order.
                                report.Append("Image exported to ").Append(fname).Append('\n');
                                await AddFileToOutputList(fname).ConfigureAwait(false);
                            }
                            break;
                        }
                    case ElementType.e_form: // Process form XObjects
                        {
                            reader.FormBegin();
                            report.Append(await ImageExtract(reader).ConfigureAwait(false));
                            reader.End();
                            break;
                        }
                }
            }
            return report.ToString();
        }
    }
}
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales
Product:

ImageExtract