ImageExtract

Sample C# code for using the Apryse SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our Xamarin SDK and PDF Data Extraction SDK Capabilities.

//
// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
//

5using System;
6using System.Drawing;
7
8using pdftron;
9using pdftron.Common;
10using pdftron.PDF;
11using pdftron.SDF;
12using pdftron.Filters;
13
14using NUnit.Framework;
15
namespace MiscellaneousSamples
{
    /// <summary>
    /// This sample illustrates two approaches to PDF image extraction using PDFNet:
    /// traversing the display list of every page, and scanning the low-level
    /// document for image XObject streams.
    ///
    /// Note: Besides direct image export, you can also convert PDF images
    /// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
    /// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
    /// sample project).
    /// </summary>
    [TestFixture]
    public class ImageExtractTest
    {
        // Number of images reported so far by the example that is currently running.
        // Used both in the console output and as the suffix of the exported file name.
        static int image_counter = 0;

        // Relative path to the folder containing test files.
        const string input_path = "TestFiles/";

        /// <summary>
        /// Reads elements from <paramref name="reader"/> until it returns null. For every
        /// image or inline image it prints the width, height, bits per component and
        /// coordinates; image XObjects are additionally exported to a file. Form XObjects
        /// are entered and processed recursively.
        /// </summary>
        /// <param name="doc">The document being processed; only passed on to recursive calls.</param>
        /// <param name="reader">The reader to pull elements from. The caller is responsible
        /// for calling Begin/End (or FormBegin/End) around this method.</param>
        static void ImageExtract(PDFDoc doc, ElementReader reader)
        {
            Element element;
            while ((element = reader.Next()) != null)
            {
                switch (element.GetType())
                {
                    case Element.Type.e_image:
                    case Element.Type.e_inline_image:
                    {
                        Console.WriteLine("--> Image: {0}", ++image_counter);
                        Console.WriteLine(" Width: {0}", element.GetImageWidth());
                        Console.WriteLine(" Height: {0}", element.GetImageHeight());
                        Console.WriteLine(" BPC: {0}", element.GetBitsPerComponent());

                        // x1/y1 are the translation components of the element's matrix;
                        // x2/y2 are the point (1, 1) after being transformed by that matrix.
                        Matrix2D ctm = element.GetCTM();
                        double x2 = 1, y2 = 1;
                        ctm.Mult(ref x2, ref y2);
                        // Write the coords to 2 decimal places.
                        Console.WriteLine(" Coords: x1={0:N2}, y1={1:N2}, x2={2:N2}, y2={3:N2}", ctm.m_h, ctm.m_v, x2, y2);

                        // Only image XObjects are exported; inline images are reported
                        // above but are not written to a file.
                        if (element.GetType() == Element.Type.e_image)
                        {
                            pdftron.PDF.Image image = new pdftron.PDF.Image(element.GetXObject());

                            string fname = Utils.CreateExternalFile("image_extract1_") + image_counter.ToString();
                            image.Export(fname); // or ExportAsPng() or ExportAsTiff() ...
                        }
                        break;
                    }
                    case Element.Type.e_form: // Process form XObjects
                    {
                        reader.FormBegin();
                        ImageExtract(doc, reader);
                        reader.End();
                        break;
                    }
                }
            }
        }

        /// <summary>
        /// Runs both extraction examples against "newsletter.pdf". A
        /// <see cref="PDFNetException"/> raised by either example is printed to the
        /// console and fails the test with the exception message.
        /// </summary>
        [Test]
        public static void Sample()
        {
            // Example 1:
            // Extract images by traversing the display list for
            // every page. With this approach it is possible to obtain
            // image positioning information and DPI.
            try
            {
                using (PDFDoc doc = new PDFDoc(Utils.GetAssetTempFile(input_path + "newsletter.pdf")))
                using (ElementReader reader = new ElementReader())
                {
                    doc.InitSecurityHandler();

                    // The counter is static, so start from zero on every run; otherwise a
                    // repeated run would continue numbering from the previous one.
                    image_counter = 0;

                    PageIterator itr;
                    for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
                    {
                        reader.Begin(itr.Current());
                        ImageExtract(doc, reader);
                        reader.End();
                    }

                    Console.WriteLine("Done.");
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
                Assert.Fail(e.Message);
            }

            Console.WriteLine("----------------------------------------------------------------");

            // Example 2:
            // Extract images by scanning the low-level document.
            try
            {
                using (PDFDoc doc = new PDFDoc(Utils.GetAssetTempFile(input_path + "newsletter.pdf")))
                {
                    doc.InitSecurityHandler();
                    image_counter = 0;

                    SDFDoc cos_doc = doc.GetSDFDoc();
                    int num_objs = cos_doc.XRefSize();

                    // Object numbers are visited starting at 1; entry 0 is not examined.
                    for (int i = 1; i < num_objs; ++i)
                    {
                        Obj obj = cos_doc.GetObj(i);
                        if (obj == null || obj.IsFree() || !obj.IsStream())
                        {
                            continue;
                        }

                        // Process only images
                        DictIterator itr = obj.Find("Subtype");
                        if (!itr.HasNext() || itr.Value().GetName() != "Image")
                        {
                            continue;
                        }

                        itr = obj.Find("Type");
                        if (!itr.HasNext() || itr.Value().GetName() != "XObject")
                        {
                            continue;
                        }

                        pdftron.PDF.Image image = new pdftron.PDF.Image(obj);

                        Console.WriteLine("--> Image: {0}", ++image_counter);
                        Console.WriteLine(" Width: {0}", image.GetImageWidth());
                        Console.WriteLine(" Height: {0}", image.GetImageHeight());
                        Console.WriteLine(" BPC: {0}", image.GetBitsPerComponent());

                        string fname = Utils.CreateExternalFile("image_extract2_") + image_counter.ToString();
                        image.Export(fname); // or ExportAsPng() or ExportAsTiff() ...

                        // Convert PDF bitmap to GDI+ Bitmap...
                        //Bitmap bmp = image.GetBitmap();
                        //bmp.Save(fname, ImageFormat.Png);
                        //bmp.Dispose();

                        // Instead of converting PDF images to a Bitmap, you can also extract
                        // uncompressed/compressed image data directly using element.GetImageData()
                        // as illustrated in ElementReaderAdv sample project.
                    }

                    Console.WriteLine("Done.");
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
                Assert.Fail(e.Message);
            }
        }
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales