ImageExtract

Sample C# code for using Apryse SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with the Xamarin SDK.

1//
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3//
4
5using System;
6using System.Drawing;
7
8using pdftron;
9using pdftron.Common;
10using pdftron.PDF;
11using pdftron.SDF;
12using pdftron.Filters;
13
14using NUnit.Framework;
15
namespace MiscellaneousSamples
{
	/// <summary>
	///-----------------------------------------------------------------------------------
	/// This sample illustrates one approach to PDF image extraction 
	/// using PDFNet.
	/// 
	/// Note: Besides direct image export, you can also convert PDF images 
	/// to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
	/// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
	/// sample project).
	///-----------------------------------------------------------------------------------
	/// </summary>
	[TestFixture]
	public class ImageExtractTest
	{
		// Running number of the image currently being reported. It is static, so it
		// survives across calls; each example in Sample() resets it before use.
		static int image_counter = 0;

		// Relative path to the folder containing test files.
		const string input_path =  "TestFiles/";

		/// <summary>
		/// Reads elements from <paramref name="reader"/> until it is exhausted. For every
		/// image or inline image it prints the running image number, width, height,
		/// bits per component and placement coordinates. Image XObjects (but not inline
		/// images) are additionally exported to a file. Form XObjects are entered and
		/// processed recursively so that images nested inside them are reported too.
		/// </summary>
		/// <param name="doc">The document being processed; only forwarded on recursion.</param>
		/// <param name="reader">
		/// A reader on which the caller has already started a page or form; this method
		/// does not end that outer scope itself.
		/// </param>
		static void ImageExtract(PDFDoc doc, ElementReader reader) 
		{
			Element element; 
			while ((element = reader.Next()) != null)
			{
				switch (element.GetType()) 
				{
					case Element.Type.e_image:
					case Element.Type.e_inline_image:
					{
						Console.WriteLine("--> Image: {0}", ++image_counter);
						Console.WriteLine("    Width: {0}", element.GetImageWidth());
						Console.WriteLine("    Height: {0}", element.GetImageHeight());
						Console.WriteLine("    BPC: {0}", element.GetBitsPerComponent());

						// x1/y1 are taken from the CTM's m_h/m_v members; x2/y2 are the
						// point (1, 1) after being multiplied through the CTM.
						Matrix2D ctm = element.GetCTM();
						double x2 = 1, y2 = 1;
						ctm.Mult(ref x2, ref y2);
						// Write the coords to 2 decimal places.
						Console.WriteLine("    Coords: x1={0:N2}, y1={1:N2}, x2={2:N2}, y2={3:N2}", ctm.m_h, ctm.m_v, x2, y2);

						// Inline images are only reported above; only image XObjects are exported.
						if (element.GetType() == Element.Type.e_image) 
						{
							pdftron.PDF.Image image = new pdftron.PDF.Image(element.GetXObject());

							// NOTE(review): the counter is appended to whatever path
							// Utils.CreateExternalFile returns -- confirm that helper does not
							// itself create a file under the un-suffixed name.
							string fname = Utils.CreateExternalFile("image_extract1_") + image_counter.ToString();
							image.Export(fname);  // or ExportAsPng() or ExportAsTiff() ...
						}
						break;
					}
					case Element.Type.e_form: // Process form XObjects
					{
						// Enter the form, recurse over its elements, then leave the form.
						reader.FormBegin(); 
						ImageExtract(doc, reader);
						reader.End(); 
						break; 
					}
				}
			}
		}

		/// <summary>
		/// Runs both extraction examples against "newsletter.pdf": first by traversing
		/// each page's display list, then by scanning the low-level object table for
		/// image XObject streams. A <see cref="PDFNetException"/> in either example is
		/// printed and fails the test with the exception message.
		/// </summary>
		[Test]
		public static void Sample()
		{
			
			// Example 1: 
			// Extract images by traversing the display list for 
			// every page. With this approach it is possible to obtain 
			// image positioning information and DPI.
			try	
			{
				using (PDFDoc doc = new PDFDoc(Utils.GetAssetTempFile(input_path + "newsletter.pdf")))
				using (ElementReader reader = new ElementReader())
				{
					doc.InitSecurityHandler();

					// The counter is static; reset it so numbering starts at 1 even when
					// Sample() runs more than once in the same process.
					image_counter = 0;

					PageIterator itr;
					for (itr=doc.GetPageIterator(); itr.HasNext(); itr.Next())	
					{				
						reader.Begin(itr.Current());
						ImageExtract(doc, reader);
						reader.End();
					}

					Console.WriteLine("Done.");
				}
			}
			catch (PDFNetException e)
			{
				Console.WriteLine(e.Message);
				// Fail with the reason attached instead of an anonymous "expected true".
				Assert.Fail(e.Message);
			}

			Console.WriteLine("----------------------------------------------------------------");

			// Example 2: 
			// Extract images by scanning the low-level document.
			try	
			{
				using (PDFDoc doc = new PDFDoc(Utils.GetAssetTempFile(input_path + "newsletter.pdf")))
				{
					doc.InitSecurityHandler();
					image_counter = 0;

					SDFDoc cos_doc = doc.GetSDFDoc();
					int num_objs = cos_doc.XRefSize();
					// Object numbers are visited from 1 up to (but excluding) XRefSize().
					for (int i=1; i<num_objs; ++i)
					{
						Obj obj = cos_doc.GetObj(i);
						if (obj!=null && !obj.IsFree()&& obj.IsStream()) 
						{
							// Process only images: the stream must carry
							// Subtype == "Image" and Type == "XObject".
							DictIterator itr = obj.Find("Subtype");
							if (!itr.HasNext() || itr.Value().GetName() != "Image") 
								continue; 

							itr = obj.Find("Type");
							if (!itr.HasNext() || itr.Value().GetName() != "XObject") 
								continue;

							pdftron.PDF.Image image = new pdftron.PDF.Image(obj);

							Console.WriteLine("--> Image: {0}", ++image_counter);
							Console.WriteLine("    Width: {0}", image.GetImageWidth());
							Console.WriteLine("    Height: {0}", image.GetImageHeight());
							Console.WriteLine("    BPC: {0}", image.GetBitsPerComponent());

							string fname = Utils.CreateExternalFile("image_extract2_") + image_counter.ToString();
							image.Export(fname);  // or ExportAsPng() or ExportAsTiff() ...

							// Convert PDF bitmap to GDI+ Bitmap...
							//Bitmap bmp = image.GetBitmap();
							//bmp.Save(fname, ImageFormat.Png);
							//bmp.Dispose();

							// Instead of converting PDF images to a Bitmap, you can also extract 
							// uncompressed/compressed image data directly using element.GetImageData() 
							// as illustrated in ElementReaderAdv sample project.
						}
					}
					Console.WriteLine("Done.");
				}
			}
			catch (PDFNetException e)
			{
				Console.WriteLine(e.Message);
				// Fail with the reason attached instead of an anonymous "expected true".
				Assert.Fail(e.Message);
			}

		}
	}
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

ImageExtract