Some test text!

Search
Hamburger Icon

PDF image extraction in VB

More languages

More languages
Java (Android)
C++
C#
C# (.NET Core)
Go
Java
Kotlin
Obj-C
JS (Node.js)
PHP
Python
Ruby
Swift
C# (UWP)
VB
C# (Xamarin)

Sample VB code that uses the PDFTron SDK to extract images from PDF files, together with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed or compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our VB PDF Library and PDF Parsing & Content Extraction Library.

Get Started Samples Download

To run this sample, get started with a free trial of Apryse SDK.

'
' Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
'

Imports System
Imports System.Drawing
Imports System.Drawing.Imaging

Imports pdftron
Imports pdftron.Common
Imports PDFTRON.SDF
Imports pdftron.PDF

Module ImageExtractTestVB
	' Holds the PDFNetLoader singleton obtained in the module constructor.
	' NOTE(review): presumably this makes sure the PDFNet assemblies are resolved
	' before any other PDFNet call - confirm against the PDFNetLoader sources.
	Dim pdfNetLoader As PDFNetLoader

	' Module constructor: fetches the PDFNetLoader instance before any other member runs.
	Sub New()
		pdfNetLoader = pdftron.PDFNetLoader.Instance()
	End Sub

	'-----------------------------------------------------------------------------------
	' This sample illustrates two approaches to PDF image extraction 
	' using PDFNet:
	'   1. walking the display list of every page (ImageExtract), and
	'   2. scanning the low-level (SDF/Cos) object table for image XObjects.
	' 
	' Note: Besides direct image export, you can also convert PDF images 
	' to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
	' using element.GetImageData() (as illustrated in ElementReaderAdv 
	' sample project).
	'-----------------------------------------------------------------------------------

	' Number of images reported so far by the running example. It is printed to the
	' console and appended to the exported file names.
	Dim image_counter As Integer = 0

	' Relative path to the folder containing test files.
	Dim input_path As String = "../../../../TestFiles/"
	' Relative path to the folder that receives the exported images.
	Dim output_path As String = "../../../../TestFiles/Output/"


	' Reads every element the reader currently yields, prints information about each
	' image / inline image and exports each image XObject to output_path.
	' Form XObjects are entered recursively so that images nested inside them are
	' reported as well.
	'
	' reader - an ElementReader on which Begin() (or FormBegin()) has already been
	'          called; the matching End() is left to the caller.
	Sub ImageExtract(ByRef reader As ElementReader)
		Dim elem As Element = reader.Next()
		While elem IsNot Nothing		 ' Read page contents
			Dim elemType As Element.Type = elem.GetType()

			Select Case elemType
				Case Element.Type.e_image, Element.Type.e_inline_image
					image_counter += 1
					Console.WriteLine("--> Image: {0}", image_counter)
					Console.WriteLine("    Width: {0}", elem.GetImageWidth())
					Console.WriteLine("    Height: {0}", elem.GetImageHeight())
					Console.WriteLine("    BPC: {0}", elem.GetBitsPerComponent())

					' Map the point (1, 1) through the element's CTM; together with the
					' matrix translation (m_h, m_v) this is printed as the image coordinates.
					Dim ctm As Matrix2D = elem.GetCTM()
					Dim x2 As Double = 1
					Dim y2 As Double = 1
					ctm.Mult(x2, y2)
					Console.WriteLine("    Coords: x1={0:N2}, y1={1:N2}, x2={2:N2}, y2={3:N2}", ctm.m_h, ctm.m_v, x2, y2)

					' Only e_image elements are exported here; inline images are reported
					' above but not written to disk.
					If elemType = Element.Type.e_image Then
						Dim fname As String = output_path + "image_extract1_" + image_counter.ToString()
						Dim pdfImage As pdftron.PDF.Image = New pdftron.PDF.Image(elem.GetXObject())
						pdfImage.Export(fname)			' or ExportAsPng() or ExportAsTiff() ...

						' Convert PDF bitmap to GDI+ Bitmap...
						' Dim bmp As Bitmap = elem.GetBitmap()
						' bmp.Save(fname, ImageFormat.Png)
						' bmp.Dispose()

						' Instead of converting PDF images to a Bitmap, you can also extract 
						' uncompressed/compressed image data directly using elem.GetImageData() 
						' as illustrated in ElementReaderAdv sample project.
					End If

				Case Element.Type.e_form
					reader.FormBegin()			   ' Process form XObjects
					ImageExtract(reader)
					reader.End()
			End Select

			elem = reader.Next()
		End While
	End Sub

	' Returns True when candidate is a stream whose dictionary contains
	' /Type /XObject and /Subtype /Image. candidate must not be Nothing.
	Private Function IsImageXObject(ByVal candidate As Obj) As Boolean
		If Not candidate.IsStream() Then Return False

		Dim itr As DictIterator = candidate.Find("Type")
		If Not itr.HasNext() OrElse itr.Value().GetName() <> "XObject" Then Return False

		itr = candidate.Find("Subtype")
		Return itr.HasNext() AndAlso itr.Value().GetName() = "Image"
	End Function

	' Example 1: 
	' Extract images by traversing the display list for 
	' every page. With this approach it is possible to obtain 
	' image positioning information and DPI.
	Private Sub ExtractImagesFromDisplayList()
		Try
			Using doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf")
				doc.InitSecurityHandler()
				' Start numbering from 1 for this example, regardless of earlier runs.
				image_counter = 0

				Using reader As ElementReader = New ElementReader
					Dim itr As PageIterator = doc.GetPageIterator()
					While itr.HasNext()
						reader.Begin(itr.Current())
						ImageExtract(reader)
						reader.End()
						itr.Next()
					End While
				End Using
			End Using
			Console.WriteLine("Done.")
		Catch ex As PDFNetException
			Console.WriteLine(ex.Message)
		Catch ex As Exception
			' Non-PDFNet failures are shown in a message box, as in the original sample.
			MsgBox(ex.Message)
		End Try
	End Sub

	' Example 2: 
	' Extract images by scanning the low-level document.
	Private Sub ExtractImagesFromLowLevelObjects()
		Try
			Using doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf")
				doc.InitSecurityHandler()
				image_counter = 0

				Dim cos_doc As SDFDoc = doc.GetSDFDoc()
				Dim num_objs As Integer = cos_doc.XRefSize()

				' Object numbers are scanned from 1 up to XRefSize() - 1.
				For i As Integer = 1 To num_objs - 1
					Dim obj As Obj = cos_doc.GetObj(i)

					' OrElse short-circuits, so IsFree() is never invoked on Nothing.
					' (A plain Or would evaluate both operands and throw on a null object.)
					If obj Is Nothing OrElse obj.IsFree() Then Continue For

					' Process only images
					If Not IsImageXObject(obj) Then Continue For

					Dim pdfImage As pdftron.PDF.Image = New pdftron.PDF.Image(obj)

					image_counter += 1
					Console.WriteLine("--> Image: {0}", image_counter)
					Console.WriteLine("    Width: {0}", pdfImage.GetImageWidth())
					Console.WriteLine("    Height: {0}", pdfImage.GetImageHeight())
					Console.WriteLine("    BPC: {0}", pdfImage.GetBitsPerComponent())

					Dim fname As String = output_path + "image_extract2_" + image_counter.ToString()
					pdfImage.Export(fname)		   ' or ExportAsPng() or ExportAsTiff() ...

					' Convert PDF bitmap to GDI+ Bitmap...
					' Dim bmp As Bitmap = pdfImage.GetBitmap()
					' bmp.Save(fname, ImageFormat.Png)
					' bmp.Dispose()

					' Instead of converting PDF images to a Bitmap, you can also extract 
					' uncompressed/compressed image data directly using element.GetImageData() 
					' as illustrated in ElementReaderAdv sample project.
				Next
			End Using
			Console.WriteLine("Done.")
		Catch ex As PDFNetException
			Console.WriteLine(ex.Message)
		Catch ex As Exception
			' Non-PDFNet failures are shown in a message box, as in the original sample.
			MsgBox(ex.Message)
		End Try
	End Sub

	' Entry point: initializes PDFNet, runs both extraction examples separated by a
	' divider line, then terminates PDFNet. Each example handles its own exceptions,
	' so a failure in the first does not prevent the second from running.
	Sub Main()

		PDFNet.Initialize(PDFTronLicense.Key)

		ExtractImagesFromDisplayList()
		Console.WriteLine("----------------------------------------------------------------")
		ExtractImagesFromLowLevelObjects()

		PDFNet.Terminate()
	End Sub
End Module