More languages
Some test text!
More languages
Sample VB code for using PDFTron SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our VB PDF Library and PDF Parsing & Content Extraction Library.
Get Started | Samples | Download
To run this sample, get started with a free trial of Apryse SDK.
'
' Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
'
Imports System
Imports System.Drawing
Imports System.Drawing.Imaging
Imports pdftron
Imports pdftron.Common
Imports PDFTRON.SDF
Imports pdftron.PDF
Module ImageExtractTestVB
    ' Holds the PDFNetLoader instance obtained in the module constructor.
    Dim pdfNetLoader As PDFNetLoader

    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub

    '-----------------------------------------------------------------------------------
    ' This sample illustrates two approaches to PDF image extraction using PDFNet:
    '   1. traversing the display list of every page (ExtractFromDisplayList), and
    '   2. scanning the low-level object table of the document (ExtractFromObjectTable).
    '
    ' Note: Besides direct image export, you can also convert PDF images
    ' to GDI+ Bitmap, or extract uncompressed/compressed image data directly
    ' using Element.GetImageData() (as illustrated in ElementReaderAdv
    ' sample project).
    '-----------------------------------------------------------------------------------

    ' Running number of the image being reported; also used as the suffix of exported file names.
    Dim image_counter As Integer = 0

    ' Relative path to the folder containing test files.
    Dim input_path As String = "../../../../TestFiles/"
    ' Relative path to the folder that receives the exported images.
    Dim output_path As String = "../../../../TestFiles/Output/"

    ' Reads every element the reader currently yields. For each image (XObject or inline)
    ' it prints width, height, bits per component and the corner coordinates derived from
    ' the element's CTM. Image XObjects are additionally exported to
    ' output_path + "image_extract1_" + <counter>. Form XObjects are entered and processed
    ' recursively.
    '
    ' reader: an ElementReader on which Begin()/FormBegin() has already been called;
    '         the caller is responsible for the matching End().
    Sub ImageExtract(ByRef reader As ElementReader)
        Dim elem As Element = reader.Next()
        While elem IsNot Nothing ' Read page contents
            Dim elemType As Element.Type = elem.GetType()

            If elemType = Element.Type.e_image OrElse elemType = Element.Type.e_inline_image Then
                image_counter += 1
                Console.WriteLine("--> Image: {0}", image_counter)
                Console.WriteLine(" Width: {0}", elem.GetImageWidth())
                Console.WriteLine(" Height: {0}", elem.GetImageHeight())
                Console.WriteLine(" BPC: {0}", elem.GetBitsPerComponent())

                ' Map the point (1, 1) through the CTM; together with the CTM's
                ' translation components (m_h, m_v) this is reported as the image's corners.
                Dim ctm As Matrix2D = elem.GetCTM()
                Dim x2 As Double = 1
                Dim y2 As Double = 1
                ctm.Mult(x2, y2)
                Console.WriteLine(" Coords: x1={0:N2}, y1={1:N2}, x2={2:N2}, y2={3:N2}", ctm.m_h, ctm.m_v, x2, y2)

                ' Only image XObjects are exported here; inline images are reported but not exported.
                If elemType = Element.Type.e_image Then
                    Dim fname As String = output_path + "image_extract1_" + image_counter.ToString()
                    Dim image As pdftron.PDF.Image = New pdftron.PDF.Image(elem.GetXObject())
                    image.Export(fname) ' or ExportAsPng() or ExportAsTiff() ...

                    ' Convert PDF bitmap to GDI+ Bitmap...
                    ' Dim bmp As Bitmap = elem.GetBitmap()
                    ' bmp.Save(fname, ImageFormat.Png)
                    ' bmp.Dispose()

                    ' Instead of converting PDF images to a Bitmap, you can also extract
                    ' uncompressed/compressed image data directly using elem.GetImageData()
                    ' as illustrated in ElementReaderAdv sample project.
                End If
            ElseIf elemType = Element.Type.e_form Then
                reader.FormBegin() ' Process form XObjects
                ImageExtract(reader)
                reader.End()
            End If

            elem = reader.Next()
        End While
    End Sub

    ' Returns True when obj is a non-free stream whose "Type" entry is "XObject" and whose
    ' "Subtype" entry is "Image". Returns False for Nothing.
    Private Function IsImageXObject(ByVal obj As Obj) As Boolean
        ' OrElse short-circuits, so IsFree()/IsStream() are never called on Nothing.
        If obj Is Nothing OrElse obj.IsFree() OrElse Not obj.IsStream() Then
            Return False
        End If

        Dim itr As DictIterator = obj.Find("Type")
        If Not itr.HasNext() OrElse itr.Value().GetName() <> "XObject" Then
            Return False
        End If

        itr = obj.Find("Subtype")
        Return itr.HasNext() AndAlso itr.Value().GetName() = "Image"
    End Function

    ' Example 1:
    ' Extract images by traversing the display list for
    ' every page. With this approach it is possible to obtain
    ' image positioning information and DPI.
    Private Sub ExtractFromDisplayList()
        Try
            Using doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf")
                doc.InitSecurityHandler()

                Using reader As ElementReader = New ElementReader
                    Dim itr As PageIterator = doc.GetPageIterator()
                    While itr.HasNext()
                        reader.Begin(itr.Current())
                        ImageExtract(reader)
                        reader.End()
                        itr.Next()
                    End While
                End Using
            End Using
            Console.WriteLine("Done.")
        Catch ex As PDFNetException
            Console.WriteLine(ex.Message)
        Catch ex As Exception
            ' Unexpected (non-PDFNet) errors are shown in a message box.
            MsgBox(ex.Message)
        End Try
    End Sub

    ' Example 2:
    ' Extract images by scanning the low-level document. Every object number from 1 to
    ' XRefSize() - 1 is inspected and each image XObject is reported and exported to
    ' output_path + "image_extract2_" + <counter>.
    Private Sub ExtractFromObjectTable()
        Try
            Using doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf")
                doc.InitSecurityHandler()
                image_counter = 0 ' Restart numbering for this example

                Dim cos_doc As SDFDoc = doc.GetSDFDoc()
                Dim num_objs As Integer = cos_doc.XRefSize()
                For i As Integer = 1 To num_objs - 1
                    Dim obj As Obj = cos_doc.GetObj(i)

                    ' Process only images
                    If IsImageXObject(obj) Then
                        Dim image As pdftron.PDF.Image = New pdftron.PDF.Image(obj)
                        image_counter += 1
                        Console.WriteLine("--> Image: {0}", image_counter)
                        Console.WriteLine(" Width: {0}", image.GetImageWidth())
                        Console.WriteLine(" Height: {0}", image.GetImageHeight())
                        Console.WriteLine(" BPC: {0}", image.GetBitsPerComponent())

                        Dim fname As String = output_path + "image_extract2_" + image_counter.ToString()
                        image.Export(fname) ' or ExportAsPng() or ExportAsTiff() ...

                        ' Convert PDF bitmap to GDI+ Bitmap...
                        ' Dim bmp As Bitmap = image.GetBitmap()
                        ' bmp.Save(fname, ImageFormat.Png)
                        ' bmp.Dispose()

                        ' Instead of converting PDF images to a Bitmap, you can also extract
                        ' uncompressed/compressed image data directly using Element.GetImageData()
                        ' as illustrated in ElementReaderAdv sample project.
                    End If
                Next
            End Using
            Console.WriteLine("Done.")
        Catch ex As PDFNetException
            Console.WriteLine(ex.Message)
        Catch ex As Exception
            ' Unexpected (non-PDFNet) errors are shown in a message box.
            MsgBox(ex.Message)
        End Try
    End Sub

    ' Entry point: initializes PDFNet, runs both extraction examples separated by a
    ' divider line, then terminates PDFNet.
    Sub Main()
        PDFNet.Initialize(PDFTronLicense.Key)

        ExtractFromDisplayList()

        Console.WriteLine("----------------------------------------------------------------")

        ExtractFromObjectTable()

        PDFNet.Terminate()
    End Sub
End Module