Some test text!

Search
Hamburger Icon

Read elements across all PDF pages in VB

More languages

More languages
JavaScript
Java (Android)
C++
C#
C# (.NET Core)
Go
Java
Kotlin
Obj-C
JS (Node.js)
PHP
Python
Ruby
Swift
C# (UWP)
VB
C# (Xamarin)

Sample VB code that uses the PDFTron SDK's ElementReader to traverse the page display list. Learn more about our VB PDF Library and PDF Parsing & Content Extraction Library.

Get Started Samples Download

To run this sample, get started with a free trial of Apryse SDK.

'
' Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
'

Imports System

Imports pdftron
Imports pdftron.Common
Imports pdftron.Filters
Imports pdftron.SDF
Imports pdftron.PDF

''' <summary>
''' Console sample that opens a PDF, walks every page with an ElementReader,
''' and prints the text strings it encounters.
''' </summary>
Module ElementReaderTestVB
    ' Held at module level so the loader instance obtained in the constructor
    ' stays referenced for the lifetime of the module.
    Dim pdfNetLoader As PDFNetLoader

    ''' <summary>
    ''' Module constructor: obtains the PDFNetLoader instance before any other
    ''' member of this module runs.
    ''' </summary>
    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub


    ''' <summary>
    ''' Reads elements from <paramref name="reader"/> until it returns Nothing.
    ''' Text elements are written to the console, path elements have their
    ''' path data fetched, and form XObjects are entered and processed
    ''' recursively.
    ''' </summary>
    ''' <param name="reader">
    ''' A reader on which Begin (or FormBegin) has already been called; the
    ''' caller is responsible for the matching End.
    ''' </param>
    Sub ProcessElements(ByVal reader As ElementReader)
        Dim element As Element = reader.Next()
        While element IsNot Nothing  ' Read page contents
            Select Case element.GetType()
                Case Element.Type.e_path
                    ' Process path data...
                    ' The locals below are intentionally unused: they only show
                    ' how to reach the path data from an element.
                    Dim pathData As PathData = element.GetPathData()
                    Dim data As Double() = pathData.points

                Case Element.Type.e_text
                    ' Process text strings...
                    Dim result As String = element.GetTextString()
                    Console.WriteLine(result)

                Case Element.Type.e_form
                    ' Process form XObjects: step into the form, handle its
                    ' elements with this same routine, then step back out.
                    reader.FormBegin()
                    Console.WriteLine("Process Element.Type.e_form")
                    ProcessElements(reader)
                    reader.End()
            End Select
            element = reader.Next()
        End While
    End Sub

    ''' <summary>
    ''' Entry point. Initializes PDFNet, runs ProcessElements over every page
    ''' of newsletter.pdf, and terminates PDFNet even if processing fails.
    ''' </summary>
    Sub Main()

        PDFNet.Initialize(PDFTronLicense.Key)

        ' Initialize stays outside the Try so Terminate only runs after a
        ' successful Initialize; the Finally guarantees the shutdown call even
        ' when opening or reading the document throws.
        Try
            ' Relative path to the folder containing test files.
            Dim input_path As String = "../../../../TestFiles/"

            Console.WriteLine("-------------------------------------------------")
            Console.WriteLine("Sample 1 - Extract text data from all pages in the document.")

            ' Open the test file
            Console.WriteLine("Opening the input pdf...")
            Using doc As PDFDoc = New PDFDoc(input_path & "newsletter.pdf")
                Using page_reader As ElementReader = New ElementReader
                    doc.InitSecurityHandler()
                    Dim itr As PageIterator = doc.GetPageIterator()
                    While itr.HasNext()  '  Read every page
                        page_reader.Begin(itr.Current())
                        ProcessElements(page_reader)
                        page_reader.End()
                        itr.Next()
                    End While
                End Using
            End Using
        Finally
            PDFNet.Terminate()
        End Try
        Console.WriteLine("Done.")

    End Sub

End Module