Sample code for using the Apryse SDK to traverse the page display list using ElementReader, shown in C#, C++, Go, Java, PHP, Node.js, Python, Ruby, and VB. Learn more about our Server SDK and PDF Data Extraction SDK capabilities.
1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using pdftron;
7using pdftron.Common;
8using pdftron.Filters;
9using pdftron.SDF;
10using pdftron.PDF;
11
namespace ElementReaderTestCS
{
    /// <summary>
    /// Console sample that walks the display list of every page of a PDF with
    /// an <c>ElementReader</c> and writes each text run it finds to stdout.
    /// </summary>
    class Class1
    {
        // NOTE(review): presumably this loads the native PDFNet library before
        // first use of the class -- confirm against the PDFNetLoader docs.
        private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();
        static Class1() {}

        /// <summary>
        /// Reads elements from <paramref name="reader"/> until it returns null.
        /// Path elements have their point data fetched, text elements are
        /// printed, and form XObjects are entered and processed recursively.
        /// </summary>
        /// <param name="reader">Reader already positioned on a page or form.</param>
        static void ProcessElements(ElementReader reader)
        {
            for (Element element = reader.Next(); element != null; element = reader.Next())
            {
                Element.Type kind = element.GetType();

                if (kind == Element.Type.e_path)
                {
                    // Path data is fetched for demonstration only; nothing is done with it.
                    PathData data = element.GetPathData();
                    double[] points = data.points;
                }
                else if (kind == Element.Type.e_text)
                {
                    Console.WriteLine(element.GetTextString());
                }
                else if (kind == Element.Type.e_form)
                {
                    Console.WriteLine("Process Element.Type.e_form");

                    // Step into the form's own display list, then step back out.
                    reader.FormBegin();
                    ProcessElements(reader);
                    reader.End();
                }
            }
        }

        /// <summary>
        /// Entry point: initializes PDFNet, opens <c>newsletter.pdf</c> from the
        /// test-files folder, runs <see cref="ProcessElements"/> over every page
        /// and terminates PDFNet. A <c>PDFNetException</c> is reported by
        /// printing its message.
        /// </summary>
        [STAThread]
        static void Main(string[] args)
        {
            PDFNet.Initialize(PDFTronLicense.Key);

            // Folder holding the test files, relative to the working directory.
            string input_path = "../../../../TestFiles/";

            try
            {
                Console.WriteLine("-------------------------------------------------");
                Console.WriteLine("Sample 1 - Extract text data from all pages in the document.");
                Console.WriteLine("Opening the input pdf...");

                using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                using (ElementReader page_reader = new ElementReader())
                {
                    doc.InitSecurityHandler();

                    // Visit the pages in iterator order.
                    PageIterator itr = doc.GetPageIterator();
                    while (itr.HasNext())
                    {
                        page_reader.Begin(itr.Current());
                        ProcessElements(page_reader);
                        page_reader.End();
                        itr.Next();
                    }
                    Console.WriteLine("Done.");
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
            PDFNet.Terminate();
        }
    }
}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6#include <PDF/PDFNet.h>
7#include <PDF/PDFDoc.h>
8#include <PDF/ElementReader.h>
9#include <PDF/Element.h>
10#include <iostream>
11#include "../../LicenseKey/CPP/LicenseKey.h"
12
13using namespace pdftron;
14using namespace std;
15using namespace PDF;
16
17void ProcessElements(ElementReader& reader)
18{
19 for (Element element=reader.Next(); element; element = reader.Next()) // Read page contents
20 {
21 switch (element.GetType())
22 {
23 case Element::e_path: // Process path data...
24 {
25 PathData data = element.GetPathData();
26 const std::vector<unsigned char>& operators = data.GetOperators();
27 const std::vector<double>& points = data.GetPoints();
28 }
29 break;
30
31 case Element::e_text: // Process text strings...
32 {
33 const UString data = element.GetTextString();
34 cout << data << endl;
35 }
36 break;
37
38 case Element::e_form: // Process form XObjects
39 {
40 reader.FormBegin();
41 ProcessElements(reader);
42 reader.End();
43 }
44 break;
45 }
46 }
47}
48
49
50int main(int argc, char *argv[])
51{
52 int ret = 0;
53 PDFNet::Initialize(LicenseKey);
54
55 // Relative path to the folder containing test files.
56 string input_path = "../../TestFiles/";
57
58 try // Extract text data from all pages in the document
59 {
60 cout << "-------------------------------------------------" << endl;
61 cout << "Sample 1 - Extract text data from all pages in the document." << endl;
62 cout << "Opening the input pdf..." << endl;
63
64 PDFDoc doc((input_path + "newsletter.pdf").c_str());
65 doc.InitSecurityHandler();
66
67 int pgnum = doc.GetPageCount();
68
69 PageIterator itr;
70 ElementReader page_reader;
71
72 for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next()) // Read every page
73 {
74 page_reader.Begin(itr.Current());
75 ProcessElements(page_reader);
76 page_reader.End();
77 }
78
79 cout << "Done." << endl;
80 }
81 catch(Common::Exception& e)
82 {
83 cout << e << endl;
84 ret = 1;
85 }
86 catch(...)
87 {
88 cout << "Unknown Exception" << endl;
89 ret = 1;
90 }
91
92 PDFNet::Terminate();
93 return ret;
94}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8 "fmt"
9 . "pdftron"
10)
11
12import "pdftron/Samples/LicenseKey/GO"
13// Relative path to the folder containing the test files.
14var inputPath = "../../TestFiles/"
15
16func ProcessElements(reader ElementReader){
17 element := reader.Next()
18 for element.GetMp_elem().Swigcptr() != 0{ // Read page contents
19 if element.GetType() == ElementE_path{ // Process path data...
20 //uncomment below if needed
21 //data := element.GetPathData()
22 //points := data.GetPoints()
23 }else if element.GetType() == ElementE_text{ // Process text strings...
24 data := element.GetTextString()
25 fmt.Println(data)
26 }else if element.GetType() == ElementE_form{ // Process form XObjects
27 reader.FormBegin()
28 ProcessElements(reader)
29 reader.End()
30 }
31 element = reader.Next()
32 }
33}
34
35func main(){
36 PDFNetInitialize(PDFTronLicense.Key)
37
38 // Extract text data from all pages in the document
39 fmt.Println("-------------------------------------------------")
40 fmt.Println("Sample 1 - Extract text data from all pages in the document.")
41 fmt.Println("Opening the input pdf...")
42
43 doc := NewPDFDoc(inputPath + "newsletter.pdf")
44 doc.InitSecurityHandler()
45
46 pageReader := NewElementReader()
47
48 itr := doc.GetPageIterator()
49
50 // Read every page
51 for itr.HasNext(){
52 pageReader.Begin(itr.Current())
53 ProcessElements(pageReader)
54 pageReader.End()
55 itr.Next()
56 }
57 // Close the open document to free up document memory sooner.
58 doc.Close()
59 PDFNetTerminate()
60 fmt.Println("Done.")
61}
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.common.PDFNetException;
7import com.pdftron.pdf.*;
8
9public class ElementReaderTest {
10
11 static void ProcessElements(ElementReader reader) throws PDFNetException {
12 for (Element element = reader.next(); element != null; element = reader.next()) // Read page contents
13 {
14 switch (element.getType())
15 {
16 case Element.e_path: // Process path data...
17 {
18 PathData data = element.getPathData();
19 byte[] operators = data.getOperators();
20 double[] points = data.getPoints();
21 }
22 break;
23 case Element.e_text: // Process text strings...
24 {
25 String data = element.getTextString();
26 System.out.println(data);
27 }
28 break;
29 case Element.e_form: // Process form XObjects
30 {
31 reader.formBegin();
32 ProcessElements(reader);
33 reader.end();
34 }
35 break;
36 }
37 }
38 }
39
40 public static void main(String[] args) {
41 PDFNet.initialize(PDFTronLicense.Key());
42
43 // Relative path to the folder containing test files.
44 String input_path = "../../TestFiles/";
45 String output_path = "../../TestFiles/Output/";
46
47 System.out.println("-------------------------------------------------");
48 System.out.println("Sample 1 - Extract text data from all pages in the document.");
49 System.out.println("Opening the input pdf...");
50
51 try (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf")) // Extract text data from all pages in the document
52 {
53 doc.initSecurityHandler();
54
55 int pgnum = doc.getPageCount();
56
57 PageIterator itr;
58 ElementReader page_reader = new ElementReader();
59
60 for (itr = doc.getPageIterator(); itr.hasNext(); ) // Read every page
61 {
62 page_reader.begin(itr.next());
63 ProcessElements(page_reader);
64 page_reader.end();
65 }
66 System.out.println("Done.");
67 } catch (Exception e) {
68 System.out.println(e);
69 }
70
71 PDFNet.terminate();
72 }
73}
1<?php
2//---------------------------------------------------------------------------------------
3// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
4// Consult LICENSE.txt regarding license information.
5//---------------------------------------------------------------------------------------
6if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
7include("../../../PDFNetC/Lib/PDFNetPHP.php");
8include("../../LicenseKey/PHP/LicenseKey.php");
9
10// Relative path to the folder containing the test files.
11$input_path = getcwd()."/../../TestFiles/";
12
13
/**
 * Reads elements from $reader until Next() returns a value loosely equal to
 * null. Path elements have their points fetched, text elements are echoed,
 * and form XObjects are entered and processed recursively.
 *
 * @param ElementReader $reader Reader already positioned on a page or form.
 */
function ProcessElements($reader) {
    $element = $reader->Next();
    while ($element != null)
    {
        $type = $element->GetType();

        if ($type == Element::e_path)
        {
            // Fetched for demonstration only; the values are not used further.
            $data = $element->GetPathData();
            $points = $data->GetPoints();
        }
        elseif ($type == Element::e_text)
        {
            $data = $element->GetTextString();
            echo nl2br($data."\n");
        }
        elseif ($type == Element::e_form)
        {
            // Step into the form's display list, then step back out.
            $reader->FormBegin();
            ProcessElements($reader);
            $reader->End();
        }

        $element = $reader->Next();
    }
}
41
// Script body: initialize PDFNet, open newsletter.pdf, run ProcessElements
// over every page, terminate PDFNet and report completion.
PDFNet::Initialize($LicenseKey);
// Wait for fonts to be loaded if they haven't already. This is done because
// PHP can run into errors when shutting down if font loading is still in progress.
PDFNet::GetSystemFontList();

echo nl2br("-------------------------------------------------\n");
echo nl2br("Sample 1 - Extract text data from all pages in the document.\n");
echo nl2br("Opening the input pdf...\n");

$doc = new PDFDoc($input_path."newsletter.pdf");
$doc->InitSecurityHandler();

$pgnum = $doc->GetPageCount();

$page_reader = new ElementReader();

// Visit the pages in iterator order.
$itr = $doc->GetPageIterator();
while ($itr->HasNext())
{
    $page_reader->Begin($itr->Current());
    ProcessElements($page_reader);
    $page_reader->End();
    $itr->Next();
}

PDFNet::Terminate();
echo nl2br("Done.\n");
66?>
1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6
7const { PDFNet } = require('@pdftron/pdfnet-node');
8const PDFTronLicense = require('../LicenseKey/LicenseKey');
9
10((exports) => {
11
12 exports.runElementReaderTest = () => {
13
14 const ProcessElements = async(reader) => {
15 // Read page contents
16 for (let element = await reader.next(); element !== null; element = await reader.next()) {
17 const temp = await element.getType();
18 switch (temp) {
19 case PDFNet.Element.Type.e_path: // Process path data...
20 {
21 const data = await element.getPathData();
22 /* eslint-disable no-unused-vars */
23 const operators = data.operators;
24 const points = data.points;
25 /* eslint-enable no-unused-vars */
26 }
27 break;
28 case PDFNet.Element.Type.e_text: // Process text strings...
29 {
30 const data = await element.getTextString();
31 console.log(data);
32 }
33 break;
34 case PDFNet.Element.Type.e_form: // Process form XObjects
35 reader.formBegin();
36 await ProcessElements(reader);
37 reader.end();
38 break;
39 default:
40 }
41 }
42 };
43
44 const main = async() => {
45 console.log('-------------------------------------------------');
46 console.log('Sample 1 - Extract text data from all pages in the document.');
47 console.log('Opening the input pdf...');
48 const ret = 0;
49
50 // Relative path to the folder containing test files.
51 const inputUrl = '../TestFiles/';
52
53 const doc = await PDFNet.PDFDoc.createFromFilePath(inputUrl + 'newsletter.pdf');// await if there is ret that we care about.
54 doc.initSecurityHandler();
55
56 // eslint-disable-next-line no-unused-vars
57 const pgnum = await doc.getPageCount();
58 const pageReader = await PDFNet.ElementReader.create();
59 const itr = await doc.getPageIterator(1);
60
61 // Read every page
62 for (itr; await itr.hasNext(); itr.next()) {
63 const curritr = await itr.current();
64 pageReader.beginOnPage(curritr);
65 await ProcessElements(pageReader);
66 pageReader.end();
67 }
68
69 console.log('Done.');
70 return ret;
71 };
72 PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function(error){console.log('Error: ' + JSON.stringify(error));}).then(function(){return PDFNet.shutdown();});
73 };
74 exports.runElementReaderTest();
75})(exports);
76// eslint-disable-next-line spaced-comment
77//# sourceURL=ElementReaderTest.js
1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10import unicodedata
11
12sys.path.append("../../LicenseKey/PYTHON")
13from LicenseKey import *
14
15# Relative path to the folder containing the test files.
16input_path = "../../TestFiles/"
17
def ProcessElements(reader):
    """Read elements from ``reader`` until ``Next()`` compares equal to None.

    Path elements have their points fetched, text elements are printed, and
    form XObjects are entered and processed recursively.
    """
    element = reader.Next()
    # `!= None` is kept on purpose: the element is an SDK proxy object and its
    # comparison semantics are not visible from this file.
    while element != None:
        kind = element.GetType()
        if kind == Element.e_path:
            # Fetched for demonstration only; the values are not used further.
            data = element.GetPathData()
            points = data.GetPoints()
        elif kind == Element.e_text:
            data = element.GetTextString()
            if sys.version_info.major == 2:
                # Python 2 only: force UTF-8 and reduce the text to ASCII
                # (unmappable characters replaced) before printing.
                reload(sys)
                sys.setdefaultencoding("utf-8")
                data = unicodedata.normalize('NFKC', unicode(data)).encode('ascii','replace')
            print(data)
        elif kind == Element.e_form:
            # Step into the form's display list, then step back out.
            reader.FormBegin()
            ProcessElements(reader)
            reader.End()
        element = reader.Next()
36
def main():
    """Open newsletter.pdf and run ProcessElements over every page.

    Initializes PDFNet first; afterwards closes the document, terminates
    PDFNet and prints "Done.".
    """
    PDFNet.Initialize(LicenseKey)

    for line in (
        "-------------------------------------------------",
        "Sample 1 - Extract text data from all pages in the document.",
        "Opening the input pdf...",
    ):
        print(line)

    doc = PDFDoc(input_path + "newsletter.pdf")
    doc.InitSecurityHandler()

    page_reader = ElementReader()

    # Visit the pages in iterator order.
    page_itr = doc.GetPageIterator()
    while page_itr.HasNext():
        page_reader.Begin(page_itr.Current())
        ProcessElements(page_reader)
        page_reader.End()
        page_itr.Next()

    # Close the open document to free up document memory sooner.
    doc.Close()
    PDFNet.Terminate()
    print("Done.")

if __name__ == '__main__':
    main()
1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6require '../../../PDFNetC/Lib/PDFNetRuby'
7include PDFNetRuby
8require '../../LicenseKey/RUBY/LicenseKey'
9
10$stdout.sync = true
11
12# Relative path to the folder containing the test files.
13input_path = "../../TestFiles/"
14
# Reads elements from +reader+ until Next() returns nil. Path elements have
# their points fetched, text elements are printed, and form XObjects are
# entered and processed recursively.
def ProcessElements(reader)
  element = reader.Next()
  until element.nil?
    kind = element.GetType()
    if kind == Element::E_path
      # Fetched for demonstration only; the values are not used further.
      data = element.GetPathData()
      points = data.GetPoints()
    elsif kind == Element::E_text
      puts element.GetTextString()
    elsif kind == Element::E_form
      # Step into the form's display list, then step back out.
      reader.FormBegin()
      ProcessElements(reader)
      reader.End()
    end
    element = reader.Next()
  end
end
32
# Script body: initialize PDFNet, open newsletter.pdf, run ProcessElements
# over every page, then close the document and terminate PDFNet.
PDFNet.Initialize(PDFTronLicense.Key)

puts "-------------------------------------------------"
puts "Sample 1 - Extract text data from all pages in the document."
puts "Opening the input pdf..."

doc = PDFDoc.new(input_path + "newsletter.pdf")
doc.InitSecurityHandler()

page_reader = ElementReader.new()

# Visit the pages in iterator order.
page_itr = doc.GetPageIterator()
while page_itr.HasNext()
  page_reader.Begin(page_itr.Current())
  ProcessElements(page_reader)
  page_reader.End()
  page_itr.Next()
end

# Close the open document to free up document memory sooner.
doc.Close()
PDFNet.Terminate
puts "Done."
1'
2' Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3'
4
5Imports System
6
7Imports pdftron
8Imports pdftron.Common
9Imports pdftron.Filters
10Imports pdftron.SDF
11Imports pdftron.PDF
12
' Console sample that walks the display list of every page of a PDF with an
' ElementReader and writes each text run it finds to the console.
Module ElementReaderTestVB
    ' Assigned once in the module constructor from PDFNetLoader.Instance().
    Dim pdfNetLoader As PDFNetLoader
    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub

    ' Reads elements from reader until Next() returns Nothing. Path elements
    ' have their point data fetched, text elements are printed, and form
    ' XObjects are entered and processed recursively.
    Sub ProcessElements(ByVal reader As ElementReader)
        Dim element As Element = reader.Next()
        Do While element IsNot Nothing
            Select Case element.GetType()
                Case Element.Type.e_path
                    ' Fetched for demonstration only; the values are not used further.
                    Dim pathData As PathData = element.GetPathData()
                    Dim data As Double() = pathData.points

                Case Element.Type.e_text
                    Console.WriteLine(element.GetTextString())

                Case Element.Type.e_form
                    ' Step into the form's display list, then step back out.
                    reader.FormBegin()
                    Console.WriteLine("Process Element.Type.e_form")
                    ProcessElements(reader)
                    reader.End()
            End Select
            element = reader.Next()
        Loop
    End Sub

    ' Entry point: initializes PDFNet, opens newsletter.pdf from the test-files
    ' folder, runs ProcessElements over every page, terminates PDFNet and
    ' prints "Done.".
    Sub Main()

        PDFNet.Initialize(PDFTronLicense.Key)

        ' Folder holding the test files, relative to the working directory.
        Dim input_path As String = "../../../../TestFiles/"

        Console.WriteLine("-------------------------------------------------")
        Console.WriteLine("Sample 1 - Extract text data from all pages in the document.")
        Console.WriteLine("Opening the input pdf...")

        ' Both resources are disposed when the Using block exits.
        Using doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf"), page_reader As ElementReader = New ElementReader
            doc.InitSecurityHandler()

            ' Visit the pages in iterator order.
            Dim itr As PageIterator = doc.GetPageIterator()
            Do While itr.HasNext()
                page_reader.Begin(itr.Current())
                ProcessElements(page_reader)
                page_reader.End()
                itr.Next()
            Loop
        End Using

        PDFNet.Terminate()
        Console.WriteLine("Done.")

    End Sub

End Module
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales