ElementReader

Sample C# code that uses the Apryse SDK's ElementReader to traverse the page display list. Learn more about our Server SDK and PDF Data Extraction SDK capabilities.

//
// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
//

using System;
using pdftron;
using pdftron.Common;
using pdftron.Filters;
using pdftron.SDF;
using pdftron.PDF;

namespace ElementReaderTestCS
{
    /// <summary>
    /// Console sample that walks the content of every page of a PDF with an
    /// <c>ElementReader</c> and writes each text string it encounters to the console.
    /// </summary>
    class Class1
    {
        // Obtained once, as part of type initialization.
        private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();

        // Intentionally empty.
        // NOTE(review): presumably kept so the field initializer above runs at a
        // deterministic point (explicit static constructor) - confirm before removing.
        static Class1() {}

        /// <summary>
        /// Reads elements from <paramref name="reader"/> until <c>Next()</c> returns null
        /// and dispatches on the element type: path data is fetched, text strings are
        /// printed, and form XObjects are entered and processed recursively.
        /// </summary>
        /// <param name="reader">
        /// A reader on which the caller has already called <c>Begin</c> (or <c>FormBegin</c>);
        /// the caller is also responsible for the matching <c>End</c>.
        /// </param>
        static void ProcessElements(ElementReader reader)
        {
            Element element;
            while ((element = reader.Next()) != null) // Read page contents
            {
                switch (element.GetType())
                {
                    case Element.Type.e_path: // Process path data...
                    {
                        // Fetched only to show the access pattern; the sample does
                        // nothing further with the points.
                        PathData data = element.GetPathData();
                        double[] points = data.points;
                        break;
                    }

                    case Element.Type.e_text: // Process text strings...
                    {
                        String str = element.GetTextString();
                        Console.WriteLine(str);
                        break;
                    }

                    case Element.Type.e_form: // Process form XObjects
                    {
                        Console.WriteLine("Process Element.Type.e_form");
                        // Step into the form, process its elements with the same
                        // routine, then step back out to the enclosing content.
                        reader.FormBegin();
                        ProcessElements(reader);
                        reader.End();
                        break;
                    }
                }
            }
        }

        /// <summary>
        /// The main entry point for the application. Opens "newsletter.pdf" from the
        /// test-files folder and runs <see cref="ProcessElements"/> over every page.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        [STAThread]
        static void Main(string[] args)
        {
            PDFNet.Initialize(PDFTronLicense.Key);

            // Relative path to the folder containing test files.
            string input_path = "../../../../TestFiles/";

            try
            {
                Console.WriteLine("-------------------------------------------------");
                Console.WriteLine("Sample 1 - Extract text data from all pages in the document.");

                // Open the test file
                Console.WriteLine("Opening the input pdf...");
                using (PDFDoc doc = new PDFDoc(input_path + "newsletter.pdf"))
                using (ElementReader page_reader = new ElementReader())
                {
                    doc.InitSecurityHandler();

                    PageIterator itr;
                    for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next()) // Read every page
                    {
                        page_reader.Begin(itr.Current());
                        ProcessElements(page_reader);
                        page_reader.End();
                    }
                    Console.WriteLine("Done.");
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
            finally
            {
                // In a finally block so the library is shut down even when an
                // exception other than PDFNetException propagates out of the try.
                PDFNet.Terminate();
            }
        }
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales