Element Reader

Sample JavaScript code that uses the Apryse SDK's ElementReader to traverse a page's display list.

Learn more about our Web SDK and PDF Data Extraction SDK Capabilities.

This sample works with Full-API for WebViewer.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6(exports => {
7
8
9
10
11 exports.runElementReaderTest = () => {
12 const PDFNet = exports.Core.PDFNet;
13
14 const ProcessElements = async reader => {
15 // Read page contents
16 for (let element = await reader.next(); element !== null; element = await reader.next()) {
17 const temp = await element.getType();
18 switch (temp) {
19 case PDFNet.Element.Type.e_path: // Process path data...
20 {
21 const data = await element.getPathData();
22 /* eslint-disable @typescript-eslint/no-unused-vars */
23 const operators = data.operators;
24 const points = data.points;
25 /* eslint-enable @typescript-eslint/no-unused-vars */
26 }
27 break;
28 case PDFNet.Element.Type.e_text: // Process text strings...
29 {
30 const data = await element.getTextString();
31 console.log(data);
32 }
33 break;
34 case PDFNet.Element.Type.e_form: // Process form XObjects
35 reader.formBegin();
36 await ProcessElements(reader);
37 reader.end();
38 break;
39 default:
40 }
41 }
42 };
43
44 const main = async () => {
45 console.log('-------------------------------------------------');
46 console.log('Sample 1 - Extract text data from all pages in the document.');
47 console.log('Opening the input pdf...');
48 const ret = 0;
49
50 // Relative path to the folder containing test files.
51 const inputUrl = '../TestFiles/';
52
53 const doc = await PDFNet.PDFDoc.createFromURL(inputUrl + 'newsletter.pdf'); // await if there is ret that we care about.
54 doc.initSecurityHandler();
55 doc.lock();
56
57 // eslint-disable-next-line @typescript-eslint/no-unused-vars
58 const pgnum = await doc.getPageCount();
59 const pageReader = await PDFNet.ElementReader.create();
60 const itr = await doc.getPageIterator(1);
61
62 // Read every page
63 for (itr; await itr.hasNext(); itr.next()) {
64 const curritr = await itr.current();
65 pageReader.beginOnPage(curritr);
66 await ProcessElements(pageReader);
67 pageReader.end();
68 }
69
70 console.log('Done.');
71 return ret;
72 };
73
74 // add your own license key as the second parameter, e.g. PDFNet.runWithCleanup(main, 'YOUR_LICENSE_KEY')
75 PDFNet.runWithCleanup(main);
76 };
77})(window);
78// eslint-disable-next-line spaced-comment
79//# sourceURL=ElementReaderTest.js

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales