ElementReader

Sample JavaScript code that uses the Apryse SDK to traverse a page's display list with ElementReader. Learn more about our Web SDK and PDF Data Extraction SDK capabilities.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6(exports => {
7
8
9
10
11 exports.runElementReaderTest = () => {
12 const PDFNet = exports.Core.PDFNet;
13
/**
 * Reads elements from `reader` until `reader.next()` resolves to `null`,
 * handling each one according to its element type:
 *   - path elements: their path data (operators and points) is fetched;
 *   - text elements: their text string is written to the console;
 *   - form elements: the reader descends into the form, this function
 *     recurses over its contents, and the reader is ended afterwards.
 * Every other element type is skipped.
 *
 * Every reader/element call is awaited so that the steps run strictly in
 * sequence and a rejected call surfaces to the caller instead of becoming an
 * unhandled promise rejection.
 *
 * @param {object} reader - Element reader exposing async `next()`,
 *   `formBegin()` and `end()`; expected to be already begun on a page or form.
 * @returns {Promise<void>} Settles once the reader reports no more elements.
 */
const ProcessElements = async reader => {
  // Read page contents
  for (let element = await reader.next(); element !== null; element = await reader.next()) {
    const elementType = await element.getType();
    switch (elementType) {
      case PDFNet.Element.Type.e_path: {
        // Process path data...
        // The sample only shows how to obtain the data; nothing consumes it yet.
        /* eslint-disable @typescript-eslint/no-unused-vars */
        const { operators, points } = await element.getPathData();
        /* eslint-enable @typescript-eslint/no-unused-vars */
        break;
      }
      case PDFNet.Element.Type.e_text: {
        // Process text strings...
        const text = await element.getTextString();
        console.log(text);
        break;
      }
      case PDFNet.Element.Type.e_form:
        // Process form XObjects
        // Wait for formBegin() before recursing so the nested traversal only
        // starts once the reader has switched to the form's contents.
        await reader.formBegin();
        await ProcessElements(reader);
        await reader.end();
        break;
      default:
        // Element types without dedicated handling are ignored.
        break;
    }
  }
};
43
/**
 * Sample 1: opens `newsletter.pdf` from the test-files folder, then visits
 * every page through a page iterator starting at page 1 and passes each
 * page's elements to `ProcessElements`.
 *
 * Each PDFNet call is awaited so the steps run in the written order and any
 * rejection propagates out of `main` rather than being silently dropped.
 *
 * @returns {Promise<number>} Always resolves to 0 when the run completes.
 */
const main = async () => {
  console.log('-------------------------------------------------');
  console.log('Sample 1 - Extract text data from all pages in the document.');
  console.log('Opening the input pdf...');

  // Relative path to the folder containing test files.
  const inputUrl = '../TestFiles/';

  const doc = await PDFNet.PDFDoc.createFromURL(`${inputUrl}newsletter.pdf`);
  await doc.initSecurityHandler();
  // NOTE(review): there is no explicit unlock; main is expected to be run via
  // PDFNet.runWithCleanup, which presumably releases the lock - confirm.
  await doc.lock();

  const pageReader = await PDFNet.ElementReader.create();
  const itr = await doc.getPageIterator(1);

  // Read every page
  for (; await itr.hasNext(); await itr.next()) {
    const page = await itr.current();
    await pageReader.beginOnPage(page);
    await ProcessElements(pageReader);
    await pageReader.end();
  }

  console.log('Done.');
  return 0;
};
73
74 // add your own license key as the second parameter, e.g. PDFNet.runWithCleanup(main, 'YOUR_LICENSE_KEY')
75 PDFNet.runWithCleanup(main);
76 };
77})(window);
78// eslint-disable-next-line spaced-comment
79//# sourceURL=ElementReaderTest.js

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales