Some test text!

Search
Hamburger Icon

Read elements across all PDF pages in JavaScript

More languages

More languages
JavaScript
Java (Android)
C++
C#
Go
Java
Kotlin
Obj-C
JS (Node.js)
PHP
Python
Ruby
Swift
C# (UWP)
VB
C# (Xamarin)

Sample JavaScript code that uses the Apryse SDK to traverse a page's display list with ElementReader. Learn more about our JavaScript PDF Library and PDF Parsing & Content Extraction Library.

Get Started Samples Download

To run this sample, get started with a free trial of Apryse SDK.

JavaScript

HTML

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------

(exports => {




  exports.runElementReaderTest = () => {
    const PDFNet = exports.Core.PDFNet;

    /**
     * Walks every element the reader currently exposes: reads path data, logs the
     * string of each text element, and recurses into form XObjects.
     *
     * @param {object} reader - Element reader that has already been started on a
     *   page or form (main passes the result of PDFNet.ElementReader.create()).
     * @returns {Promise<void>} Settles once reader.next() has resolved to null.
     */
    const ProcessElements = async reader => {
      // Read page contents; the loop stops when reader.next() resolves to null.
      for (let element = await reader.next(); element !== null; element = await reader.next()) {
        const elementType = await element.getType();
        switch (elementType) {
          case PDFNet.Element.Type.e_path: // Process path data...
            {
              // Placeholder showing how to obtain the path data; the values are
              // intentionally unused in this sample.
              /* eslint-disable @typescript-eslint/no-unused-vars */
              const { operators, points } = await element.getPathData();
              /* eslint-enable @typescript-eslint/no-unused-vars */
            }
            break;
          case PDFNet.Element.Type.e_text: // Process text strings...
            {
              const text = await element.getTextString();
              console.log(text);
            }
            break;
          case PDFNet.Element.Type.e_form: // Process form XObjects
            // Await both calls so the nested pass only starts after the reader has
            // entered the form, and so a failure rejects this function instead of
            // surfacing as an unhandled rejection.
            await reader.formBegin();
            await ProcessElements(reader);
            await reader.end();
            break;
          default:
          // Other element types are ignored by this sample.
        }
      }
    };

    /**
     * Opens newsletter.pdf from the test-files folder and runs ProcessElements
     * over every page, starting the page iterator at page 1.
     *
     * @returns {Promise<number>} Always resolves to 0; rejects if any PDFNet call fails.
     */
    const main = async () => {
      console.log('-------------------------------------------------');
      console.log('Sample 1 - Extract text data from all pages in the document.');
      console.log('Opening the input pdf...');

      // Relative path to the folder containing test files.
      const inputUrl = '../TestFiles/';

      const doc = await PDFNet.PDFDoc.createFromURL(`${inputUrl}newsletter.pdf`);
      // Await the setup calls so they complete before reading starts and so a
      // failure rejects main instead of becoming an unhandled rejection.
      await doc.initSecurityHandler();
      await doc.lock();

      const pageReader = await PDFNet.ElementReader.create();
      const itr = await doc.getPageIterator(1);

      // Read every page; the iterator is advanced (and awaited) before the next
      // hasNext() check.
      for (; await itr.hasNext(); await itr.next()) {
        const page = await itr.current();
        await pageReader.beginOnPage(page);
        await ProcessElements(pageReader);
        await pageReader.end();
      }

      console.log('Done.');
      return 0;
    };

    // add your own license key as the second parameter, e.g. PDFNet.runWithCleanup(main, 'YOUR_LICENSE_KEY')
    PDFNet.runWithCleanup(main);
  };
})(window);
// eslint-disable-next-line spaced-comment
//# sourceURL=ElementReaderTest.js