Some test text!

Discord Logo

Chat with us

PDFTron is now Apryse, learn more here.

Read elements across all PDF pages in C++

More languages

More languages
JavaScript
Java (Android)
C++
C#
C# (.NET Core)
Go
Java
Kotlin
Obj-C
JS (Node.js)
PHP
Python
Ruby
Swift
C# (UWP)
VB
C# (Xamarin)

Sample C++ code that uses the PDFTron SDK to traverse a page's display list with ElementReader. Learn more about our C++ PDF Library and our PDF Parsing & Content Extraction Library.

Get Started Samples Download

To run this sample, get started with a free trial of Apryse SDK.

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------

#include <PDF/PDFNet.h>
#include <PDF/PDFDoc.h>
#include <PDF/ElementReader.h>
#include <PDF/Element.h>
#include <iostream>
#include "../../LicenseKey/CPP/LicenseKey.h"

using namespace pdftron;
using namespace std;
using namespace PDF;

// Reads elements from `reader` until Next() returns an element that evaluates
// to false, handling each one according to its type:
//   - path elements: the path's operator and point arrays are fetched
//     (this sample does nothing further with them);
//   - text elements: the text string is written to standard output, one per line;
//   - form XObjects: the form is entered and its elements are processed recursively.
// Elements of any other type are ignored.
void ProcessElements(ElementReader& reader) 
{
	for (Element current = reader.Next(); current; current = reader.Next())
	{
		const Element::Type kind = current.GetType();

		if (kind == Element::e_path)
		{
			// Process path data...
			PathData path = current.GetPathData();
			const std::vector<unsigned char>& path_ops = path.GetOperators();
			const std::vector<double>& path_pts = path.GetPoints();
		}
		else if (kind == Element::e_text)
		{
			// Process text strings...
			const UString text = current.GetTextString();
			cout << text << endl;
		}
		else if (kind == Element::e_form)
		{
			// Process form XObjects: step into the form, walk its contents with
			// the same reader, then step back out.
			reader.FormBegin();
			ProcessElements(reader);
			reader.End();
		}
	}
}


int main(int argc, char *argv[])
{
	int ret = 0;
	PDFNet::Initialize(LicenseKey);

	// Relative path to the folder containing test files.
	string input_path =  "../../TestFiles/";

	try	// Extract text data from all pages in the document
	{
		cout << "-------------------------------------------------" << endl;
		cout << "Sample 1 - Extract text data from all pages in the document." << endl;
		cout << "Opening the input pdf..." << endl;

		PDFDoc doc((input_path + "newsletter.pdf").c_str());
		doc.InitSecurityHandler();

		int pgnum = doc.GetPageCount();
		
		PageIterator itr;
		ElementReader page_reader;

		for (itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())		//  Read every page
		{				
			page_reader.Begin(itr.Current());
			ProcessElements(page_reader);
			page_reader.End();
		}

		cout << "Done." << endl;
	}
	catch(Common::Exception& e)
	{
		cout << e << endl;
		ret = 1;
	}
	catch(...)
	{
		cout << "Unknown Exception" << endl;
		ret = 1;
	}

	PDFNet::Terminate();
	return ret;
}