Some test text!

Discord Logo

Chat with us

PDFTron is now Apryse. Learn more here.

Read elements across all PDF pages in PHP

More languages

More languages
JavaScript
Java (Android)
C++
C#
C# (.NET Core)
Go
Java
Kotlin
Obj-C
JS (Node.js)
PHP
Python
Ruby
Swift
C# (UWP)
VB
C# (Xamarin)

Sample PHP code that uses the PDFTron SDK to traverse a page's display list with ElementReader. Learn more about our PHP PDF Library and our PDF Parsing & Content Extraction Library.

Get Started Samples Download

To run this sample, get started with a free trial of Apryse SDK.

<?php
//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2022 by PDFTron Systems Inc. All Rights Reserved.
// Consult LICENSE.txt regarding license information.
//---------------------------------------------------------------------------------------
// Load the PDFNet PHP wrapper only when it is present at this relative path.
// NOTE: the if has no braces, so it guards ONLY the include on the next line;
// the LicenseKey.php include below always runs.
if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
include("../../../PDFNetC/Lib/PDFNetPHP.php");
// Always included. $LicenseKey, which is passed to PDFNet::Initialize() later in
// this script, is not assigned anywhere in this file — presumably this include
// defines it (TODO confirm).
include("../../LicenseKey/PHP/LicenseKey.php");

// Relative path to the folder containing the test files.
// Built from getcwd(), so it depends on the directory the script is run from.
$input_path = getcwd()."/../../TestFiles/";


/**
 * Reads elements from the given reader until Next() yields null and handles
 * each one according to its type.
 *
 * - Path elements: the path data and its points are fetched (the values are
 *   not used any further here).
 * - Text elements: the text string is echoed through nl2br() with a trailing
 *   newline appended.
 * - Form XObjects: the same reader is processed recursively between
 *   FormBegin() and End().
 * - Any other element type is skipped.
 *
 * @param mixed $reader Element reader; must provide Next(), FormBegin() and End().
 */
function ProcessElements($reader) {
	while (($current = $reader->Next()) != null)
	{
		$kind = $current->GetType();

		if ($kind == Element::e_path)
		{
			// Path geometry is retrieved to show the access pattern; nothing consumes it.
			$pathData = $current->GetPathData();
			$pathPoints = $pathData->GetPoints();
		}
		elseif ($kind == Element::e_text)
		{
			// nl2br() inserts "<br />" in front of the appended newline.
			$text = $current->GetTextString();
			echo nl2br($text."\n");
		}
		elseif ($kind == Element::e_form)
		{
			// Recurse with the same reader for the contents of the form XObject.
			$reader->FormBegin();
			ProcessElements($reader);
			$reader->End();
		}
	}
}

	// Bring up the PDFNet library with the license key.
	PDFNet::Initialize($LicenseKey);
	// Wait for fonts to be loaded if they haven't been already: PHP can run into
	// errors when shutting down if font loading is still in progress.
	PDFNet::GetSystemFontList();

	// Sample 1: extract text data from all pages in the document.
	echo nl2br("-------------------------------------------------\n"),
		nl2br("Sample 1 - Extract text data from all pages in the document.\n"),
		nl2br("Opening the input pdf...\n");

	$pdf = new PDFDoc($input_path."newsletter.pdf");
	$pdf->InitSecurityHandler();

	// Page count is queried but not used by the rest of the script.
	$pageCount = $pdf->GetPageCount();

	$elementReader = new ElementReader();

	// Visit every page, handing each page's content to ProcessElements().
	$pages = $pdf->GetPageIterator();
	while ($pages->HasNext())
	{
		$elementReader->Begin($pages->Current());
		ProcessElements($elementReader);
		$elementReader->End();
		$pages->Next();
	}

	PDFNet::Terminate();
	echo nl2br("Done.\n");
?>