More languages
Some test text!
More languages
Sample PHP code for using PDFTron SDK to traverse the page display list using ElementReader. Learn more about our PHP PDF Library and PDF Parsing & Content Extraction Library.
Get Started | Samples | Download. To run this sample, get started with a free trial of Apryse SDK.
<?php
//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2022 by PDFTron Systems Inc. All Rights Reserved.
// Consult LICENSE.txt regarding license information.
//---------------------------------------------------------------------------------------
if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
include("../../../PDFNetC/Lib/PDFNetPHP.php");
include("../../LicenseKey/PHP/LicenseKey.php");
// Folder holding the sample input documents, expressed relative to the
// current working directory of the running script.
$input_path = getcwd() . "/../../TestFiles/";
/**
 * Reads elements from $reader until Next() yields nothing more, handling
 * each one according to its type:
 *   - path elements: the path data and its points are fetched,
 *   - text elements: the text string is echoed followed by a line break,
 *   - form XObjects: the form is entered and its elements are processed
 *     recursively with the same reader.
 * Elements of any other type are skipped.
 *
 * @param mixed $reader Reader exposing Next(), FormBegin() and End().
 */
function ProcessElements($reader) {
    // Loose comparison is intentional: it mirrors the end-of-stream check
    // the reader is expected to satisfy.
    while (($element = $reader->Next()) != null) {
        $kind = $element->GetType();

        if ($kind == Element::e_path) {
            // Process path data...
            $pathData = $element->GetPathData();
            $pathPoints = $pathData->GetPoints();
        } elseif ($kind == Element::e_text) {
            // Process text strings...
            $text = $element->GetTextString();
            echo nl2br($text."\n");
        } elseif ($kind == Element::e_form) {
            // Process form XObjects: descend into the form, walk its
            // contents, then return to the enclosing content stream.
            $reader->FormBegin();
            ProcessElements($reader);
            $reader->End();
        }
    }
}
// Start the PDFNet runtime with the license key provided by LicenseKey.php.
PDFNet::Initialize($LicenseKey);
PDFNet::GetSystemFontList(); // Wait for fonts to be loaded if they haven't already. This is done because PHP can run into errors when shutting down if font loading is still in progress.

// The work is wrapped in try/finally so that PDFNet::Terminate() still runs
// when opening or reading the document throws.
try {
    // Extract text data from all pages in the document
    echo nl2br("-------------------------------------------------\n");
    echo nl2br("Sample 1 - Extract text data from all pages in the document.\n");
    echo nl2br("Opening the input pdf...\n");

    $doc = new PDFDoc($input_path."newsletter.pdf");
    $doc->InitSecurityHandler();

    // A single reader is reused for every page.
    $page_reader = new ElementReader();
    for ($itr = $doc->GetPageIterator(); $itr->HasNext(); $itr->Next()) // Read every page
    {
        $page_reader->Begin($itr->Current());
        ProcessElements($page_reader);
        $page_reader->End();
    }
} finally {
    PDFNet::Terminate();
}

echo nl2br("Done.\n");
?>