The samples below show how to read the page content of a document in each supported language.
C# C++ Go Java JavaScript Obj-C PHP Python Ruby VB 
// Open the document and create the element reader. Both are disposed
// deterministically via `using` instead of waiting for finalization
// (assumes both types implement IDisposable, as in the PDFNet .NET samples).
using (PDFDoc doc = new PDFDoc(filename))
using (ElementReader reader = new ElementReader())
{
  // Read page content on every page in the document
  for (PageIterator itr = doc.GetPageIterator(); itr.HasNext(); itr.Next())
  {
    // Open the page's display list, walk it, then close it before moving on
    reader.Begin(itr.Current());
    ProcessElements(reader);
    reader.End();
  }
}

// Walks the display list currently open on `reader`. When a Form XObject is
// encountered, its child display list is opened and processed recursively.
void ProcessElements(ElementReader reader)
{
  Element element;

  // Traverse the display list; the loop stops when reader.Next() returns null
  while ((element = reader.Next()) != null)
  {
    // The element-type enum is spelled Element.Type, matching the VB sample
    // that targets the same .NET API.
    switch (element.GetType())
    {
      case Element.Type.e_path:
      {
        if (element.IsClippingPath())
        {}
        // ...
        break;
      }
      case Element.Type.e_text:
      {
        Matrix2D text_mtx = element.GetTextMatrix();
        // ...
        break;
      }
      case Element.Type.e_form:
      {
        // Open the form's child display list, process it, then return to
        // the parent display list.
        reader.FormBegin();
        ProcessElements(reader);
        reader.End();
        break;
      }
    }
  }
}
1 PDFDoc  doc ( filename ); 
2 ElementReader reader; 
3 
4 //  Read page content on every page in the document 
5 PageIterator itr; 
6 for  (itr = doc. GetPageIterator (); itr. HasNext (); itr. Next ()) 
7 { 
8   // Read the page 
9   reader. Begin (itr. Current ()); 
10   ProcessElements (reader); 
11   reader. End (); 
12 } 
13 
14 void  ProcessElements ( ElementReader  reader ) 
15 { 
16   Element element; 
17 
18   // Traverse the page display list 
19   while  ((element  =  reader. Next ())  !=  null) 
20   { 
21     switch  (element. GetType ()) 
22     { 
23       case  Element :: e_path: 
24       { 
25         if  (element. IsClippingPath ()) 
26         {} 
27         // ... 
28         break ; 
29       } 
30       case  Element :: e_text: 
31       { 
32         Matrix2D text_mtx  =  element. GetTextMatrix (); 
33         // ... 
34         break ; 
35       } 
36       case  Element :: e_form: 
37       { 
38         reader. FormBegin (); 
39         ProcessElements (reader); 
40         reader. End (); 
41         break ; 
42       } 
43     } 
44   } 
45 } 
1 doc  :=  NewPDFDoc (filename) 
2 reader  :=  NewElementReader () 
3 
4 //  Read page content on every page in the document 
5 itr  :=  doc. GetPageIterator () 
6 for  itr. HasNext (){ 
7     page  :=  itr. Current () 
8     reader. Begin (page) 
9     ProcessElements (reader) 
10     reader. End () 
11     itr. Next () 
12 } 
13 
14 func  ProcessElements ( reader  ElementReader ){ 
15   element  :=  reader. Next () 
16 
17   // Traverse the page display list 
18   for  element. GetMp_elem (). Swigcptr ()  !=  0 { 
19     etype  :=  element. GetType () 
20     if  etype  ==  ElementE_path{ 
21       if  (element. IsClippingPath ()) 
22       {} 
23       // ... 
24     } else if  etype  ==  ElementE_text { 
25       text_mtx  :=  element. GetTextMatrix () 
26       // ... 
27     } else if  etype  ==  ElementE_form { 
28       reader. FormBegin () 
29       ProcessElements (reader) 
30       reader. End () 
31     } 
32     element  =  reader. Next () 
33 	} 
34 } 
1 PDFDoc  doc  =  new  PDFDoc (filename); 
2 ElementReader  reader  =  new  ElementReader (); 
3 
4 //  Read page content on every page in the document 
5 for  ( PageIterator  itr = doc. getPageIterator (); itr. hasNext (); itr. next ()) 
6 { 
7   // Read the page 
8   reader. begin (itr. current ()); 
9   ProcessElements (reader); 
10   reader. end (); 
11 } 
12 
13 void  ProcessElements ( ElementReader  reader) 
14 { 
15   Element  element; 
16 
17   // Traverse the page display list 
18   while  ((element  =  reader. next ())  !=  null ) 
19   { 
20     switch  (element. getType ()) 
21     { 
22       case  Element.e_path : 
23       { 
24         if  (element. isClippingPath ()) 
25         {} 
26         // ... 
27         break ; 
28       } 
29       case  Element.e_text : 
30       { 
31         Matrix2D  text_mtx  =  element. getTextMatrix (); 
32         // ... 
33         break ; 
34       } 
35       case  Element.ElementType.e_form : 
36       { 
37         reader. formBegin (); 
38         ProcessElements (reader); 
39         reader. end (); 
40         break ; 
41       } 
42     } 
43   } 
44 } 
/**
 * Opens the document and walks the display list of every page.
 * The PDFNet calls here are asynchronous, so each one is awaited to keep the
 * reader's begin / traverse / end sequence strictly ordered.
 */
async function main() {
  const doc = await PDFNet.PDFDoc.createFromURL(filename);
  const reader = await PDFNet.ElementReader.create();

  // Read page content on every page in the document
  const itr = await doc.getPageIterator();
  for (; await itr.hasNext(); await itr.next())
  {
    // Read the page
    const page = await itr.current();
    await reader.beginOnPage(page);
    await ProcessElements(reader);
    await reader.end();
  }

  /**
   * Walks the display list currently open on `reader`, recursing into
   * Form XObjects.
   */
  async function ProcessElements(reader)
  {
    // Traverse the display list until reader.next() resolves to null
    for (let element = await reader.next(); element !== null; element = await reader.next()) {
      const elementType = await element.getType();
      switch (elementType)
      {
        case PDFNet.Element.Type.e_path:
        {
          // Await the result: an un-awaited Promise is always truthy
          if (await element.isClippingPath())
          {}
          // ...
          break;
        }
        case PDFNet.Element.Type.e_text:
        {
          const text_mtx = await element.getTextMatrix();
          // ...
          break;
        }
        case PDFNet.Element.Type.e_form:
        {
          // The child display list must be fully traversed before it is
          // closed, so the recursive call has to be awaited.
          await reader.formBegin();
          await ProcessElements(reader);
          await reader.end();
          break;
        }
      }
    }
  }
}
PDFNet.runWithCleanup(main);
PTPDFDoc *doc = [[PTPDFDoc alloc] initWithFilepath: filename];
PTElementReader *reader = [[PTElementReader alloc] init];

// Read page content on every page in the document, starting from page 1.
// The iterator type carries the same PT prefix as the other PDFNet classes
// used in this snippet (PTPDFDoc, PTElementReader, PTElement).
for (PTPageIterator *itr = [doc GetPageIterator: 1]; [itr HasNext]; [itr Next])
{
  // Read the page
  [reader Begin: [itr Current]];
  ProcessElements(reader);
  [reader End];
}

// Walks the display list currently open on `reader`. Form XObjects are
// opened as child display lists and processed recursively.
void ProcessElements(PTElementReader *reader)
{
  // The traversal stops as soon as [reader Next] yields nil.
  for (PTElement *current = [reader Next]; current != nil; current = [reader Next])
  {
    switch ([current GetType])
    {
      case e_ptpath:
      {
        if ([current IsClippingPath])
        {
        }
        // ...
        break;
      }

      case e_pttext_obj:
      {
        PTMatrix2D *textMatrix = [current GetTextMatrix];
        // ...
        break;
      }

      case e_ptform:
      {
        // Descend into the form's display list, then return to the parent.
        [reader FormBegin];
        ProcessElements(reader);
        [reader End];
        break;
      }
    }
  }
}
$doc = new PDFDoc($filename);
$reader = new ElementReader();

// Walk the display list of each page in turn.
$pages = $doc->GetPageIterator();
while ($pages->HasNext())
{
  $reader->Begin($pages->Current());  // open this page's display list
  ProcessElements($reader);
  $reader->End();                     // close it before advancing
  $pages->Next();
}

// Walks the display list currently open on $reader. When a Form XObject is
// encountered, its child display list is opened and processed recursively.
function ProcessElements($reader)
{
  // Traverse the display list; the loop stops when Next() returns null
  while (($element = $reader->Next()) != null)
  {
    switch ($element->GetType())
    {
      case Element::e_path:
      {
        if ($element->IsClippingPath())
        {}
        // ...
        break;
      }
      case Element::e_text:
      {
        $text_mtx = $element->GetTextMatrix();
        // ...
        break;
      }
      case Element::e_form:
      {
        // Open the form's child display list, process it, then return to
        // the parent display list.
        $reader->FormBegin();
        ProcessElements($reader);
        $reader->End();
        // Explicit break, like the other cases, so a case added below
        // does not silently fall through.
        break;
      }
    }
  }
}
# Defined before the page loop: a Python script runs top to bottom, so the
# function must exist by the time the loop calls it.
def ProcessElements(reader):
  """Walk the display list currently open on `reader`.

  When a Form XObject is encountered, its child display list is opened and
  processed recursively.
  """
  # Traverse the display list until reader.Next() returns None
  element = reader.Next()
  while element != None:
    # Named elem_type so the builtin `type` is not shadowed
    elem_type = element.GetType()
    if elem_type == Element.e_path:
      if element.IsClippingPath():
        pass
      # ...
    elif elem_type == Element.e_text:
      text_mtx = element.GetTextMatrix()
      # ...
    elif elem_type == Element.e_form:
      # Open the form's child display list, process it, then return to the
      # parent display list.
      reader.FormBegin()
      ProcessElements(reader)
      reader.End()
    element = reader.Next()

doc = PDFDoc(filename)
reader = ElementReader()

# Read page content on every page in the document
itr = doc.GetPageIterator()
while itr.HasNext():
  # Read the page
  reader.Begin(itr.Current())
  ProcessElements(reader)
  reader.End()
  itr.Next()
# Walks the display list currently open on +reader+. When a Form XObject is
# encountered, its child display list is opened and processed recursively.
#
# Defined before the page loop: a Ruby script runs top to bottom, so the
# method must exist by the time the loop calls it.
def ProcessElements(reader)
  # Traverse the display list until reader.Next() returns nil
  element = reader.Next()
  while !element.nil? do
    type = element.GetType()
    case type
    when Element::E_path
      if (element.IsClippingPath())
      end
      # ...
    when Element::E_text
      text_mtx = element.GetTextMatrix()
      # ...
    when Element::E_form
      # Open the form's child display list, process it, then return to the
      # parent display list.
      reader.FormBegin()
      ProcessElements(reader)
      reader.End()
    end
    # Advance to the next element; without this the loop never terminates.
    element = reader.Next()
  end
end

doc = PDFDoc.new(filename)
reader = ElementReader.new()

# Read page content on every page in the document
itr = doc.GetPageIterator()
while itr.HasNext() do
  # Read the page
  reader.Begin(itr.Current())
  ProcessElements(reader)
  reader.End()
  itr.Next()
end
Dim doc As PDFDoc = New PDFDoc(filename)
Dim reader As ElementReader = New ElementReader

' Read page content on every page in the document
Dim itr As PageIterator = doc.GetPageIterator()
While itr.HasNext()
  ' Read the page
  reader.Begin(itr.Current())
  ProcessElements(reader)
  reader.End()
  ' Advance to the next page; without this the loop never terminates.
  itr.Next()
End While

' Walks the display list currently open on reader. Form XObjects are opened
' as child display lists and processed recursively.
Sub ProcessElements(ByVal reader As ElementReader)
  Dim current As Element = reader.Next()

  ' Keep going until the reader has no more elements to return
  Do Until current Is Nothing
    If current.GetType() = Element.Type.e_path Then
      If current.IsClippingPath() Then
      End If
      ' ...
    ElseIf current.GetType() = Element.Type.e_text Then
      Dim textMatrix As Matrix2D = current.GetTextMatrix()
      ' ...
    ElseIf current.GetType() = Element.Type.e_form Then
      ' Descend into the form's display list, then return to the parent
      reader.FormBegin()
      ProcessElements(reader)
      reader.End()
    End If

    current = reader.Next()
  Loop
End Sub
Read Elements Across All PDF Pages 
About reading page content

Page content is represented as a sequence of graphical Elements such as paths, text, images, and forms. The ordering of Elements in the display list affects only the order in which they are painted: Elements that occur later in the display list can obscure earlier ones.
A display list can be traversed using an ElementReader object. To start traversing the display list, call reader.Begin(). Then, reader.Next() will return subsequent Elements until null is returned (marking the end of the display list). While ElementReader only works with one page at a time, the same ElementReader object may be reused to process multiple pages.
A PDF page display list may contain child display lists of Form XObjects, Type3 font glyphs, and tiling patterns. A Form XObject is a self-contained description of any sequence of graphics objects (such as path objects, text objects, and sampled images), defined as a PDF content stream. It may be painted multiple times — either on several pages or at several locations on the same page — and will produce the same results each time (subject only to the graphics state at the time the Form XObject is painted). To open a child display list for a Form XObject, call the reader.FormBegin() method. To return processing to the parent display list, call reader.End(). Processing of a Form XObject (traversing its child display list) is illustrated in the sample code above.
Note that, in the sample code above, when the reader returns an element whose type is e_form (a Form XObject), calling reader.FormBegin() opens that element's child display list. The child display list becomes the current display list until it is closed using reader.End(). At that point, processing returns to the parent display list, and the next Element returned is the one following the Form XObject. Also note that, because Form XObjects may be nested, a child display list can have child display lists of its own. The sample above traverses these nested Form XObjects recursively.
Similarly, a pattern display list can be opened using reader.PatternBegin(), and a Type3 glyph display list can be opened using the reader.Type3FontBegin() method.