PDF Text Extractor Showcase Demo Code Sample

Requirements

Quickly extract text from PDFs—either from entire pages or just the highlighted sections.

This demo allows you to:

Upload your own PDF file
Highlight text to extract just that selection
Preview the extracted text for both the full page and the highlighted sections

Implementation steps
To add PDF Text Extraction capability with WebViewer:

Step 1: Choose your preferred web stack for WebViewer
Step 2: Add the ES6 JavaScript sample code provided in this guide

1// ES6 Compliant Syntax
2// GitHub Copilot, Claude Sonnet 4 (Preview), October 14, 2025
3// File: showcase-demos/pdf-text-extractor/index.js
4import WebViewer from '@pdftron/webviewer';
5
6// Global variables to track state (module-level, shared and mutated by the functions in this file)
// URL handed to WebViewer as `initialDoc`.
// NOTE(review): the `redaction` prefix looks like a leftover from another demo — confirm before renaming.
7let redactionDemoFile = "https://apryse.s3.amazonaws.com/public/files/samples/section-508.pdf";
// Page count of the loaded document; written by the documentLoaded handler and by getText.
8let pageCount = 0;
// Text of the most recently extracted page; written by getText.
9let textContent = '';
// Newline-joined text found under annotations; written by getAnnotText.
10let annotTextContent = '';
// Page most recently accepted by setPage; stays 0 until setPage first succeeds.
11let currentPage = 0;
12
/**
 * Creates the WebViewer instance inside the `#viewer` element and wires up the
 * listeners that drive text extraction.
 *
 * Once the viewer promise resolves this function:
 *  - on `documentLoaded`: switches to single-page layout, keeps the page
 *    navigation component visible, records the page count and calls
 *    `setPage(1)` to trigger the first extraction;
 *  - on `pageNumberUpdated`: calls `viewerUpdated`;
 *  - on `annotationChanged`: calls `getAnnotListener`;
 *  - builds the side panel via `createUIElements`.
 *
 * Logs an error and returns without creating a viewer when `#viewer` is not
 * in the DOM. A rejected WebViewer promise is logged instead of surfacing as
 * an unhandled rejection.
 */
function initializeWebViewer() {
  const element = document.getElementById('viewer');
  if (!element) {
    console.error('Viewer div not found.');
    return;
  }

  WebViewer({
    path: '/lib',
    initialDoc: redactionDemoFile,
    licenseKey: 'YOUR_LICENSE_KEY',
    fullAPI: true,
    enableFilePicker: true, // Enable file picker to open files. In WebViewer -> menu icon -> Open File
  }, element)
    .then((instance) => {
      const { UI, Core } = instance;
      const { documentViewer } = Core;

      documentViewer.addEventListener('documentLoaded', () => {
        UI.setLayoutMode(UI.LayoutMode.Single);                  // Single page view
        UI.disableFadePageNavigationComponent();                 // Keep the page navigation component on screen
        pageCount = documentViewer.getDocument().getPageCount(); // Update page count
        setPage(1);                                              // First page triggers text extraction
      });

      // Re-run extraction whenever the visible page changes.
      documentViewer.addEventListener('pageNumberUpdated', viewerUpdated);

      // Re-run extraction whenever annotations are added, modified or deleted.
      documentViewer
        .getAnnotationManager()
        .addEventListener('annotationChanged', getAnnotListener);

      createUIElements();

      // Trigger immediate UI update if available
      if (window.updateUIContent) {
        window.updateUIContent();
      }
    })
    .catch((error) => {
      console.error('Failed to initialize WebViewer:', error);
    });
}
52
/**
 * Runs both extraction passes for a single page: the full page text
 * (`getText`) followed by the text under annotations (`getAnnotText`).
 *
 * Emits a console warning and does nothing when no document is loaded or
 * when `pageNumber` is not greater than zero.
 *
 * @param {number} pageNumber - Page to extract.
 * @returns {Promise<void>} Settles once both passes have finished.
 */
async function getAllTextFromDocument(pageNumber) {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const loadedDocument = documentViewer.getDocument();
  const canExtract = pageNumber > 0 && loadedDocument;

  if (!canExtract) {
    console.warn('Document not available or invalid page number');
    return;
  }

  // Sequential on purpose: the annotation pass ends by refreshing the UI,
  // which reads the page text stored by the first pass.
  await getText(pageNumber);
  await getAnnotText(pageNumber);
}
63
/**
 * Loads the text of one page and stores it in the module-level `textContent`.
 * The module-level `pageCount` is refreshed at the same time from the
 * document that supplied the text.
 *
 * Returns without touching any state when no document is loaded.
 *
 * @param {number} pageNumber - Page whose text should be loaded.
 * @returns {Promise<void>} Settles when `loadPageText` has settled.
 */
async function getText(pageNumber) {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const doc = documentViewer.getDocument();
  // Check if document is loaded before proceeding
  if (!doc) {
    return;
  }

  // Read the count before the async call so both values come from the same document.
  const newPageCount = doc.getPageCount();
  await doc.loadPageText(pageNumber, (newText) => {
    textContent = newText;
    pageCount = newPageCount;
  });
}
80
/**
 * Extracts the text lying under every annotation on one page and stores the
 * newline-joined result in the module-level `annotTextContent`.
 *
 * Steps:
 *  1. export the viewer's annotations for `pageNumber` as XFDF;
 *  2. merge them into the PDFNet document (`fdfUpdate`);
 *  3. run a `TextExtractor` over the page's crop box and collect
 *     `getTextUnderAnnot` for each annotation on the page;
 *  4. publish the result via `updateGlobalVars` and refresh the UI through
 *     `window.updateUIContent` / `window.updateUIElements` when those exist.
 *
 * Warns and returns early when no PDF document can be obtained. Errors
 * raised during steps 2-3 are logged and yield whatever text was collected so
 * far (possibly an empty string).
 *
 * @param {number} pageNumber - Page whose annotations are inspected.
 * @returns {Promise<void>}
 */
async function getAnnotText(pageNumber) {
  const { PDFNet, documentViewer } = window.WebViewer.getInstance().Core;
  await PDFNet.initialize();
  await documentViewer.getAnnotationsLoadedPromise(); // Ensure annotations are loaded

  // Bail out before exporting anything: without a document there is nothing to merge into.
  const doc = await getPDFDocument(documentViewer, PDFNet);
  if (!doc) {
    console.warn('PDF document not available');
    return;
  }

  const annotationManager = documentViewer.getAnnotationManager();
  const annotList = annotationManager
    .getAnnotationsList()
    .filter((a) => a.getPageNumber() === pageNumber);
  const xfdfString = await annotationManager.exportAnnotations({ annotationList: annotList });
  const textOutput = [];

  // Run PDFNet methods with memory management
  await PDFNet.runWithCleanup(async () => {
    // lock the document before a write operation
    // runWithCleanup will auto unlock when complete
    try {
      // Awaited so that a rejection lands in the catch below instead of
      // becoming an unhandled promise rejection.
      await doc.lock();
      const fdfDoc = await PDFNet.FDFDoc.createFromXFDF(xfdfString);
      await doc.fdfUpdate(fdfDoc);
      const pageTemp = await doc.getPage(pageNumber);
      const rect = await pageTemp.getCropBox();
      const te = await PDFNet.TextExtractor.create();
      await te.begin(pageTemp, rect);
      const annotCount = await pageTemp.getNumAnnots();
      for (let i = 0; i < annotCount; ++i) {
        const annot = await pageTemp.getAnnot(i);
        const annotText = await te.getTextUnderAnnot(annot);
        textOutput.push(annotText);
      }
    } catch (e) {
      console.log('Document no longer exists, demo probably unmounted', e);
    }
    annotTextContent = textOutput.join('\n');
    updateGlobalVars(); // Update global variables after annotation extraction completes
    // Trigger immediate UI update if available
    if (window.updateUIContent) {
      window.updateUIContent();
    }
  });

  // Trigger UI update after annotation extraction is complete
  if (window.updateUIElements) {
    window.updateUIElements(pageNumber);
  }
}
132
/**
 * Resolves a PDFNet `PDFDoc` for whatever the viewer currently has loaded.
 *
 * Documents whose `type` is `'office'` are first converted to a PDF buffer
 * (using the extension taken from the file name) and a new `PDFDoc` is built
 * from that buffer; every other document is asked for its own `PDFDoc`.
 *
 * @param {object} documentViewer - Viewer whose current document is used.
 * @param {object} PDFNet - PDFNet namespace used to build a document from a buffer.
 * @returns {Promise<object|undefined>} The PDF document, or `undefined` when
 *   nothing is loaded.
 */
async function getPDFDocument(documentViewer, PDFNet) {
  const loaded = documentViewer.getDocument();
  if (!loaded) {
    return undefined;
  }

  if (loaded.type !== 'office') {
    return loaded.getPDFDoc();
  }

  const { Core } = window.WebViewer.getInstance();
  const officeData = await loaded.getFileData();
  const { filename } = loaded;
  // Everything after the last dot; the whole name when there is no dot.
  const extension = filename.slice(filename.lastIndexOf('.') + 1);
  const pdfBuffer = await Core.officeToPDFBuffer(officeData, { extension });
  return PDFNet.PDFDoc.createFromBuffer(pdfBuffer);
}
153
/**
 * Navigates the viewer to `pageNumber`, records it in the module-level
 * `currentPage` and starts text extraction for that page.
 *
 * The call is ignored unless `pageNumber` converts to a whole number in the
 * range 1..`pageCount` (pages are 1-based, so 0 is rejected). The value is
 * normalised with `Number()` once, so `currentPage` always holds a number
 * even when the caller passes a numeric string.
 *
 * @param {number|string} pageNumber - Requested 1-based page.
 */
function setPage(pageNumber) {
  const page = Number(pageNumber);
  if (!Number.isInteger(page) || page < 1 || page > pageCount) return;

  window.WebViewer.getInstance().Core.documentViewer.setCurrentPage(page);
  currentPage = page;

  // Extraction runs in the background; log failures instead of leaving the
  // promise rejection unhandled.
  getAllTextFromDocument(page).catch((error) => {
    console.error('Text extraction failed:', error);
  });
}
163
/**
 * `annotationChanged` handler: re-extracts text for whichever page the
 * viewer is currently showing.
 */
function getAnnotListener() {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const visiblePage = documentViewer.getCurrentPage();
  getAllTextFromDocument(visiblePage);
}
168
/**
 * `pageNumberUpdated` handler: forwards the viewer's current page to
 * `setPage` so the tracked page and the extracted text follow navigation.
 */
function viewerUpdated() {
  const viewer = window.WebViewer.getInstance().Core.documentViewer;
  const visiblePage = viewer.getCurrentPage();
  setPage(visiblePage);
}
173
// UI Elements
/**
 * Loads `ui-elements.js` on demand and, once it has loaded, builds the side
 * panel next to the `viewer` element via `UIElements.init('viewer')`.
 *
 * The script tag is injected at most once: a later call finds the existing
 * tag and returns without doing anything. A load failure is logged.
 */
function createUIElements() {
  const scriptSrc = '/showcase-demos/pdf-text-extractor/ui-elements.js';

  // Dynamically load ui-elements.js if not already loaded
  if (document.querySelector(`script[src="${scriptSrc}"]`)) {
    return;
  }

  const script = document.createElement('script');
  script.src = scriptSrc;
  script.onload = () => {
    UIElements.init('viewer');
  };
  script.onerror = () => {
    console.error(`Failed to load ${scriptSrc}`);
  };
  document.head.appendChild(script);
}
189
/**
 * Mirrors the module-level extraction state onto `window` so that
 * ui-elements.js can read it.
 */
function updateGlobalVars() {
  Object.assign(window, {
    currentPage,
    pageCount,
    textContent,
    annotTextContent,
  });
}
197
198// Initialize the WebViewer
199initializeWebViewer();
200

1// ES6 Compliant Syntax
2// GitHub Copilot, Claude Sonnet 4 (Preview), October 14, 2025
3// File: showcase-demos/pdf-text-extractor/ui-elements.js
4
/**
 * Builds and refreshes the "Text Extraction" side panel shown next to the
 * viewer. All state is read from globals on `window` (`currentPage`,
 * `pageCount`, `textContent`, `annotTextContent`).
 *
 * The static methods never rely on `this`, so they keep working when a
 * reference such as `UIElements.updateUIElements` is stored and called
 * without the class as receiver.
 */
class UIElements {

  /**
   * Entry point: creates the side panel beside the element with `viewerId`.
   *
   * @param {string} viewerId - `id` of the viewer element.
   */
  static init(viewerId) {
    UIElements.createSidePanel(viewerId);
  }

  /**
   * Creates the side panel and a flex wrapper, then moves the viewer element
   * into that wrapper so the panel sits on its left.
   *
   * Logs an error and returns when the viewer element is missing. Does
   * nothing when an element with id `side-panel` already exists, so calling
   * it twice does not nest wrappers or duplicate the panel.
   *
   * @param {string} viewerId - `id` of the viewer element.
   */
  static createSidePanel(viewerId) {
    const viewerElement = document.getElementById(viewerId);
    if (!viewerElement) {
      console.error(`Viewer element with id '${viewerId}' not found.`);
      return;
    }
    if (document.getElementById('side-panel')) {
      return;
    }

    // Create the side panel container
    const sidePanel = document.createElement('div');
    sidePanel.id = 'side-panel';
    sidePanel.className = 'side-panel';

    // Create side panel content
    const content = document.createElement('div');
    content.className = 'side-panel-content';

    // Add the text extraction content (static markup, no user data)
    const sampleContent = document.createElement('div');
    sampleContent.innerHTML = `
      <div class="panel-section">
        <h4>Text Extraction</h4>

        <div id="page-info">
          <label class="page-label"><strong>Page</strong></label>
          <input type="number" id="input-page-number" min="1" value="0" class="page-input" readonly>
          <label id="page-count-label" class="page-count-label">of 0 full page text</label>
        </div>

        <div id="page-text-all">
          <label class="text-section-label">Page Text Content:</label>
          <textarea id="page-text-content" class="text-display" readonly placeholder="Page text will appear here..."></textarea>
        </div>

        <div id="page-text-annotations">
          <label id="annotations-label" class="text-section-label">Page 1 text under annotations:</label>
          <textarea id="page-annotations-content" class="text-display" readonly placeholder="Annotation text will appear here..."></textarea>
        </div>
      </div>
    `;

    content.appendChild(sampleContent);
    sidePanel.appendChild(content);

    // Create a wrapper to contain both the side panel and viewer
    const wrapper = document.createElement('div');
    wrapper.id = 'viewer-wrapper';
    wrapper.className = 'viewer-wrapper';

    // Insert the wrapper where the viewer currently is...
    viewerElement.parentNode.insertBefore(wrapper, viewerElement);

    // ...then move the viewer into it, after the side panel
    wrapper.appendChild(sidePanel);
    wrapper.appendChild(viewerElement);

    // Add the viewer-with-panel class to the viewer element
    viewerElement.classList.add('viewer-with-panel');
    console.log('Side panel created successfully');
  }

  /**
   * Appends an extra `.panel-section` to the side panel; does nothing when
   * the panel content container is not in the DOM.
   *
   * NOTE(review): this is an instance method while every other member is
   * static, and `content` is assigned to `innerHTML` — pass trusted markup only.
   *
   * @param {string} content - HTML for the new section.
   */
  addPanelContent(content) {
    const panelContent = document.querySelector('.side-panel-content');
    if (panelContent) {
      const contentDiv = document.createElement('div');
      contentDiv.className = 'panel-section';
      contentDiv.innerHTML = content;
      panelContent.appendChild(contentDiv);
    }
  }

  /**
   * Refreshes every panel control from the `window` globals: page count
   * label, page input (value and `max`), annotations label and both text
   * areas. Also publishes `window.updateUIContent`, a lighter refresh that
   * only rewrites the two text areas.
   *
   * Returns without doing anything when the panel elements are not in the
   * DOM yet (the panel is created only after ui-elements.js has loaded).
   *
   * When the displayed page changes and `window.getAllTextFromDocument`
   * exists, extraction is requested for the new page. It is requested only on
   * a change so that an extraction that ends by calling this method cannot
   * re-trigger itself forever.
   */
  static updateUIElements() {
    const pageInput = document.getElementById('input-page-number');
    const pageCountLabel = document.getElementById('page-count-label');
    const pageTextContent = document.getElementById('page-text-content');
    const pageAnnotationsContent = document.getElementById('page-annotations-content');
    const annotationsLabel = document.getElementById('annotations-label');

    const panelReady = pageInput && pageCountLabel && pageTextContent
      && pageAnnotationsContent && annotationsLabel;
    if (!panelReady) {
      return;
    }

    // Function to immediately update UI content from global variables
    const updateUIContent = () => {
      if (window.textContent !== undefined) {
        pageTextContent.value = window.textContent || 'No text found on this page.';
      }
      if (window.annotTextContent !== undefined) {
        pageAnnotationsContent.value = window.annotTextContent || 'No annotation text found on this page.';
      }
    };

    // Expose the UI update function globally
    window.updateUIContent = updateUIContent;

    // Page count label and upper bound of the page input
    const totalPages = window.pageCount || 0;
    pageCountLabel.textContent = `of ${totalPages} full page text`;
    if (totalPages > 0) {
      pageInput.max = totalPages;
    }

    // Page input and annotations label follow window.currentPage
    const page = window.currentPage;
    if (page !== undefined) {
      const pageChanged = pageInput.value !== String(page);
      pageInput.value = String(page);
      annotationsLabel.textContent = `Page ${page} text under annotations:`;

      if (pageChanged && window.getAllTextFromDocument) {
        window.getAllTextFromDocument(page);
      }
    }

    // Render the text that is already available instead of waiting for the
    // next extraction to call window.updateUIContent.
    updateUIContent();
  }
}
145
146window.updateUIElements = UIElements.updateUIElements; //Make it globally accessible
147

1/* Main layout - side by side containers within #viewer */
2#viewer {
3  display: flex;
4  height: 100%;
5  width: 100%;
6}
7
8/* Side Panel Styles */
9.viewer-wrapper {
10  display: flex;
11  height: 100vh;
12  width: 100%;
13}
14
15.side-panel {
16  width: 300px;
17  min-width: 250px;
18  max-width: 400px;
19  background-color: #f5f5f5;
20  border-right: 1px solid #ddd;
21  box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
22  transition: transform 0.3s ease;
23  z-index: 1000;
24  display: flex;
25  flex-direction: column;
26}
27
28.side-panel.collapsed {
29  transform: translateX(-100%);
30}
31
32.side-panel-header {
33  background-color: #e9ecef;
34  padding: 15px 20px;
35  border-bottom: 1px solid #ddd;
36  flex-shrink: 0;
37}
38
39.side-panel-header h3 {
40  margin: 0;
41  font-size: 18px;
42  font-weight: 600;
43  color: #333;
44}
45
46.side-panel-content {
47  flex: 1;
48  padding: 20px;
49  overflow-y: auto;
50}
51
52.panel-section {
53  margin-bottom: 25px;
54}
55
56.panel-section h4 {
57  margin: 0 0 12px 0;
58  font-size: 14px;
59  font-weight: 600;
60  color: #555;
61  text-transform: uppercase;
62  letter-spacing: 0.5px;
63}
64
65.panel-button {
66  display: block;
67  width: 100%;
68  padding: 10px 15px;
69  margin-bottom: 8px;
70  background-color: #fff;
71  border: 1px solid #ddd;
72  border-radius: 4px;
73  cursor: pointer;
74  transition: all 0.2s ease;
75  font-size: 14px;
76}
77
78.panel-button:hover {
79  background-color: #007bff;
80  color: white;
81  border-color: #007bff;
82}
83
84.panel-button:active {
85  transform: translateY(1px);
86}
87
88/* Text Extraction UI Styles */
89#page-info {
90  display: flex;
91  align-items: center;
92  gap: 8px;
93  margin-bottom: 15px;
94  flex-wrap: wrap;
95}
96
97.page-label {
98  font-size: 14px;
99  font-weight: 600;
100  color: #333;
101  white-space: nowrap;
102}
103
104.page-input {
105  width: 60px;
106  padding: 4px 8px;
107  border: 1px solid #ddd;
108  border-radius: 4px;
109  font-size: 14px;
110  text-align: center;
111}
112
113.page-input:focus {
114  outline: none;
115  border-color: #007bff;
116  box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
117}
118
119.page-count-label {
120  font-size: 12px;
121  color: #666;
122  white-space: nowrap;
123}
124
125.text-section-label {
126  display: block;
127  font-size: 12px;
128  font-weight: 600;
129  color: #555;
130  margin-bottom: 5px;
131  text-transform: uppercase;
132  letter-spacing: 0.5px;
133}
134
/* Read-only text areas that show the extracted text. */
.text-display {
  /* Keep padding and border inside the 100% width; with the default
     content-box sizing the textarea would be wider than the panel content. */
  box-sizing: border-box;
  width: 100%;
  height: 120px;
  padding: 10px;
  border: 1px solid #ddd;
  border-radius: 4px;
  font-size: 12px;
  font-family: 'Courier New', monospace;
  line-height: 1.4;
  resize: vertical;
  background-color: #f9f9f9;
  color: #333;
  white-space: pre-wrap;
  /* word-wrap is the legacy alias; overflow-wrap is the standard property. */
  word-wrap: break-word;
  overflow-wrap: break-word;
  overflow-y: auto;
  overflow-x: hidden;
}
152
153.text-display:focus {
154  outline: none;
155  border-color: #007bff;
156  box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
157}
158
159.text-display::placeholder {
160  color: #999;
161  font-style: italic;
162}
163
164#page-text-all {
165  margin-bottom: 20px;
166}
167
168#page-text-annotations {
169  margin-bottom: 15px;
170}
171
172.setting-item {
173  margin-bottom: 15px;
174}
175
176.setting-item label {
177  display: flex;
178  align-items: center;
179  font-size: 14px;
180  color: #555;
181  cursor: pointer;
182}
183
184.setting-item input[type="checkbox"] {
185  margin-right: 10px;
186}
187
188.setting-item input[type="range"] {
189  margin-left: 10px;
190  flex: 1;
191}
192
193.viewer-with-panel {
194  flex: 1;
195  height: 100vh;
196}
197
198/* Dark mode styles */
199@media (prefers-color-scheme: dark) {
200  .side-panel {
201    background-color: #2d3748;
202    border-right-color: #4a5568;
203  }
204  
205  .side-panel-header {
206    background-color: #1a202c;
207    border-bottom-color: #4a5568;
208  }
209  
210  .side-panel-header h3 {
211    color: #e2e8f0;
212  }
213  
214  .panel-section h4 {
215    color: #a0aec0;
216  }
217  
218  .panel-button {
219    background-color: #4a5568;
220    border-color: #718096;
221    color: #e2e8f0;
222  }
223  
224  .panel-button:hover {
225    background-color: #007bff;
226    border-color: #007bff;
227  }
228  
229  .setting-item label {
230    color: #a0aec0;
231  }
232  
233  /* Text extraction dark mode styles */
234  .page-label {
235    color: #e2e8f0;
236  }
237  
238  .page-input {
239    background-color: #4a5568;
240    border-color: #718096;
241    color: #e2e8f0;
242  }
243  
244  .page-input:focus {
245    border-color: #007bff;
246    box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.4);
247  }
248  
249  .page-count-label {
250    color: #a0aec0;
251  }
252  
253  .text-section-label {
254    color: #a0aec0;
255  }
256  
257  .text-display {
258    background-color: #4a5568;
259    border-color: #718096;
260    color: #e2e8f0;
261  }
262  
263  .text-display:focus {
264    border-color: #007bff;
265    box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.4);
266  }
267  
268  .text-display::placeholder {
269    color: #718096;
270  }
271}
272
/* Responsive design: on narrow screens the panel overlays the viewer
   instead of sitting beside it. The collapsed state reuses the global
   .side-panel.collapsed rule, so it is not repeated here. */
@media (max-width: 768px) {
  .side-panel {
    position: absolute;
    left: 0;
    top: 0;
    height: 100%;
    z-index: 1001;
  }

  /* Positioning context for the absolutely positioned panel. */
  .viewer-wrapper {
    position: relative;
  }
}
291
292/* Toggle button for mobile */
293.side-panel-toggle {
294  position: fixed;
295  top: 20px;
296  left: 20px;
297  z-index: 1002;
298  background-color: #007bff;
299  color: white;
300  border: none;
301  border-radius: 4px;
302  padding: 10px;
303  cursor: pointer;
304  display: none;
305}
306
307@media (max-width: 768px) {
308  .side-panel-toggle {
309    display: block;
310  }
311  
312  /* Text extraction responsive styles */
313  #page-info {
314    flex-direction: column;
315    align-items: flex-start;
316    gap: 5px;
317  }
318  
319  .page-input {
320    width: 80px;
321  }
322  
323  .text-display {
324    height: 100px;
325    font-size: 11px;
326  }
327  
328  .page-count-label {
329    font-size: 11px;
330  }
331}
332
333/* Theme Switch Styles */
334.theme-switch-container {
335  display: flex;
336  justify-content: center;
337  margin-top: 15px;
338}
339
340.theme-switch {
341  position: relative;
342  display: flex;
343  background-color: #e9ecef;
344  border-radius: 25px;
345  padding: 4px;
346  border: 2px solid #dee2e6;
347  width: 200px;
348  height: 50px;
349  overflow: hidden;
350}
351
352.theme-switch input[type="radio"] {
353  display: none;
354}
355
356.switch-option {
357  flex: 1;
358  display: flex;
359  flex-direction: column;
360  align-items: center;
361  justify-content: center;
362  cursor: pointer;
363  position: relative;
364  z-index: 2;
365  transition: color 0.3s ease;
366  padding: 5px;
367}
368
369.switch-option.left {
370  border-radius: 20px 0 0 20px;
371}
372
373.switch-option.right {
374  border-radius: 0 20px 20px 0;
375}
376
377.switch-icon {
378  font-size: 16px;
379  margin-bottom: 2px;
380}
381
382.switch-label {
383  font-size: 12px;
384  font-weight: 500;
385  text-transform: uppercase;
386  letter-spacing: 0.5px;
387}
388
389.switch-slider {
390  position: absolute;
391  top: 4px;
392  left: 4px;
393  width: calc(50% - 4px);
394  height: calc(100% - 8px);
395  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
396  border-radius: 20px;
397  transition: transform 0.3s ease, background 0.3s ease;
398  z-index: 1;
399  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
400}
401
402/* Dark mode selected */
403#dark-mode:checked ~ .switch-slider {
404  transform: translateX(0);
405  background: linear-gradient(135deg, #2c3e50 0%, #5a5c5e 100%);
406}
407
408/* Light mode selected */
409#light-mode:checked ~ .switch-slider {
410  transform: translateX(100%);
411  background: linear-gradient(135deg, #e4ce85 0%, #e9ca1d 100%);
412}
413
414/* Text color changes */
415#dark-mode:checked ~ .switch-option.left {
416  color: white;
417}
418
419#light-mode:checked ~ .switch-option.right {
420  color: white;
421}
422
423.switch-option {
424  color: #6c757d;
425}
426
427/* Dark mode theme styles */
428@media (prefers-color-scheme: dark) {
429  .theme-switch {
430    background-color: #4a5568;
431    border-color: #718096;
432  }
433  
434  .switch-option {
435    color: #a0aec0;
436  }
437  
438  #dark-mode:checked ~ .switch-option.left {
439    color: white;
440  }
441  
442  #light-mode:checked ~ .switch-option.right {
443    color: white;
444  }
445}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

PDF Text Extractor Showcase Demo Code Sample