Quickly extract text from PDFs—either from entire pages or just the highlighted sections.
This demo allows you to:
Implementation steps
To add PDF Text Extraction capability with WebViewer:
Step 1: Choose your preferred web stack for WebViewer
Step 2: Add the ES6 JavaScript sample code provided in this guide
1// ES6 Compliant Syntax
2// GitHub Copilot, Claude Sonnet 4 (Preview), October 14, 2025
3// File: showcase-demos/pdf-text-extractor/index.js
4import WebViewer from '@pdftron/webviewer';
5
// Shared demo state, read and written by the functions below
let redactionDemoFile = 'https://apryse.s3.amazonaws.com/public/files/samples/section-508.pdf'; // document opened at startup
let pageCount = 0; // page total of the loaded document
let textContent = ''; // text extracted from the current page
let annotTextContent = ''; // text found under the current page's annotations
let currentPage = 0; // page last selected through setPage(); 0 until then
12
/**
 * Creates the WebViewer instance inside the #viewer element, opens the sample
 * document and registers the listeners that keep the extracted text in sync
 * with the page on screen.
 *
 * Returns nothing; failures are reported on the console.
 */
function initializeWebViewer() {
  const element = document.getElementById('viewer');
  if (!element) {
    console.error('Viewer div not found.');
    return;
  }

  WebViewer({
    path: '/lib',
    initialDoc: redactionDemoFile,
    licenseKey: 'YOUR_LICENSE_KEY',
    fullAPI: true, // getAnnotText() reads Core.PDFNet
    enableFilePicker: true, // Enable file picker to open files. In WebViewer -> menu icon -> Open File
  }, element)
    .then((instance) => {
      const { UI } = instance;
      const { documentViewer } = instance.Core;

      documentViewer.addEventListener('documentLoaded', () => {
        UI.setLayoutMode(UI.LayoutMode.Single); // Single page view
        UI.disableFadePageNavigationComponent(); // Keep the page navigation component on screen all the time
        pageCount = documentViewer.getDocument().getPageCount();
        setPage(1); // Selecting the first page triggers the initial text extraction
      });

      // Re-extract whenever the viewer moves to another page
      documentViewer.addEventListener('pageNumberUpdated', viewerUpdated);

      // Re-extract whenever annotations on the document change
      documentViewer
        .getAnnotationManager()
        .addEventListener('annotationChanged', getAnnotListener);

      // Side panel
      createUIElements();

      // Trigger immediate UI update if available
      if (window.updateUIContent) {
        window.updateUIContent();
      }
    })
    .catch((error) => {
      // Without this handler a failed load surfaces only as an unhandled rejection
      console.error('WebViewer failed to initialize.', error);
    });
}
52
/**
 * Runs both extraction passes for one page: the full page text first, then the
 * text under that page's annotations.
 * Logs a warning and does nothing when no document is loaded or the page
 * number is not positive.
 * @param {number} pageNumber - Page to extract from (must be greater than 0).
 * @returns {Promise<void>}
 */
async function getAllTextFromDocument(pageNumber) {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const loadedDocument = documentViewer.getDocument();

  const canExtract = pageNumber > 0 && loadedDocument;
  if (!canExtract) {
    console.warn('Document not available or invalid page number');
    return;
  }

  await getText(pageNumber);
  await getAnnotText(pageNumber);
}
63
/**
 * Loads the text of one page into the shared `textContent` state and refreshes
 * `pageCount` from the loaded document. Does nothing when no document is loaded.
 * @param {number} pageNumber - Page whose text should be loaded.
 * @returns {Promise<void>}
 */
async function getText(pageNumber) {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const doc = documentViewer.getDocument();
  // Check if document is loaded before proceeding
  if (!doc) {
    return;
  }

  const newPageCount = doc.getPageCount();
  await doc.loadPageText(pageNumber, (newText) => {
    // The former `pageNumber = pageNumber` self-assignment was a no-op and is gone
    textContent = newText;
    pageCount = newPageCount;
  });
}
80
/**
 * Extracts the text lying under every annotation on one page, stores it in the
 * shared `annotTextContent` state and asks the side panel to refresh.
 *
 * The viewer's annotations for the page are exported as XFDF and merged into
 * the PDFDoc first, so the extractor can look them up on the page.
 * @param {number} pageNumber - Page whose annotations are inspected.
 * @returns {Promise<void>}
 */
async function getAnnotText(pageNumber) {
  const { PDFNet, documentViewer } = window.WebViewer.getInstance().Core;
  await PDFNet.initialize();
  await documentViewer.getAnnotationsLoadedPromise(); // Ensure annotations are loaded

  const doc = await getPDFDocument(documentViewer, PDFNet);
  // Bail out before exporting annotations: without a PDFDoc there is nothing to merge them into
  if (!doc) {
    console.warn('PDF document not available');
    return;
  }

  const annotationManager = documentViewer.getAnnotationManager();
  const pageAnnotations = annotationManager
    .getAnnotationsList()
    .filter((annotation) => annotation.getPageNumber() === pageNumber);
  const xfdfString = await annotationManager.exportAnnotations({ annotationList: pageAnnotations });
  const textOutput = [];

  // Run PDFNet methods with memory management
  await PDFNet.runWithCleanup(async () => {
    try {
      // Lock the document before the write operation (fdfUpdate).
      // Awaited so that a failed lock is handled by the catch below.
      await doc.lock();
      const fdfDoc = await PDFNet.FDFDoc.createFromXFDF(xfdfString);
      await doc.fdfUpdate(fdfDoc);

      const page = await doc.getPage(pageNumber);
      const cropBox = await page.getCropBox();
      const extractor = await PDFNet.TextExtractor.create();
      await extractor.begin(page, cropBox);

      const annotCount = await page.getNumAnnots();
      for (let i = 0; i < annotCount; ++i) {
        const annot = await page.getAnnot(i);
        textOutput.push(await extractor.getTextUnderAnnot(annot));
      }
    } catch (e) {
      // Best effort: keep whatever was collected before the failure
      console.log('Document no longer exists, demo probably unmounted', e);
    }

    annotTextContent = textOutput.join('\n');
    updateGlobalVars(); // Publish the new state on window for the side panel
    // Trigger immediate UI update if available
    if (window.updateUIContent) {
      window.updateUIContent();
    }
  });

  // Trigger UI update after annotation extraction is complete
  if (window.updateUIElements) {
    window.updateUIElements(pageNumber);
  }
}
132
/**
 * Resolves the PDFDoc behind the document currently shown in the viewer.
 * Documents whose `type` is 'office' are converted to a PDF buffer first;
 * every other document is asked for its PDFDoc directly.
 * @param {object} documentViewer - Viewer whose current document is used.
 * @param {object} PDFNet - PDFNet namespace used to build a PDFDoc from a buffer.
 * @returns {Promise<object|undefined>} The PDFDoc, or undefined when no document is loaded.
 */
async function getPDFDocument(documentViewer, PDFNet) {
  const loadedDocument = documentViewer.getDocument();
  if (!loadedDocument) {
    return undefined;
  }

  if (loadedDocument.type !== 'office') {
    return loadedDocument.getPDFDoc();
  }

  // Office file: convert its raw bytes to PDF, using the file extension as the format hint
  const { Core } = window.WebViewer.getInstance();
  const fileData = await loadedDocument.getFileData();
  const extension = loadedDocument.filename.split('.').pop();
  const pdfBuffer = await Core.officeToPDFBuffer(fileData, { extension });
  return PDFNet.PDFDoc.createFromBuffer(pdfBuffer);
}
153
/**
 * Moves the viewer to the given page, records it in `currentPage` and starts
 * text extraction for it. Requests outside 1..pageCount, or that are not whole
 * numbers, are ignored.
 * @param {number|string} pageNumber - Requested page; numeric strings are accepted.
 */
function setPage(pageNumber) {
  // Coerce once so the viewer, the stored state and the extraction all see the same number
  const targetPage = Number(pageNumber);
  // Pages start at 1: the previous `< 0` check let page 0 through
  if (!Number.isInteger(targetPage) || targetPage < 1 || targetPage > pageCount) return;

  window.WebViewer.getInstance().Core.documentViewer.setCurrentPage(targetPage);
  currentPage = targetPage;

  // Trigger text extraction for the new page; report failures instead of leaving the promise floating
  Promise.resolve(getAllTextFromDocument(targetPage)).catch((error) => {
    console.error('Text extraction failed.', error);
  });
}
163
/**
 * 'annotationChanged' handler: re-runs text extraction for the page the viewer
 * is currently showing.
 */
function getAnnotListener() {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const visiblePage = documentViewer.getCurrentPage();
  getAllTextFromDocument(visiblePage);
}
168
/**
 * 'pageNumberUpdated' handler: passes the viewer's current page to setPage().
 */
function viewerUpdated() {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const visiblePage = documentViewer.getCurrentPage();
  setPage(visiblePage);
}
173
// UI Elements
/**
 * Loads ui-elements.js with a dynamically inserted <script> tag and, once it
 * has loaded, builds the side panel next to the 'viewer' element.
 * Skipped entirely when window.SidePanel is already set.
 * NOTE(review): nothing visible in this file sets window.SidePanel — confirm
 * which global is meant to signal that the script is already loaded.
 */
function createUIElements() {
  if (window.SidePanel) {
    return;
  }

  const scriptTag = document.createElement('script');
  scriptTag.src = '/showcase-demos/pdf-text-extractor/ui-elements.js';
  scriptTag.onload = () => {
    UIElements.init('viewer');
  };
  document.head.appendChild(scriptTag);
}
189
/**
 * Copies the module's state onto `window`, where the side panel script reads
 * it (window.currentPage, window.pageCount, window.textContent,
 * window.annotTextContent).
 */
function updateGlobalVars() {
  Object.assign(window, {
    currentPage,
    pageCount,
    textContent,
    annotTextContent,
  });
}
197
198// Initialize the WebViewer
199initializeWebViewer();
200
1// ES6 Compliant Syntax
2// GitHub Copilot, Claude Sonnet 4 (Preview), October 14, 2025
3// File: showcase-demos/pdf-text-extractor/ui-elements.js
4
/**
 * Builds and refreshes the text-extraction side panel shown next to the viewer.
 * The panel reads its data from window.currentPage, window.pageCount,
 * window.textContent and window.annotTextContent.
 */
class UIElements {

  /**
   * Entry point: creates the side panel beside the given viewer element.
   * @param {string} viewerId - id of the viewer element.
   */
  static init(viewerId) {
    this.createSidePanel(viewerId);
  }

  /**
   * Creates the side panel and wraps it, together with the viewer element, in
   * a new flex container so the panel sits on the left of the viewer.
   * Does nothing when the viewer element is missing or the panel already exists.
   * @param {string} viewerId - id of the viewer element.
   */
  static createSidePanel(viewerId) {
    const viewerElement = document.getElementById(viewerId);
    if (!viewerElement) {
      console.error(`Viewer element with id '${viewerId}' not found.`);
      return;
    }

    // A second call would otherwise nest another wrapper and duplicate every element id
    if (document.getElementById('side-panel')) {
      return;
    }

    // Create the side panel container
    const sidePanel = document.createElement('div');
    sidePanel.id = 'side-panel';
    sidePanel.className = 'side-panel';

    // Create side panel content
    const content = document.createElement('div');
    content.className = 'side-panel-content';

    // Add the text extraction content (static markup, no user data)
    const sampleContent = document.createElement('div');
    sampleContent.innerHTML = `
      <div class="panel-section">
        <h4>Text Extraction</h4>

        <div id="page-info">
          <label class="page-label"><strong>Page</strong></label>
          <input type="number" id="input-page-number" min="1" value="0" class="page-input" readonly>
          <label id="page-count-label" class="page-count-label">of 0 full page text</label>
        </div>

        <div id="page-text-all">
          <label class="text-section-label">Page Text Content:</label>
          <textarea id="page-text-content" class="text-display" readonly placeholder="Page text will appear here..."></textarea>
        </div>

        <div id="page-text-annotations">
          <label id="annotations-label" class="text-section-label">Page 1 text under annotations:</label>
          <textarea id="page-annotations-content" class="text-display" readonly placeholder="Annotation text will appear here..."></textarea>
        </div>
      </div>
    `;

    content.appendChild(sampleContent);
    sidePanel.appendChild(content);

    // Create a wrapper to contain both the side panel and viewer
    const wrapper = document.createElement('div');
    wrapper.id = 'viewer-wrapper';
    wrapper.className = 'viewer-wrapper';

    // Insert the wrapper where the viewer currently sits
    viewerElement.parentNode.insertBefore(wrapper, viewerElement);

    // Panel first, then the viewer, so the panel ends up on the left
    wrapper.appendChild(sidePanel);
    wrapper.appendChild(viewerElement);

    // Add the viewer-with-panel class to the viewer element
    viewerElement.classList.add('viewer-with-panel');
    console.log('Side panel created successfully');
  }

  /**
   * Appends an extra section to the side panel. Instance method, unlike the
   * rest of the class.
   * NOTE: `content` is inserted as HTML; pass only trusted markup.
   * @param {string} content - HTML for the new section.
   */
  addPanelContent(content) {
    const panelContent = document.querySelector('.side-panel-content');
    if (panelContent) {
      const contentDiv = document.createElement('div');
      contentDiv.className = 'panel-section';
      contentDiv.innerHTML = content;
      panelContent.appendChild(contentDiv);
    }
  }

  /**
   * Refreshes the whole side panel from the window.* state: page count, page
   * number, annotations label and both text areas. Also publishes
   * window.updateUIContent so callers can refresh just the text areas.
   * Does nothing when the panel has not been created yet.
   */
  static updateUIElements() {
    const pageInput = document.getElementById('input-page-number');
    const pageCountLabel = document.getElementById('page-count-label');
    const pageTextContent = document.getElementById('page-text-content');
    const pageAnnotationsContent = document.getElementById('page-annotations-content');
    const annotationsLabel = document.getElementById('annotations-label');

    if (!pageInput || !pageCountLabel || !pageTextContent || !pageAnnotationsContent || !annotationsLabel) {
      return;
    }

    // Copies the extracted text from the window.* state into the two text areas
    const updateUIContent = () => {
      if (window.textContent !== undefined) {
        pageTextContent.value = window.textContent || 'No text found on this page.';
      }
      if (window.annotTextContent !== undefined) {
        pageAnnotationsContent.value = window.annotTextContent || 'No annotation text found on this page.';
      }
    };

    // Expose the UI update function globally
    window.updateUIContent = updateUIContent;

    // Page count label and the matching upper bound of the page input
    const totalPages = window.pageCount || 0;
    pageCountLabel.textContent = `of ${totalPages} full page text`;
    if (totalPages > 0) {
      pageInput.max = String(totalPages);
    }

    // Page number and annotations label; skipped until a page has been selected
    const page = window.currentPage;
    if (page !== undefined) {
      annotationsLabel.textContent = `Page ${page} text under annotations:`;

      const shownPage = String(page);
      if (pageInput.value !== shownPage) {
        pageInput.value = shownPage;
        // Request extraction only when the page changed; doing it on every
        // refresh would loop, because extraction ends by calling this method
        if (window.getAllTextFromDocument) {
          window.getAllTextFromDocument(page);
        }
      }
    }

    // Show whatever text is already available instead of waiting for the next extraction
    updateUIContent();
  }
}
145
146window.updateUIElements = UIElements.updateUIElements; //Make it globally accessible
147
1/* Main layout - side by side containers within #viewer */
2#viewer {
3 display: flex;
4 height: 100%;
5 width: 100%;
6}
7
8/* Side Panel Styles */
9.viewer-wrapper {
10 display: flex;
11 height: 100vh;
12 width: 100%;
13}
14
15.side-panel {
16 width: 300px;
17 min-width: 250px;
18 max-width: 400px;
19 background-color: #f5f5f5;
20 border-right: 1px solid #ddd;
21 box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
22 transition: transform 0.3s ease;
23 z-index: 1000;
24 display: flex;
25 flex-direction: column;
26}
27
28.side-panel.collapsed {
29 transform: translateX(-100%);
30}
31
32.side-panel-header {
33 background-color: #e9ecef;
34 padding: 15px 20px;
35 border-bottom: 1px solid #ddd;
36 flex-shrink: 0;
37}
38
39.side-panel-header h3 {
40 margin: 0;
41 font-size: 18px;
42 font-weight: 600;
43 color: #333;
44}
45
46.side-panel-content {
47 flex: 1;
48 padding: 20px;
49 overflow-y: auto;
50}
51
52.panel-section {
53 margin-bottom: 25px;
54}
55
56.panel-section h4 {
57 margin: 0 0 12px 0;
58 font-size: 14px;
59 font-weight: 600;
60 color: #555;
61 text-transform: uppercase;
62 letter-spacing: 0.5px;
63}
64
65.panel-button {
66 display: block;
67 width: 100%;
68 padding: 10px 15px;
69 margin-bottom: 8px;
70 background-color: #fff;
71 border: 1px solid #ddd;
72 border-radius: 4px;
73 cursor: pointer;
74 transition: all 0.2s ease;
75 font-size: 14px;
76}
77
78.panel-button:hover {
79 background-color: #007bff;
80 color: white;
81 border-color: #007bff;
82}
83
84.panel-button:active {
85 transform: translateY(1px);
86}
87
88/* Text Extraction UI Styles */
89#page-info {
90 display: flex;
91 align-items: center;
92 gap: 8px;
93 margin-bottom: 15px;
94 flex-wrap: wrap;
95}
96
97.page-label {
98 font-size: 14px;
99 font-weight: 600;
100 color: #333;
101 white-space: nowrap;
102}
103
104.page-input {
105 width: 60px;
106 padding: 4px 8px;
107 border: 1px solid #ddd;
108 border-radius: 4px;
109 font-size: 14px;
110 text-align: center;
111}
112
113.page-input:focus {
114 outline: none;
115 border-color: #007bff;
116 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
117}
118
119.page-count-label {
120 font-size: 12px;
121 color: #666;
122 white-space: nowrap;
123}
124
125.text-section-label {
126 display: block;
127 font-size: 12px;
128 font-weight: 600;
129 color: #555;
130 margin-bottom: 5px;
131 text-transform: uppercase;
132 letter-spacing: 0.5px;
133}
134
/* Read-only text areas that show the extracted text */
.text-display {
  box-sizing: border-box; /* keep padding and border inside the 100% width so the box does not overflow the panel */
  width: 100%;
  height: 120px;
  padding: 10px;
  border: 1px solid #ddd;
  border-radius: 4px;
  font-size: 12px;
  font-family: 'Courier New', monospace;
  line-height: 1.4;
  resize: vertical;
  background-color: #f9f9f9;
  color: #333;
  white-space: pre-wrap;
  overflow-wrap: break-word; /* standard name for the legacy word-wrap property */
  overflow-y: auto;
  overflow-x: hidden;
}
152
153.text-display:focus {
154 outline: none;
155 border-color: #007bff;
156 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
157}
158
159.text-display::placeholder {
160 color: #999;
161 font-style: italic;
162}
163
164#page-text-all {
165 margin-bottom: 20px;
166}
167
168#page-text-annotations {
169 margin-bottom: 15px;
170}
171
172.setting-item {
173 margin-bottom: 15px;
174}
175
176.setting-item label {
177 display: flex;
178 align-items: center;
179 font-size: 14px;
180 color: #555;
181 cursor: pointer;
182}
183
184.setting-item input[type="checkbox"] {
185 margin-right: 10px;
186}
187
188.setting-item input[type="range"] {
189 margin-left: 10px;
190 flex: 1;
191}
192
193.viewer-with-panel {
194 flex: 1;
195 height: 100vh;
196}
197
198/* Dark mode styles */
199@media (prefers-color-scheme: dark) {
200 .side-panel {
201 background-color: #2d3748;
202 border-right-color: #4a5568;
203 }
204
205 .side-panel-header {
206 background-color: #1a202c;
207 border-bottom-color: #4a5568;
208 }
209
210 .side-panel-header h3 {
211 color: #e2e8f0;
212 }
213
214 .panel-section h4 {
215 color: #a0aec0;
216 }
217
218 .panel-button {
219 background-color: #4a5568;
220 border-color: #718096;
221 color: #e2e8f0;
222 }
223
224 .panel-button:hover {
225 background-color: #007bff;
226 border-color: #007bff;
227 }
228
229 .setting-item label {
230 color: #a0aec0;
231 }
232
233 /* Text extraction dark mode styles */
234 .page-label {
235 color: #e2e8f0;
236 }
237
238 .page-input {
239 background-color: #4a5568;
240 border-color: #718096;
241 color: #e2e8f0;
242 }
243
244 .page-input:focus {
245 border-color: #007bff;
246 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.4);
247 }
248
249 .page-count-label {
250 color: #a0aec0;
251 }
252
253 .text-section-label {
254 color: #a0aec0;
255 }
256
257 .text-display {
258 background-color: #4a5568;
259 border-color: #718096;
260 color: #e2e8f0;
261 }
262
263 .text-display:focus {
264 border-color: #007bff;
265 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.4);
266 }
267
268 .text-display::placeholder {
269 color: #718096;
270 }
271}
272
273/* Responsive design */
274@media (max-width: 768px) {
275 .side-panel {
276 position: absolute;
277 left: 0;
278 top: 0;
279 height: 100%;
280 z-index: 1001;
281 }
282
283 .viewer-wrapper {
284 position: relative;
285 }
286
287 .side-panel.collapsed {
288 transform: translateX(-100%);
289 }
290}
291
292/* Toggle button for mobile */
293.side-panel-toggle {
294 position: fixed;
295 top: 20px;
296 left: 20px;
297 z-index: 1002;
298 background-color: #007bff;
299 color: white;
300 border: none;
301 border-radius: 4px;
302 padding: 10px;
303 cursor: pointer;
304 display: none;
305}
306
307@media (max-width: 768px) {
308 .side-panel-toggle {
309 display: block;
310 }
311
312 /* Text extraction responsive styles */
313 #page-info {
314 flex-direction: column;
315 align-items: flex-start;
316 gap: 5px;
317 }
318
319 .page-input {
320 width: 80px;
321 }
322
323 .text-display {
324 height: 100px;
325 font-size: 11px;
326 }
327
328 .page-count-label {
329 font-size: 11px;
330 }
331}
332
333/* Theme Switch Styles */
334.theme-switch-container {
335 display: flex;
336 justify-content: center;
337 margin-top: 15px;
338}
339
340.theme-switch {
341 position: relative;
342 display: flex;
343 background-color: #e9ecef;
344 border-radius: 25px;
345 padding: 4px;
346 border: 2px solid #dee2e6;
347 width: 200px;
348 height: 50px;
349 overflow: hidden;
350}
351
352.theme-switch input[type="radio"] {
353 display: none;
354}
355
356.switch-option {
357 flex: 1;
358 display: flex;
359 flex-direction: column;
360 align-items: center;
361 justify-content: center;
362 cursor: pointer;
363 position: relative;
364 z-index: 2;
365 transition: color 0.3s ease;
366 padding: 5px;
367}
368
369.switch-option.left {
370 border-radius: 20px 0 0 20px;
371}
372
373.switch-option.right {
374 border-radius: 0 20px 20px 0;
375}
376
377.switch-icon {
378 font-size: 16px;
379 margin-bottom: 2px;
380}
381
382.switch-label {
383 font-size: 12px;
384 font-weight: 500;
385 text-transform: uppercase;
386 letter-spacing: 0.5px;
387}
388
389.switch-slider {
390 position: absolute;
391 top: 4px;
392 left: 4px;
393 width: calc(50% - 4px);
394 height: calc(100% - 8px);
395 background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
396 border-radius: 20px;
397 transition: transform 0.3s ease, background 0.3s ease;
398 z-index: 1;
399 box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
400}
401
402/* Dark mode selected */
403#dark-mode:checked ~ .switch-slider {
404 transform: translateX(0);
405 background: linear-gradient(135deg, #2c3e50 0%, #5a5c5e 100%);
406}
407
408/* Light mode selected */
409#light-mode:checked ~ .switch-slider {
410 transform: translateX(100%);
411 background: linear-gradient(135deg, #e4ce85 0%, #e9ca1d 100%);
412}
413
414/* Text color changes */
415#dark-mode:checked ~ .switch-option.left {
416 color: white;
417}
418
419#light-mode:checked ~ .switch-option.right {
420 color: white;
421}
422
423.switch-option {
424 color: #6c757d;
425}
426
427/* Dark mode theme styles */
428@media (prefers-color-scheme: dark) {
429 .theme-switch {
430 background-color: #4a5568;
431 border-color: #718096;
432 }
433
434 .switch-option {
435 color: #a0aec0;
436 }
437
438 #dark-mode:checked ~ .switch-option.left {
439 color: white;
440 }
441
442 #light-mode:checked ~ .switch-option.right {
443 color: white;
444 }
445}
Did you find this helpful?
Trial setup questions? Ask experts on Discord.
Need other help? Contact Support.
Pricing or product questions? Contact Sales.