Quickly extract text from PDFs—either from entire pages or highlighted sections only.
This demo allows you to:
Implementation steps
To add PDF Text Extraction capability with WebViewer:
Step 1: Choose your preferred web stack for WebViewer.
Step 2: Add the ES6 JavaScript sample code provided in this guide.
Once you generate your license key, it will automatically be included in your sample code below.
Apryse collects some data regarding your usage of the SDK for product improvement.
The data that Apryse collects includes:
For clarity, no other data is collected by the SDK and Apryse has no access to the contents of your documents.
If you wish to continue without data collection, contact us and we will email you a no-tracking trial key for you to get started.
1// ES6 Compliant Syntax
2// GitHub Copilot, Claude Sonnet 4 (Preview), October 14, 2025
3// File: showcase-demos/pdf-text-extractor/index.js
4import WebViewer from '@pdftron/webviewer';
5
// Module-level state shared by the extraction helpers below.

// URL of the sample PDF opened on startup. It is never reassigned, so it is a
// constant. (The identifier is a holdover from the redaction demo; it is kept
// because other code in this file refers to it by this name.)
const redactionDemoFile = 'https://apryse.s3.amazonaws.com/public/files/samples/section-508.pdf';
// Number of pages in the currently loaded document.
let pageCount = 0;
// Text of the most recently extracted page.
let textContent = '';
// Text found under the annotations of the most recently extracted page.
let annotTextContent = '';
// Page whose text is currently shown (0 until a document is loaded).
let currentPage = 0;
12
/**
 * Mounts WebViewer into the `#viewer` element and wires up the text-extraction demo.
 *
 * Once the viewer is ready this registers listeners for document load, page
 * changes and annotation changes, then builds the side panel. If the `#viewer`
 * element is missing, an error is logged and nothing else happens.
 */
function initializeWebViewer() {
  const element = document.getElementById('viewer');
  if (!element) {
    console.error('Viewer div not found.');
    return;
  }

  WebViewer({
    path: '/lib',
    initialDoc: redactionDemoFile,
    licenseKey: 'YOUR_LICENSE_KEY',
    fullAPI: true,
    enableFilePicker: true, // Enable file picker to open files. In WebViewer -> menu icon -> Open File.
  }, element)
    .then((instance) => {
      const { UI } = instance;
      const { documentViewer } = instance.Core;

      documentViewer.addEventListener('documentLoaded', () => {
        UI.setLayoutMode(UI.LayoutMode.Single); // Single page view.
        UI.disableFadePageNavigationComponent(); // Keep the page navigation component on screen.
        pageCount = documentViewer.getDocument().getPageCount();
        setPage(1); // Selecting the first page triggers the initial text extraction.
      });

      // Re-extract whenever the visible page changes.
      documentViewer.addEventListener('pageNumberUpdated', viewerUpdated);

      // Re-extract the annotation text whenever annotations are modified.
      documentViewer
        .getAnnotationManager()
        .addEventListener('annotationChanged', getAnnotListener);

      createUIElements();

      // Trigger an immediate UI update, if the panel script has registered one.
      if (window.updateUIContent) {
        window.updateUIContent();
      }
    })
    .catch((error) => {
      // Without this handler a failed viewer start-up (bad path, bad license,
      // network error) would surface only as an unhandled promise rejection.
      console.error('Failed to initialize WebViewer.', error);
    });
}
52
/**
 * Extracts both the full page text and the text under annotations for one page.
 *
 * Logs a warning and does nothing when no document is loaded or when
 * `pageNumber` is not greater than zero.
 *
 * @param {number} pageNumber - 1-based page to extract.
 * @returns {Promise<void>}
 */
async function getAllTextFromDocument(pageNumber) {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const loadedDocument = documentViewer.getDocument();

  if (!(pageNumber > 0) || !loadedDocument) {
    console.warn('Document not available or invalid page number');
    return;
  }

  // Page text first, then annotation text (the latter also refreshes the UI).
  await getText(pageNumber);
  await getAnnotText(pageNumber);
}
63
/**
 * Loads the text of one page into the module-level `textContent`.
 *
 * Also refreshes `pageCount` from the loaded document. Does nothing when no
 * document is loaded.
 *
 * @param {number} pageNumber - 1-based page whose text is loaded.
 * @returns {Promise<void>}
 */
async function getText(pageNumber) {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const doc = documentViewer.getDocument();
  // Check if document is loaded before proceeding.
  if (!doc) {
    return;
  }

  const newPageCount = doc.getPageCount();
  await doc.loadPageText(pageNumber, (newText) => {
    // The original also contained `pageNumber = pageNumber;`, a self-assignment
    // with no effect; it has been removed.
    textContent = newText;
    pageCount = newPageCount;
  });
}
80
/**
 * Extracts the text lying under each annotation on one page.
 *
 * The viewer's annotations for that page are exported as XFDF, merged into the
 * underlying PDF document, and a text extractor is then queried once per
 * annotation found on the page. The joined result is stored in the
 * module-level `annotTextContent`, the window globals are refreshed, and the
 * side panel is asked to update.
 *
 * @param {number} pageNumber - 1-based page to inspect.
 * @returns {Promise<void>}
 */
async function getAnnotText(pageNumber) {
  const { PDFNet, documentViewer } = window.WebViewer.getInstance().Core;
  await PDFNet.initialize();
  await documentViewer.getAnnotationsLoadedPromise(); // Ensure annotations are loaded.

  const doc = await getPDFDocument(documentViewer, PDFNet);
  // Bail out before exporting annotations: there is nothing to merge them into.
  if (!doc) {
    console.warn('PDF document not available');
    return;
  }

  const annotationManager = documentViewer.getAnnotationManager();
  const annotList = annotationManager
    .getAnnotationsList()
    .filter((a) => a.getPageNumber() === pageNumber);
  const xfdfString = await annotationManager.exportAnnotations({ annotationList: annotList });
  const textOutput = [];

  // Run PDFNet methods with memory management.
  await PDFNet.runWithCleanup(async () => {
    try {
      // Lock the document before a write operation. Each PDFNet call is awaited
      // so that a failure lands in the catch below instead of becoming an
      // unhandled rejection.
      await doc.lock();
      const fdfDoc = await PDFNet.FDFDoc.createFromXFDF(xfdfString);
      await doc.fdfUpdate(fdfDoc);

      const page = await doc.getPage(pageNumber);
      const cropBox = await page.getCropBox();
      const extractor = await PDFNet.TextExtractor.create();
      await extractor.begin(page, cropBox);

      const annotCount = await page.getNumAnnots();
      for (let i = 0; i < annotCount; ++i) {
        const annot = await page.getAnnot(i);
        textOutput.push(await extractor.getTextUnderAnnot(annot));
      }
    } catch (e) {
      console.log('Document no longer exists, demo probably unmounted', e);
    }

    // Publish whatever was collected, even after a partial failure.
    annotTextContent = textOutput.join('\n');
    updateGlobalVars();
    // Trigger immediate UI update, if available.
    if (window.updateUIContent) {
      window.updateUIContent();
    }
  });

  // Trigger UI update after annotation extraction is complete.
  if (window.updateUIElements) {
    window.updateUIElements(pageNumber);
  }
}
132
/**
 * Resolves the PDFNet document behind the viewer's current document.
 *
 * Documents whose `type` is 'office' are first converted to a PDF buffer and
 * wrapped in a new PDFDoc; anything else is asked for its PDFDoc directly.
 *
 * @param {object} documentViewer - WebViewer document viewer.
 * @param {object} PDFNet - PDFNet namespace used to build a PDFDoc from a buffer.
 * @returns {Promise<object|undefined>} The PDFDoc, or undefined when no document is loaded.
 */
async function getPDFDocument(documentViewer, PDFNet) {
  const activeDocument = documentViewer.getDocument();
  if (!activeDocument) {
    return undefined;
  }

  if (activeDocument.type !== 'office') {
    return activeDocument.getPDFDoc();
  }

  const core = window.WebViewer.getInstance().Core;
  const fileData = await activeDocument.getFileData();
  // The converter is told the source format via the file's extension.
  const nameParts = activeDocument.filename.split('.');
  const extension = nameParts[nameParts.length - 1];
  const pdfBuffer = await core.officeToPDFBuffer(fileData, { extension });
  return PDFNet.PDFDoc.createFromBuffer(pdfBuffer);
}
153
/**
 * Navigates the viewer to a page and starts text extraction for it.
 *
 * The value is coerced to a number once, so the viewer, `currentPage` and the
 * extraction all see the same numeric page. Values that are not numeric, below
 * 1, or above `pageCount` are ignored (pages are 1-based, so 0 is rejected).
 *
 * @param {number|string} pageNumber - Requested 1-based page.
 */
function setPage(pageNumber) {
  const page = Number(pageNumber);
  // Validate page number.
  if (Number.isNaN(page) || page < 1 || page > pageCount) return;

  window.WebViewer.getInstance().Core.documentViewer.setCurrentPage(page);
  currentPage = page;

  // Extraction runs in the background; log failures rather than leaving an
  // unhandled promise rejection.
  getAllTextFromDocument(page).catch((error) => {
    console.error('Text extraction failed.', error);
  });
}
163
/**
 * Annotation-change listener: re-extracts text for the page currently shown.
 *
 * Extraction runs in the background; a failure is logged instead of being left
 * as an unhandled promise rejection.
 */
function getAnnotListener() {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  getAllTextFromDocument(documentViewer.getCurrentPage()).catch((error) => {
    console.error('Text extraction failed.', error);
  });
}
168
/**
 * Page-change listener: makes the page now shown by the viewer the demo's
 * current page by handing it to `setPage`.
 */
function viewerUpdated() {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const visiblePage = documentViewer.getCurrentPage();
  setPage(visiblePage);
}
173
// UI Elements
/**
 * Loads ui-elements.js once and builds the side panel when it has loaded.
 *
 * The "already loaded" check uses `window.updateUIElements`, which
 * ui-elements.js assigns when it runs. The previous check (`window.SidePanel`)
 * tested a name nothing defines, so every call injected the script again.
 */
function createUIElements() {
  if (window.updateUIElements) {
    return;
  }

  const script = document.createElement('script');
  script.src = '/showcase-demos/pdf-text-extractor/ui-elements.js';
  script.onload = () => {
    UIElements.init('viewer');
  };
  script.onerror = () => {
    // Surface a missing or blocked script instead of failing silently.
    console.error(`Failed to load ${script.src}`);
  };
  document.head.appendChild(script);
}
189
/**
 * Mirrors the module-level extraction state onto `window` so that the
 * separately loaded side-panel script can read it.
 */
function updateGlobalVars() {
  Object.assign(window, {
    currentPage,
    pageCount,
    textContent,
    annotTextContent,
  });
}
197
198// Initialize the WebViewer.
199initializeWebViewer();
200
1// ES6 Compliant Syntax
2// GitHub Copilot, Claude Sonnet 4 (Preview), October 14, 2025
3// File: showcase-demos/pdf-text-extractor/ui-elements.js
4
/**
 * Builds and refreshes the text-extraction side panel shown next to the viewer.
 *
 * All entry points are static; the class is used as a namespace and is not
 * expected to be instantiated.
 */
class UIElements {
  /**
   * Creates the side panel for the given viewer element.
   * @param {string} viewerId - id of the element hosting WebViewer.
   */
  static init(viewerId) {
    this.createSidePanel(viewerId);
  }

  /**
   * Creates a side panel on the left of the viewer by wrapping the viewer
   * element and the new panel in a flex container.
   * @param {string} viewerId - id of the element hosting WebViewer.
   */
  static createSidePanel(viewerId) {
    const viewerElement = document.getElementById(viewerId);
    if (!viewerElement) {
      console.error(`Viewer element with id '${viewerId}' not found.`);
      return;
    }

    // Building the panel twice would nest wrappers and duplicate element ids.
    if (document.getElementById('side-panel')) {
      return;
    }

    // Create the side panel container.
    const sidePanel = document.createElement('div');
    sidePanel.id = 'side-panel';
    sidePanel.className = 'side-panel';

    // Create side panel content.
    const content = document.createElement('div');
    content.className = 'side-panel-content';

    // Add the text extraction content (static markup, no user data).
    const sampleContent = document.createElement('div');
    sampleContent.innerHTML = `
      <div class="panel-section">
        <h4>Text Extraction</h4>

        <div id="page-info">
          <label class="page-label"><strong>Page</strong></label>
          <input type="number" id="input-page-number" min="1" value="0" class="page-input" readonly>
          <label id="page-count-label" class="page-count-label">of 0 full page text</label>
        </div>

        <div id="page-text-all">
          <label class="text-section-label">Page Text Content:</label>
          <textarea id="page-text-content" class="text-display" readonly placeholder="Page text will appear here..."></textarea>
        </div>

        <div id="page-text-annotations">
          <label id="annotations-label" class="text-section-label">Page 1 text under annotations:</label>
          <textarea id="page-annotations-content" class="text-display" readonly placeholder="Annotation text will appear here..."></textarea>
        </div>
      </div>
    `;

    content.appendChild(sampleContent);
    sidePanel.appendChild(content);

    // Create a wrapper to contain both the side panel and viewer.
    const wrapper = document.createElement('div');
    wrapper.id = 'viewer-wrapper';
    wrapper.className = 'viewer-wrapper';

    // Insert the wrapper where the viewer was, then move the panel and the
    // viewer into it (panel first so it sits on the left).
    viewerElement.parentNode.insertBefore(wrapper, viewerElement);
    wrapper.appendChild(sidePanel);
    wrapper.appendChild(viewerElement);

    viewerElement.classList.add('viewer-with-panel');
    console.log('Side panel created successfully');
  }

  /**
   * Appends a new section to the side panel.
   * @param {string} content - HTML markup for the section. It is inserted via
   *   innerHTML, so it must come from a trusted source.
   */
  static addPanelContent(content) {
    const panelContent = document.querySelector('.side-panel-content');
    if (panelContent) {
      const contentDiv = document.createElement('div');
      contentDiv.className = 'panel-section';
      contentDiv.innerHTML = content;
      panelContent.appendChild(contentDiv);
    }
  }

  /**
   * Instance form of {@link UIElements.addPanelContent}, kept so existing
   * callers that create an instance continue to work.
   * @param {string} content - Trusted HTML markup for the section.
   */
  addPanelContent(content) {
    UIElements.addPanelContent(content);
  }

  /**
   * Refreshes the side panel from the `window` globals published by the demo
   * (`pageCount`, `currentPage`, `textContent`, `annotTextContent`).
   *
   * Also exposes `window.updateUIContent`, which refreshes only the two text
   * areas. Does nothing (beyond a warning) if the panel has not been built.
   *
   * @param {number} [pageNumber] - Page to display when `window.currentPage`
   *   has not been published yet.
   */
  static updateUIElements(pageNumber) {
    const pageInput = document.getElementById('input-page-number');
    const pageCountLabel = document.getElementById('page-count-label');
    const pageTextContent = document.getElementById('page-text-content');
    const pageAnnotationsContent = document.getElementById('page-annotations-content');
    const annotationsLabel = document.getElementById('annotations-label');

    // The panel script loads asynchronously; an update can arrive before the
    // panel exists.
    if (!pageInput || !pageCountLabel || !pageTextContent || !pageAnnotationsContent || !annotationsLabel) {
      console.warn('Side panel elements not found; skipping UI update.');
      return;
    }

    // Copies the published text into the two text areas.
    const updateUIContent = () => {
      if (window.textContent !== undefined) {
        pageTextContent.value = window.textContent || 'No text found on this page.';
      }
      if (window.annotTextContent !== undefined) {
        pageAnnotationsContent.value = window.annotTextContent || 'No annotation text found on this page.';
      }
    };

    // Expose the UI update function globally.
    window.updateUIContent = updateUIContent;

    // Page count label and upper bound of the page input.
    const totalPages = window.pageCount || 0;
    pageCountLabel.textContent = `of ${totalPages} full page text`;
    if (totalPages > 0) {
      pageInput.max = totalPages;
    }

    // Page input and annotations label.
    const page = window.currentPage ?? pageNumber;
    const pageChanged = page != null && pageInput.value !== String(page);
    if (page != null) {
      pageInput.value = page;
      annotationsLabel.textContent = `Page ${page} text under annotations:`;
    }

    // Show the text that is already available. Previously the text areas were
    // only filled by a later, separate call to window.updateUIContent.
    updateUIContent();

    // Ask for a fresh extraction only when the displayed page actually changed.
    // An unconditional request would re-trigger this refresh after every
    // extraction if the host page exposes getAllTextFromDocument.
    if (pageChanged && window.getAllTextFromDocument) {
      window.getAllTextFromDocument(page);
    }
  }
}
145
146window.updateUIElements = UIElements.updateUIElements; // Make it globally accessible.
147
1/* Main layout - side by side containers within #viewer */
2#viewer {
3 display: flex;
4 height: 100%;
5 width: 100%;
6}
7
8/* Side Panel Styles */
9.viewer-wrapper {
10 display: flex;
11 height: 100vh;
12 width: 100%;
13}
14
15.side-panel {
16 width: 300px;
17 min-width: 250px;
18 max-width: 400px;
19 background-color: #f5f5f5;
20 border-right: 1px solid #ddd;
21 box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
22 transition: transform 0.3s ease;
23 z-index: 1000;
24 display: flex;
25 flex-direction: column;
26}
27
28.side-panel.collapsed {
29 transform: translateX(-100%);
30}
31
32.side-panel-header {
33 background-color: #e9ecef;
34 padding: 15px 20px;
35 border-bottom: 1px solid #ddd;
36 flex-shrink: 0;
37}
38
39.side-panel-header h3 {
40 margin: 0;
41 font-size: 18px;
42 font-weight: 600;
43 color: #333;
44}
45
46.side-panel-content {
47 flex: 1;
48 padding: 20px;
49 overflow-y: auto;
50}
51
52.panel-section {
53 margin-bottom: 25px;
54}
55
56.panel-section h4 {
57 margin: 0 0 12px 0;
58 font-size: 14px;
59 font-weight: 600;
60 color: #555;
61 text-transform: uppercase;
62 letter-spacing: 0.5px;
63}
64
65.panel-button {
66 display: block;
67 width: 100%;
68 padding: 10px 15px;
69 margin-bottom: 8px;
70 background-color: #fff;
71 border: 1px solid #ddd;
72 border-radius: 4px;
73 cursor: pointer;
74 transition: all 0.2s ease;
75 font-size: 14px;
76}
77
78.panel-button:hover {
79 background-color: #007bff;
80 color: white;
81 border-color: #007bff;
82}
83
84.panel-button:active {
85 transform: translateY(1px);
86}
87
88/* Text Extraction UI Styles */
89#page-info {
90 display: flex;
91 align-items: center;
92 gap: 8px;
93 margin-bottom: 15px;
94 flex-wrap: wrap;
95}
96
97.page-label {
98 font-size: 14px;
99 font-weight: 600;
100 color: #333;
101 white-space: nowrap;
102}
103
104.page-input {
105 width: 60px;
106 padding: 4px 8px;
107 border: 1px solid #ddd;
108 border-radius: 4px;
109 font-size: 14px;
110 text-align: center;
111}
112
113.page-input:focus {
114 outline: none;
115 border-color: #007bff;
116 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
117}
118
119.page-count-label {
120 font-size: 12px;
121 color: #666;
122 white-space: nowrap;
123}
124
125.text-section-label {
126 display: block;
127 font-size: 12px;
128 font-weight: 600;
129 color: #555;
130 margin-bottom: 5px;
131 text-transform: uppercase;
132 letter-spacing: 0.5px;
133}
134
/* Read-only text areas that show the extracted text. */
.text-display {
  /* Include padding and border in the 100% width; with the default
     content-box sizing the textarea is 22px wider than its container and
     overflows the side panel horizontally. */
  box-sizing: border-box;
  width: 100%;
  height: 120px;
  padding: 10px;
  border: 1px solid #ddd;
  border-radius: 4px;
  font-size: 12px;
  font-family: 'Courier New', monospace;
  line-height: 1.4;
  resize: vertical;
  background-color: #f9f9f9;
  color: #333;
  white-space: pre-wrap;
  word-wrap: break-word;
  overflow-y: auto;
  overflow-x: hidden;
}
152
153.text-display:focus {
154 outline: none;
155 border-color: #007bff;
156 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
157}
158
159.text-display::placeholder {
160 color: #999;
161 font-style: italic;
162}
163
164#page-text-all {
165 margin-bottom: 20px;
166}
167
168#page-text-annotations {
169 margin-bottom: 15px;
170}
171
172.setting-item {
173 margin-bottom: 15px;
174}
175
176.setting-item label {
177 display: flex;
178 align-items: center;
179 font-size: 14px;
180 color: #555;
181 cursor: pointer;
182}
183
184.setting-item input[type="checkbox"] {
185 margin-right: 10px;
186}
187
188.setting-item input[type="range"] {
189 margin-left: 10px;
190 flex: 1;
191}
192
193.viewer-with-panel {
194 flex: 1;
195 height: 100vh;
196}
197
198/* Dark mode styles */
199@media (prefers-color-scheme: dark) {
200 .side-panel {
201 background-color: #2d3748;
202 border-right-color: #4a5568;
203 }
204
205 .side-panel-header {
206 background-color: #1a202c;
207 border-bottom-color: #4a5568;
208 }
209
210 .side-panel-header h3 {
211 color: #e2e8f0;
212 }
213
214 .panel-section h4 {
215 color: #a0aec0;
216 }
217
218 .panel-button {
219 background-color: #4a5568;
220 border-color: #718096;
221 color: #e2e8f0;
222 }
223
224 .panel-button:hover {
225 background-color: #007bff;
226 border-color: #007bff;
227 }
228
229 .setting-item label {
230 color: #a0aec0;
231 }
232
233 /* Text extraction dark mode styles */
234 .page-label {
235 color: #e2e8f0;
236 }
237
238 .page-input {
239 background-color: #4a5568;
240 border-color: #718096;
241 color: #e2e8f0;
242 }
243
244 .page-input:focus {
245 border-color: #007bff;
246 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.4);
247 }
248
249 .page-count-label {
250 color: #a0aec0;
251 }
252
253 .text-section-label {
254 color: #a0aec0;
255 }
256
257 .text-display {
258 background-color: #4a5568;
259 border-color: #718096;
260 color: #e2e8f0;
261 }
262
263 .text-display:focus {
264 border-color: #007bff;
265 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.4);
266 }
267
268 .text-display::placeholder {
269 color: #718096;
270 }
271}
272
273/* Responsive design */
274@media (max-width: 768px) {
275 .side-panel {
276 position: absolute;
277 left: 0;
278 top: 0;
279 height: 100%;
280 z-index: 1001;
281 }
282
283 .viewer-wrapper {
284 position: relative;
285 }
286
287 .side-panel.collapsed {
288 transform: translateX(-100%);
289 }
290}
291
292/* Toggle button for mobile */
293.side-panel-toggle {
294 position: fixed;
295 top: 20px;
296 left: 20px;
297 z-index: 1002;
298 background-color: #007bff;
299 color: white;
300 border: none;
301 border-radius: 4px;
302 padding: 10px;
303 cursor: pointer;
304 display: none;
305}
306
307@media (max-width: 768px) {
308 .side-panel-toggle {
309 display: block;
310 }
311
312 /* Text extraction responsive styles */
313 #page-info {
314 flex-direction: column;
315 align-items: flex-start;
316 gap: 5px;
317 }
318
319 .page-input {
320 width: 80px;
321 }
322
323 .text-display {
324 height: 100px;
325 font-size: 11px;
326 }
327
328 .page-count-label {
329 font-size: 11px;
330 }
331}
332
333/* Theme Switch Styles */
334.theme-switch-container {
335 display: flex;
336 justify-content: center;
337 margin-top: 15px;
338}
339
340.theme-switch {
341 position: relative;
342 display: flex;
343 background-color: #e9ecef;
344 border-radius: 25px;
345 padding: 4px;
346 border: 2px solid #dee2e6;
347 width: 200px;
348 height: 50px;
349 overflow: hidden;
350}
351
352.theme-switch input[type="radio"] {
353 display: none;
354}
355
356.switch-option {
357 flex: 1;
358 display: flex;
359 flex-direction: column;
360 align-items: center;
361 justify-content: center;
362 cursor: pointer;
363 position: relative;
364 z-index: 2;
365 transition: color 0.3s ease;
366 padding: 5px;
367}
368
369.switch-option.left {
370 border-radius: 20px 0 0 20px;
371}
372
373.switch-option.right {
374 border-radius: 0 20px 20px 0;
375}
376
377.switch-icon {
378 font-size: 16px;
379 margin-bottom: 2px;
380}
381
382.switch-label {
383 font-size: 12px;
384 font-weight: 500;
385 text-transform: uppercase;
386 letter-spacing: 0.5px;
387}
388
389.switch-slider {
390 position: absolute;
391 top: 4px;
392 left: 4px;
393 width: calc(50% - 4px);
394 height: calc(100% - 8px);
395 background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
396 border-radius: 20px;
397 transition: transform 0.3s ease, background 0.3s ease;
398 z-index: 1;
399 box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
400}
401
402/* Dark mode selected */
403#dark-mode:checked ~ .switch-slider {
404 transform: translateX(0);
405 background: linear-gradient(135deg, #2c3e50 0%, #5a5c5e 100%);
406}
407
408/* Light mode selected */
409#light-mode:checked ~ .switch-slider {
410 transform: translateX(100%);
411 background: linear-gradient(135deg, #e4ce85 0%, #e9ca1d 100%);
412}
413
414/* Text color changes */
415#dark-mode:checked ~ .switch-option.left {
416 color: white;
417}
418
419#light-mode:checked ~ .switch-option.right {
420 color: white;
421}
422
423.switch-option {
424 color: #6c757d;
425}
426
427/* Dark mode theme styles */
428@media (prefers-color-scheme: dark) {
429 .theme-switch {
430 background-color: #4a5568;
431 border-color: #718096;
432 }
433
434 .switch-option {
435 color: #a0aec0;
436 }
437
438 #dark-mode:checked ~ .switch-option.left {
439 color: white;
440 }
441
442 #light-mode:checked ~ .switch-option.right {
443 color: white;
444 }
445}
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales