Quickly extract text from PDFs—either from entire pages or highlighted sections only.
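This demo allows you to extract the full text of the current page, as well as only the text that lies under annotations (such as highlights) on that page.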
Implementation steps
To add PDF Text Extraction capability with WebViewer:
Step 1: Choose your preferred web stack for WebViewer.
Step 2: Add the ES6 JavaScript sample code provided in this guide.
Once you generate your license key, it will automatically be included in your sample code below.
Apryse collects some data regarding your usage of the SDK for product improvement.
The data that Apryse collects includes:
For clarity, no other data is collected by the SDK and Apryse has no access to the contents of your documents.
If you wish to continue without data collection, contact us and we will email you a no-tracking trial key for you to get started.
1// ES6 Compliant Syntax
2// GitHub Copilot, Claude Sonnet 4 (Preview), October 14, 2025
3// File: showcase-demos/pdf-text-extractor/index.js
4import WebViewer from '@pdftron/webviewer';
5
// License key handed to the WebViewer constructor; replace the placeholder with your own key.
const licenseKey = 'YOUR_WEBVIEWER_LICENSE_KEY';

// Global variables to track state
// URL of the document opened as `initialDoc`.
// NOTE(review): the name looks carried over from a redaction demo; this file only extracts text.
let redactionDemoFile = "https://apryse.s3.amazonaws.com/public/files/samples/section-508.pdf";
let pageCount = 0; // page count of the loaded document (set on documentLoaded and by getText)
let textContent = ''; // text of the most recently extracted page (written by getText)
let annotTextContent = ''; // text found under annotations (written by getAnnotText)
let currentPage = 0; // page last selected through setPage; stays 0 until setPage accepts a page
14
/**
 * Creates the WebViewer instance inside the element with id `viewer` and
 * wires up the text-extraction demo.
 *
 * Once the instance is ready it:
 *  - on `documentLoaded`: switches to single-page layout, keeps the page
 *    navigation component visible, records the page count and selects
 *    page 1 (which triggers the first extraction);
 *  - on `pageNumberUpdated`: re-runs extraction through `viewerUpdated`;
 *  - on `annotationChanged`: re-runs extraction through `getAnnotListener`;
 *  - builds the side panel and refreshes it if an updater is already published.
 *
 * Logs an error and does nothing when the `viewer` element is missing.
 * Initialization failures are logged instead of surfacing as an unhandled
 * promise rejection.
 */
function initializeWebViewer() {
  const element = document.getElementById('viewer');
  if (!element) {
    console.error('Viewer div not found.');
    return;
  }

  WebViewer({
    path: '/lib',
    initialDoc: redactionDemoFile,
    licenseKey,
    fullAPI: true,
    enableFilePicker: true, // Enable file picker to open files. In WebViewer -> menu icon -> Open File
  }, element).then((instance) => {
    const { UI } = instance;
    const { documentViewer } = instance.Core;

    documentViewer.addEventListener('documentLoaded', () => {
      UI.setLayoutMode(UI.LayoutMode.Single); // single page view
      UI.disableFadePageNavigationComponent(); // keep the page navigation component on screen
      pageCount = documentViewer.getDocument().getPageCount();
      setPage(1); // selecting the first page triggers text extraction
    });

    // Re-extract whenever the visible page changes.
    documentViewer.addEventListener('pageNumberUpdated', viewerUpdated);

    // Re-extract whenever annotations are added, modified or deleted.
    documentViewer
      .getAnnotationManager()
      .addEventListener('annotationChanged', getAnnotListener);

    createUIElements();

    // The panel script publishes this updater; it may not exist yet on first run.
    if (window.updateUIContent) {
      window.updateUIContent();
    }
  }).catch((error) => {
    // Also catches exceptions thrown by the setup callback above.
    console.error('Failed to initialize WebViewer:', error);
  });
}
54
/**
 * Extracts both the full page text and the text under annotations for one page.
 *
 * The two extractions run in sequence: `getAnnotText` triggers the UI refresh,
 * so the page text has to be stored first.
 *
 * Callers invoke this without awaiting it, so extraction errors are caught and
 * logged here rather than left as unhandled promise rejections.
 *
 * @param {number} pageNumber - 1-based page number to extract.
 * @returns {Promise<void>} Resolves when both extractions finished or failed.
 */
async function getAllTextFromDocument(pageNumber) {
  const doc = window.WebViewer.getInstance().Core.documentViewer.getDocument();
  // `!(x > 0)` also rejects NaN and undefined, matching the original check.
  if (!doc || !(pageNumber > 0)) {
    console.warn('Document not available or invalid page number');
    return;
  }

  try {
    await getText(pageNumber);
    await getAnnotText(pageNumber);
  } catch (error) {
    console.error('Text extraction failed:', error);
  }
}
65
/**
 * Loads the text of one page and stores it in the module-level `textContent`;
 * also refreshes `pageCount` from the loaded document.
 *
 * Does nothing when no document is loaded.
 *
 * @param {number} pageNumber - 1-based page number whose text is loaded.
 * @returns {Promise<void>}
 */
async function getText(pageNumber) {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const doc = documentViewer.getDocument();
  if (!doc) {
    return;
  }

  // loadPageText resolves with the page text, so no callback is needed.
  const pageText = await doc.loadPageText(pageNumber);
  textContent = pageText;
  pageCount = doc.getPageCount();
}
82
/**
 * Extracts the text that lies under each annotation on one page and stores the
 * result, one annotation per line, in the module-level `annotTextContent`.
 *
 * Steps:
 *  1. Export the viewer's annotations for the page as XFDF.
 *  2. Merge that XFDF into the underlying PDFDoc so PDFNet can see them.
 *  3. Ask a TextExtractor for the text under every annotation on the page.
 *  4. Publish the result through `updateGlobalVars` and refresh the UI.
 *
 * Annotations that cover no text are skipped, so the result is an empty string
 * (rather than a run of blank lines) when nothing was found.
 *
 * @param {number} pageNumber - 1-based page number to inspect.
 * @returns {Promise<void>}
 */
async function getAnnotText(pageNumber) {
  const { PDFNet, documentViewer } = window.WebViewer.getInstance().Core;
  await PDFNet.initialize();
  await documentViewer.getAnnotationsLoadedPromise(); // Ensure annotations are loaded

  // Bail out before doing any export work when there is no document.
  const doc = await getPDFDocument(documentViewer, PDFNet);
  if (!doc) {
    console.warn('PDF document not available');
    return;
  }

  const annotationManager = documentViewer.getAnnotationManager();
  const annotList = annotationManager
    .getAnnotationsList()
    .filter((a) => a.getPageNumber() === pageNumber);
  const xfdfString = await annotationManager.exportAnnotations({ annotationList: annotList });
  const textOutput = [];

  // Run PDFNet methods with memory management
  await PDFNet.runWithCleanup(async () => {
    try {
      // Lock the document before the write operation (fdfUpdate);
      // runWithCleanup will auto unlock when complete.
      await doc.lock();
      const fdfDoc = await PDFNet.FDFDoc.createFromXFDF(xfdfString);
      await doc.fdfUpdate(fdfDoc);

      const page = await doc.getPage(pageNumber);
      const cropBox = await page.getCropBox();
      const extractor = await PDFNet.TextExtractor.create();
      await extractor.begin(page, cropBox);

      const annotCount = await page.getNumAnnots();
      for (let i = 0; i < annotCount; ++i) {
        const annot = await page.getAnnot(i);
        const annotText = await extractor.getTextUnderAnnot(annot);
        // Skip annotations with no text beneath them.
        if (annotText && annotText.trim() !== '') {
          textOutput.push(annotText);
        }
      }
    } catch (e) {
      // Best effort: keep whatever was collected before the failure.
      console.error('Failed to extract text under annotations', e);
    }

    annotTextContent = textOutput.join('\n');
    updateGlobalVars(); // Publish the new values before the UI reads them
    if (window.updateUIContent) {
      window.updateUIContent();
    }
  });

  // Trigger UI update after annotation extraction is complete
  if (window.updateUIElements) {
    window.updateUIElements(pageNumber);
  }
}
134
/**
 * Returns a PDFNet PDFDoc for the document currently shown in the viewer.
 *
 * Documents whose `type` is `'office'` are first converted to a PDF buffer
 * and a new PDFDoc is created from it; every other document returns its own
 * PDFDoc via `getPDFDoc()`.
 *
 * The file extension is taken from the text after the last dot of the
 * document's filename. When the filename is missing or has no extension, no
 * `extension` option is sent to the converter.
 *
 * @param {object} documentViewer - WebViewer DocumentViewer instance.
 * @param {object} PDFNet - The PDFNet namespace from `instance.Core`.
 * @returns {Promise<object|undefined>} The PDFDoc, or undefined when no
 *   document is loaded.
 */
async function getPDFDocument(documentViewer, PDFNet) {
  const currentDocument = documentViewer.getDocument();
  if (!currentDocument) {
    return undefined;
  }

  if (currentDocument.type !== 'office') {
    return currentDocument.getPDFDoc();
  }

  const core = window.WebViewer.getInstance().Core;
  const fileData = await currentDocument.getFileData();

  const filename = typeof currentDocument.filename === 'string' ? currentDocument.filename : '';
  const dotIndex = filename.lastIndexOf('.');
  const options = {};
  // Only treat the suffix as an extension when there is a dot followed by text.
  if (dotIndex >= 0 && dotIndex < filename.length - 1) {
    options.extension = filename.slice(dotIndex + 1);
  }

  const pdfBuffer = await core.officeToPDFBuffer(fileData, options);
  return PDFNet.PDFDoc.createFromBuffer(pdfBuffer);
}
155
/**
 * Makes `pageNumber` the current page and triggers text extraction for it.
 *
 * The value is normalized with `Number()` so numeric strings are accepted.
 * Anything that is not a whole number in the range 1..pageCount is ignored
 * (pages are 1-based, so 0 is rejected as well).
 *
 * @param {number|string} pageNumber - 1-based page number to show.
 */
function setPage(pageNumber) {
  const page = Number(pageNumber);
  if (!Number.isInteger(page) || page < 1 || page > pageCount) {
    return;
  }

  const { documentViewer } = window.WebViewer.getInstance().Core;
  // This function is also reached from the pageNumberUpdated listener, where
  // the viewer is already on `page`; skip the redundant navigation then.
  if (documentViewer.getCurrentPage() !== page) {
    documentViewer.setCurrentPage(page);
  }

  currentPage = page;
  // Trigger text extraction for the new page
  getAllTextFromDocument(page);
}
165
/**
 * `annotationChanged` handler: re-runs text extraction for whichever page the
 * viewer currently shows, so the annotation text stays in sync with edits.
 */
function getAnnotListener() {
  const { documentViewer } = window.WebViewer.getInstance().Core;
  const activePage = documentViewer.getCurrentPage();
  getAllTextFromDocument(activePage);
}
170
/**
 * `pageNumberUpdated` handler: reads the viewer's current page and routes it
 * through `setPage`, which records it and triggers text extraction.
 */
function viewerUpdated() {
  const viewerInstance = window.WebViewer.getInstance();
  const visiblePage = viewerInstance.Core.documentViewer.getCurrentPage();
  setPage(visiblePage);
}
175
// UI Elements
/**
 * Loads `ui-elements.js` on demand and builds the side panel next to the viewer.
 *
 * Safe to call more than once:
 *  - if the `UIElements` class is already available, the script is not
 *    injected again and the panel is only built when it does not exist yet;
 *  - if the script tag is already in the document (load still in flight),
 *    nothing is done, since its `onload` will build the panel.
 *
 * A failed script load is reported on the console.
 */
function createUIElements() {
  const scriptSrc = '/showcase-demos/pdf-text-extractor/ui-elements.js';

  const buildPanel = () => {
    if (!document.getElementById('side-panel')) {
      UIElements.init('viewer');
    }
  };

  // `typeof` is safe even when the class has not been declared yet.
  if (typeof UIElements !== 'undefined') {
    buildPanel();
    return;
  }

  if (document.querySelector(`script[src="${scriptSrc}"]`)) {
    return;
  }

  const script = document.createElement('script');
  script.src = scriptSrc;
  script.onload = buildPanel;
  script.onerror = () => {
    console.error(`Failed to load ${scriptSrc}`);
  };
  document.head.appendChild(script);
}
191
/**
 * Mirrors the module-level state onto `window` so the separately loaded
 * side-panel script can read it.
 */
function updateGlobalVars() {
  Object.assign(window, {
    currentPage,
    pageCount,
    textContent,
    annotTextContent,
  });
}
199
// Initialize the WebViewer as soon as this module is evaluated.
// initializeWebViewer() logs an error and returns if the `viewer` element is not in the DOM yet.
initializeWebViewer();
202
1// ES6 Compliant Syntax
2// GitHub Copilot, Claude Sonnet 4 (Preview), October 14, 2025
3// File: showcase-demos/pdf-text-extractor/ui-elements.js
4
/**
 * Builds and refreshes the "Text Extraction" side panel shown next to the viewer.
 *
 * The panel reads its data from globals published on `window`
 * (`currentPage`, `pageCount`, `textContent`, `annotTextContent`).
 */
class UIElements {

  /**
   * Entry point: creates the side panel around the given viewer element.
   *
   * @param {string} viewerId - id of the element that hosts WebViewer.
   */
  static init(viewerId) {
    this.createSidePanel(viewerId);
  }

  /**
   * Creates a side panel that sits on the left side of the viewer.
   *
   * The viewer element is moved into a new flex wrapper together with the
   * panel. Does nothing (besides logging) when the viewer element is missing,
   * and does nothing when a panel already exists, so element ids stay unique.
   *
   * @param {string} viewerId - id of the element that hosts WebViewer.
   */
  static createSidePanel(viewerId) {
    const viewerElement = document.getElementById(viewerId);
    if (!viewerElement) {
      console.error(`Viewer element with id '${viewerId}' not found.`);
      return;
    }
    if (document.getElementById('side-panel')) {
      return;
    }

    // Create the side panel container
    const sidePanel = document.createElement('div');
    sidePanel.id = 'side-panel';
    sidePanel.className = 'side-panel';

    // Create side panel content
    const content = document.createElement('div');
    content.className = 'side-panel-content';

    // Add the text extraction content (static markup, no untrusted data)
    const sampleContent = document.createElement('div');
    sampleContent.innerHTML = `
      <div class="panel-section">
        <h4>Text Extraction</h4>

        <div id="page-info">
          <label class="page-label"><strong>Page</strong></label>
          <input type="number" id="input-page-number" min="1" value="0" class="page-input" readonly>
          <label id="page-count-label" class="page-count-label">of 0 full page text</label>
        </div>

        <div id="page-text-all">
          <label class="text-section-label">Page Text Content:</label>
          <textarea id="page-text-content" class="text-display" readonly placeholder="Page text will appear here..."></textarea>
        </div>

        <div id="page-text-annotations">
          <label id="annotations-label" class="text-section-label">Page 1 text under annotations:</label>
          <textarea id="page-annotations-content" class="text-display" readonly placeholder="Annotation text will appear here..."></textarea>
        </div>
      </div>
    `;

    content.appendChild(sampleContent);
    sidePanel.appendChild(content);

    // Create a wrapper to contain both the side panel and viewer
    const wrapper = document.createElement('div');
    wrapper.id = 'viewer-wrapper';
    wrapper.className = 'viewer-wrapper';

    // Insert the wrapper before the viewer element
    viewerElement.parentNode.insertBefore(wrapper, viewerElement);

    // Move the viewer element into the wrapper and add the side panel
    wrapper.appendChild(sidePanel);
    wrapper.appendChild(viewerElement);

    // Add the viewer-with-panel class to the viewer element
    viewerElement.classList.add('viewer-with-panel');
    console.log('Side panel created successfully');
  }

  /**
   * Appends an extra section to the side panel.
   *
   * NOTE(review): this is an instance method while the rest of the class is
   * static; kept as is so existing callers are not broken.
   *
   * @param {string} content - HTML for the section. It is assigned to
   *   `innerHTML`, so it must be trusted markup, never user-supplied text.
   */
  addPanelContent(content) {
    const panelContent = document.querySelector('.side-panel-content');
    if (panelContent) {
      const contentDiv = document.createElement('div');
      contentDiv.className = 'panel-section';
      contentDiv.innerHTML = content;
      panelContent.appendChild(contentDiv);
    }
  }

  /**
   * Refreshes every panel field from the globals on `window` and publishes
   * `window.updateUIContent`, a lighter updater that only rewrites the two
   * text areas.
   *
   * This method only displays data; it never starts an extraction itself.
   * Returns without doing anything when the panel has not been built yet.
   *
   * @param {number} [pageNumber=window.currentPage] - Page the displayed
   *   text belongs to.
   */
  static updateUIElements(pageNumber = window.currentPage) {
    const pageInput = document.getElementById('input-page-number');
    const pageCountLabel = document.getElementById('page-count-label');
    const pageTextContent = document.getElementById('page-text-content');
    const pageAnnotationsContent = document.getElementById('page-annotations-content');
    const annotationsLabel = document.getElementById('annotations-label');

    if (!pageInput || !pageCountLabel || !pageTextContent || !pageAnnotationsContent || !annotationsLabel) {
      return;
    }

    // Function to immediately update UI content from global variables
    const updateUIContent = () => {
      if (window.textContent !== undefined) {
        pageTextContent.value = window.textContent || 'No text found on this page.';
      }
      if (window.annotTextContent !== undefined) {
        pageAnnotationsContent.value = window.annotTextContent || 'No annotation text found on this page.';
      }
    };

    // Expose the UI update function globally
    window.updateUIContent = updateUIContent;

    // Page count label and upper bound of the page input
    const totalPages = window.pageCount || 0;
    pageCountLabel.textContent = `of ${totalPages} full page text`;
    if (totalPages > 0) {
      pageInput.max = totalPages;
    }

    // Page number field and annotations heading
    if (pageNumber !== undefined && pageNumber !== null) {
      pageInput.value = pageNumber;
      annotationsLabel.textContent = `Page ${pageNumber} text under annotations:`;
    }

    // Show the freshly extracted text right away instead of waiting for the
    // next extraction to call window.updateUIContent.
    updateUIContent();
  }
}
145
// Make the updater globally accessible: index.js calls window.updateUIElements(pageNumber)
// after each annotation-text extraction. The method does not use `this`, so it can be detached.
window.updateUIElements = UIElements.updateUIElements;
147
1/* Main layout - side by side containers within #viewer */
2#viewer {
3 display: flex;
4 height: 100%;
5 width: 100%;
6}
7
8/* Side Panel Styles */
9.viewer-wrapper {
10 display: flex;
11 height: 100vh;
12 width: 100%;
13}
14
15.side-panel {
16 width: 300px;
17 min-width: 250px;
18 max-width: 400px;
19 background-color: #f5f5f5;
20 border-right: 1px solid #ddd;
21 box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
22 transition: transform 0.3s ease;
23 z-index: 1000;
24 display: flex;
25 flex-direction: column;
26}
27
28.side-panel.collapsed {
29 transform: translateX(-100%);
30}
31
32.side-panel-header {
33 background-color: #e9ecef;
34 padding: 15px 20px;
35 border-bottom: 1px solid #ddd;
36 flex-shrink: 0;
37}
38
39.side-panel-header h3 {
40 margin: 0;
41 font-size: 18px;
42 font-weight: 600;
43 color: #333;
44}
45
46.side-panel-content {
47 flex: 1;
48 padding: 20px;
49 overflow-y: auto;
50}
51
52.panel-section {
53 margin-bottom: 25px;
54}
55
56.panel-section h4 {
57 margin: 0 0 12px 0;
58 font-size: 14px;
59 font-weight: 600;
60 color: #555;
61 text-transform: uppercase;
62 letter-spacing: 0.5px;
63}
64
65.panel-button {
66 display: block;
67 width: 100%;
68 padding: 10px 15px;
69 margin-bottom: 8px;
70 background-color: #fff;
71 border: 1px solid #ddd;
72 border-radius: 4px;
73 cursor: pointer;
74 transition: all 0.2s ease;
75 font-size: 14px;
76}
77
78.panel-button:hover {
79 background-color: #007bff;
80 color: white;
81 border-color: #007bff;
82}
83
84.panel-button:active {
85 transform: translateY(1px);
86}
87
88/* Text Extraction UI Styles */
89#page-info {
90 display: flex;
91 align-items: center;
92 gap: 8px;
93 margin-bottom: 15px;
94 flex-wrap: wrap;
95}
96
97.page-label {
98 font-size: 14px;
99 font-weight: 600;
100 color: #333;
101 white-space: nowrap;
102}
103
104.page-input {
105 width: 60px;
106 padding: 4px 8px;
107 border: 1px solid #ddd;
108 border-radius: 4px;
109 font-size: 14px;
110 text-align: center;
111}
112
113.page-input:focus {
114 outline: none;
115 border-color: #007bff;
116 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
117}
118
119.page-count-label {
120 font-size: 12px;
121 color: #666;
122 white-space: nowrap;
123}
124
125.text-section-label {
126 display: block;
127 font-size: 12px;
128 font-weight: 600;
129 color: #555;
130 margin-bottom: 5px;
131 text-transform: uppercase;
132 letter-spacing: 0.5px;
133}
134
/* Read-only text areas that show the extracted text. */
.text-display {
  /* Keep padding and border inside the 100% width so the box does not overflow the panel. */
  box-sizing: border-box;
  width: 100%;
  height: 120px;
  padding: 10px;
  border: 1px solid #ddd;
  border-radius: 4px;
  font-size: 12px;
  font-family: 'Courier New', monospace;
  line-height: 1.4;
  resize: vertical;
  background-color: #f9f9f9;
  color: #333;
  white-space: pre-wrap;
  /* Standard property name; `word-wrap` is its legacy alias. */
  overflow-wrap: break-word;
  overflow-y: auto;
  overflow-x: hidden;
}
152
153.text-display:focus {
154 outline: none;
155 border-color: #007bff;
156 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.25);
157}
158
159.text-display::placeholder {
160 color: #999;
161 font-style: italic;
162}
163
164#page-text-all {
165 margin-bottom: 20px;
166}
167
168#page-text-annotations {
169 margin-bottom: 15px;
170}
171
172.setting-item {
173 margin-bottom: 15px;
174}
175
176.setting-item label {
177 display: flex;
178 align-items: center;
179 font-size: 14px;
180 color: #555;
181 cursor: pointer;
182}
183
184.setting-item input[type="checkbox"] {
185 margin-right: 10px;
186}
187
188.setting-item input[type="range"] {
189 margin-left: 10px;
190 flex: 1;
191}
192
193.viewer-with-panel {
194 flex: 1;
195 height: 100vh;
196}
197
198/* Dark mode styles */
199@media (prefers-color-scheme: dark) {
200 .side-panel {
201 background-color: #2d3748;
202 border-right-color: #4a5568;
203 }
204
205 .side-panel-header {
206 background-color: #1a202c;
207 border-bottom-color: #4a5568;
208 }
209
210 .side-panel-header h3 {
211 color: #e2e8f0;
212 }
213
214 .panel-section h4 {
215 color: #a0aec0;
216 }
217
218 .panel-button {
219 background-color: #4a5568;
220 border-color: #718096;
221 color: #e2e8f0;
222 }
223
224 .panel-button:hover {
225 background-color: #007bff;
226 border-color: #007bff;
227 }
228
229 .setting-item label {
230 color: #a0aec0;
231 }
232
233 /* Text extraction dark mode styles */
234 .page-label {
235 color: #e2e8f0;
236 }
237
238 .page-input {
239 background-color: #4a5568;
240 border-color: #718096;
241 color: #e2e8f0;
242 }
243
244 .page-input:focus {
245 border-color: #007bff;
246 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.4);
247 }
248
249 .page-count-label {
250 color: #a0aec0;
251 }
252
253 .text-section-label {
254 color: #a0aec0;
255 }
256
257 .text-display {
258 background-color: #4a5568;
259 border-color: #718096;
260 color: #e2e8f0;
261 }
262
263 .text-display:focus {
264 border-color: #007bff;
265 box-shadow: 0 0 0 2px rgba(0, 123, 255, 0.4);
266 }
267
268 .text-display::placeholder {
269 color: #718096;
270 }
271}
272
273/* Responsive design */
274@media (max-width: 768px) {
275 .side-panel {
276 position: absolute;
277 left: 0;
278 top: 0;
279 height: 100%;
280 z-index: 1001;
281 }
282
283 .viewer-wrapper {
284 position: relative;
285 }
286
287 .side-panel.collapsed {
288 transform: translateX(-100%);
289 }
290}
291
292/* Toggle button for mobile */
293.side-panel-toggle {
294 position: fixed;
295 top: 20px;
296 left: 20px;
297 z-index: 1002;
298 background-color: #007bff;
299 color: white;
300 border: none;
301 border-radius: 4px;
302 padding: 10px;
303 cursor: pointer;
304 display: none;
305}
306
307@media (max-width: 768px) {
308 .side-panel-toggle {
309 display: block;
310 }
311
312 /* Text extraction responsive styles */
313 #page-info {
314 flex-direction: column;
315 align-items: flex-start;
316 gap: 5px;
317 }
318
319 .page-input {
320 width: 80px;
321 }
322
323 .text-display {
324 height: 100px;
325 font-size: 11px;
326 }
327
328 .page-count-label {
329 font-size: 11px;
330 }
331}
332
333/* Theme Switch Styles */
334.theme-switch-container {
335 display: flex;
336 justify-content: center;
337 margin-top: 15px;
338}
339
340.theme-switch {
341 position: relative;
342 display: flex;
343 background-color: #e9ecef;
344 border-radius: 25px;
345 padding: 4px;
346 border: 2px solid #dee2e6;
347 width: 200px;
348 height: 50px;
349 overflow: hidden;
350}
351
352.theme-switch input[type="radio"] {
353 display: none;
354}
355
356.switch-option {
357 flex: 1;
358 display: flex;
359 flex-direction: column;
360 align-items: center;
361 justify-content: center;
362 cursor: pointer;
363 position: relative;
364 z-index: 2;
365 transition: color 0.3s ease;
366 padding: 5px;
367}
368
369.switch-option.left {
370 border-radius: 20px 0 0 20px;
371}
372
373.switch-option.right {
374 border-radius: 0 20px 20px 0;
375}
376
377.switch-icon {
378 font-size: 16px;
379 margin-bottom: 2px;
380}
381
382.switch-label {
383 font-size: 12px;
384 font-weight: 500;
385 text-transform: uppercase;
386 letter-spacing: 0.5px;
387}
388
389.switch-slider {
390 position: absolute;
391 top: 4px;
392 left: 4px;
393 width: calc(50% - 4px);
394 height: calc(100% - 8px);
395 background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
396 border-radius: 20px;
397 transition: transform 0.3s ease, background 0.3s ease;
398 z-index: 1;
399 box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
400}
401
402/* Dark mode selected */
403#dark-mode:checked ~ .switch-slider {
404 transform: translateX(0);
405 background: linear-gradient(135deg, #2c3e50 0%, #5a5c5e 100%);
406}
407
408/* Light mode selected */
409#light-mode:checked ~ .switch-slider {
410 transform: translateX(100%);
411 background: linear-gradient(135deg, #e4ce85 0%, #e9ca1d 100%);
412}
413
414/* Text color changes */
415#dark-mode:checked ~ .switch-option.left {
416 color: white;
417}
418
419#light-mode:checked ~ .switch-option.right {
420 color: white;
421}
422
423.switch-option {
424 color: #6c757d;
425}
426
427/* Dark mode theme styles */
428@media (prefers-color-scheme: dark) {
429 .theme-switch {
430 background-color: #4a5568;
431 border-color: #718096;
432 }
433
434 .switch-option {
435 color: #a0aec0;
436 }
437
438 #dark-mode:checked ~ .switch-option.left {
439 color: white;
440 }
441
442 #light-mode:checked ~ .switch-option.right {
443 color: white;
444 }
445}
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales