Easily produce a JSON file describing the structure of a PDF file. After extraction, the displayed PDF will have annotations showing the identified elements. The first two pages are analyzed, resulting in a JSON file that can be viewed.
Note
In this demo, the first page in the JSON file is skipped because it contains a trial demo message. Processing starts on the page after the trial demo page.
This demo lets you:
Implementation steps
To add Document Structure Extraction with WebViewer:
Step 1: Choose your preferred web stack
Step 2: Download any required modules listed in the Demo Dependencies section below
Step 3: Add the ES6 JavaScript sample code provided in this guide
Demo Dependencies
This sample uses the following:
1// ES6 Compliant Syntax
2// Copilot name: GitHub Copilot, version: 1.0.0, model: GPT-4, version: 2024-06, date: 2025-09-15
3// File: client/index.js
4
// **Important**
// WebViewer needs an Apryse license key to run.
// A trial key can be obtained from:
// https://docs.apryse.com/core/guides/get-started/trial-key
const licenseKey = 'YOUR_LICENSE_KEY';

// Host element that WebViewer is mounted into.
const viewerElement = document.getElementById('viewer');

// Sample PDF opened when the viewer starts.
const initialDoc =
  'https://apryse.s3.us-west-1.amazonaws.com/public/files/samples/sales-invoice-with-credit-cards.pdf';

// Most recently extracted document structure (parsed JSON); null until an extraction succeeds.
let jsonData = null;
// Legend of the element types that get outlined on the PDF.
// Each row is [type, title, [R, G, B]]; every colour is fully opaque (A = 1).
const documentStructureMap = [
  ['image', 'Image', [1, 129, 1]],
  ['paragraph', 'Paragraph', [254, 254, 1]],
  ['list', 'List', [254, 129, 193]],
  ['heading', 'Heading', [254, 166, 1]],
  ['header', 'Header', [200, 20, 128]],
  ['footer', 'Footer', [3, 219, 252]],
  ['graphic', 'Graphic', [255, 0, 0]],
  ['td', 'Table Data Cell', [51, 101, 251]],
  ['th', 'Table Header Cell', [128, 0, 128]],
  ['textbox', 'Text Box', [20, 230, 50]],
  ['group', 'Group', [0, 0, 0]],
].map(([type, title, [R, G, B]]) => ({ type, title, color: { R, G, B, A: 1 } }));
26
// Panels registered in the main WebViewer UI; assigned by customizeUI.
let viewerPanels = null;

// WebViewer's left-hand tab panel: its dataElement name and, once looked up, its panel object.
const tabPanel = { handle: null, dataElement: 'tabPanel' };

// The custom "Document Structure" sub-panel:
//   handle      - entry inserted into the tab panel's sub-panel list
//   dataElement - name the panel is registered under
//   render      - DOM element shown inside the panel
const documentStructurePanel = { handle: null, dataElement: 'documentStructurePanel', render: null };
42
// Customize the WebViewer left panel once a document has loaded.
//
// Registers the custom document structure sub-panel and puts it first in the
// tab panel's sub-panel list. This runs on every 'documentLoaded' event, so it
// must be safe to call repeatedly.
//
// @param {object} instance - WebViewer instance; only `instance.UI` is used here.
const customizeUI = (instance) => {
  const { UI } = instance;

  // Close the tab panel (if it is open) so it is re-rendered with the new sub-panel list.
  UI.closeElements([tabPanel.dataElement]);

  // Look up the tab panel among the registered panels.
  viewerPanels = UI.getPanels();
  tabPanel.handle = viewerPanels.find((panel) => panel.dataElement === tabPanel.dataElement);

  // Register (or re-register) the custom sub-panel with a freshly built DOM tree.
  RegisterDocumentStructurePanel(instance);

  if (!tabPanel.handle) {
    // Without the tab panel there is nothing to attach the sub-panel to.
    console.warn(`Panel "${tabPanel.dataElement}" not found; the document structure panel was not attached.`);
    return;
  }

  documentStructurePanel.handle = { render: documentStructurePanel.dataElement };

  // Drop any entry added by an earlier document load so the sub-panel is listed only once.
  const otherPanels = (tabPanel.handle.panelsList ?? []).filter(
    (entry) => entry.render !== documentStructurePanel.dataElement,
  );
  tabPanel.handle.panelsList = [documentStructurePanel.handle, ...otherPanels];

  UI.openElements([tabPanel.dataElement]);
  UI.setPanelWidth(tabPanel.dataElement, 400);
};
66
// Build the document structure panel content and register it with WebViewer
// as a left-hand panel titled "Document Structure".
const RegisterDocumentStructurePanel = (instance) => {
  // Inline SVG (a form/grid glyph) used as the panel's tab icon.
  const panelIcon = '<svg width="18px" height="18px" viewBox="0 0 24 24" id="圖層_1" data-name="圖層 1" xmlns="http://www.w3.org/2000/svg"><defs><style>.cls-1{fill:#080808;}</style></defs><title>form</title><path class="cls-1" d="M21,.5H3a2,2,0,0,0-2,2V22a2,2,0,0,0,2,2H21a2,2,0,0,0,2-2V2.5A2,2,0,0,0,21,.5Zm0,2v2H3v-2ZM3,22V6.5H21V22Z"/><path class="cls-1" d="M12.5,4H20a.5.5,0,0,0,0-1H12.5a.5.5,0,0,0,0,1Z"/><path class="cls-1" d="M4.5,4a.43.43,0,0,0,.19,0,.35.35,0,0,0,.16-.11A.47.47,0,0,0,5,3.5a.43.43,0,0,0,0-.19.36.36,0,0,0-.11-.16.5.5,0,0,0-.7,0A.35.35,0,0,0,4,3.31.43.43,0,0,0,4,3.5a.51.51,0,0,0,.5.5Z"/><path class="cls-1" d="M5.65,3.85A.36.36,0,0,0,5.81,4,.44.44,0,0,0,6,4a.47.47,0,0,0,.35-.15.36.36,0,0,0,.11-.16.6.6,0,0,0,0-.19.51.51,0,0,0-.15-.35A.49.49,0,0,0,5.81,3a.36.36,0,0,0-.16.11.47.47,0,0,0-.15.35.4.4,0,0,0,0,.19A.35.35,0,0,0,5.65,3.85Z"/><path class="cls-1" d="M8,8H4.5a1,1,0,0,0,0,2H8A1,1,0,0,0,8,8Z"/><path class="cls-1" d="M8,11.67H4.5a1,1,0,0,0,0,2H8a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M8,15.33H4.5a1,1,0,0,0,0,2H8a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M8,19H4.5a1,1,0,0,0,0,2H8a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M14,8H10.5a1,1,0,0,0,0,2H14a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M14,11.67H10.5a1,1,0,0,0,0,2H14a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M14,15.33H10.5a1,1,0,0,0,0,2H14a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M14,19H10.5a1,1,0,0,0,0,2H14a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M19.5,8h-3a1,1,0,0,0,0,2h3a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M19.5,11.67h-3a1,1,0,0,0,0,2h3a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M19.5,15.33h-3a1,1,0,0,0,0,2h3a1,1,0,0,0,0-2Z"/><path class="cls-1" d="M19.5,19h-3a1,1,0,0,0,0,2h3a1,1,0,0,0,0-2Z"/></svg>';

  // Build the panel's DOM once; the render callback below hands it to WebViewer on demand.
  documentStructurePanel.render = CreateDocumentStructurePanelElements(instance);

  const panelConfig = {
    dataElement: documentStructurePanel.dataElement,
    location: 'left',
    icon: panelIcon,
    title: 'Document Structure',
    render: () => documentStructurePanel.render,
  };
  instance.UI.addPanel(panelConfig);
};
78
// Create the DOM content of the document structure panel: an introduction,
// a note about the two-page limit, a divider, and the
// "Extract Document Structure" button.
//
// @param {object} instance - WebViewer instance, passed on to extractDocumentStructure.
// @returns {HTMLDivElement} the panel's root element.
const CreateDocumentStructurePanelElements = (instance) => {
  const panelDiv = document.createElement('div');
  panelDiv.id = 'documentStructure';
  panelDiv.appendChild(document.createTextNode('A sample PDF will have the first two pages analyzed and the resulting JSON can be viewed. After extraction, the displayed PDF will have annotations showing the identified elements.'));

  // Empty paragraph used as vertical spacing before the note.
  panelDiv.appendChild(document.createElement('p'));
  const note = document.createElement('span');
  note.style.color = 'orange';
  note.appendChild(document.createTextNode('NOTE: Only the first two pages will be processed.'));
  panelDiv.appendChild(note);

  const dividerDiv = document.createElement('div');
  dividerDiv.style.borderTop = '1px solid #ccc';
  dividerDiv.style.margin = '10px 0';
  panelDiv.appendChild(dividerDiv);

  // Extract document structure button. It starts disabled; the 'documentLoaded'
  // handler enables it once the PDF has been uploaded to the server.
  const extractDocumentStructureButton = document.createElement('button');
  extractDocumentStructureButton.textContent = 'Extract Document Structure';
  extractDocumentStructureButton.id = 'extractDocumentStructureButton';
  extractDocumentStructureButton.disabled = true;
  extractDocumentStructureButton.style.backgroundColor = 'gray';
  extractDocumentStructureButton.style.color = 'darkgray';
  extractDocumentStructureButton.onclick = async () => {
    // Show a busy cursor on both the button and the whole panel while extracting.
    const setCursor = (cursor) => {
      extractDocumentStructureButton.style.cursor = cursor;
      documentStructurePanel.render.style.cursor = cursor;
    };

    setCursor('not-allowed');
    enableButton(extractDocumentStructureButton, false);
    try {
      await extractDocumentStructure(instance);
    } catch (error) {
      console.error('❌ Document structure extraction failed:', error);
    } finally {
      // Always restore the cursor, even when the extraction failed.
      setCursor('default');
    }
  };

  panelDiv.appendChild(extractDocumentStructureButton);
  panelDiv.appendChild(document.createElement('p'));

  return panelDiv;
};
121
// Open the JSON text in a modal dialog with zoom in/out and close buttons.
//
// The text comes from the content of the processed PDF, so it is inserted as
// plain text (textContent) and never parsed as HTML.
//
// @param {string} jsonText - formatted JSON to display.
const openJsonDataDialog = (jsonText) => {
  const minFontSize = 10;
  const zoomStep = 2;
  let fontSize = 14;

  // Overlay covering the page; clicking the backdrop (not the dialog itself) closes it.
  const overlay = document.createElement('div');
  overlay.className = 'modal-overlay';

  // remove() is a no-op once the overlay is detached, so closing twice is harmless.
  const closeDialog = () => overlay.remove();
  overlay.onclick = (e) => {
    if (e.target === overlay) {
      closeDialog();
    }
  };

  // Content
  const content = document.createElement('pre');
  content.className = 'modal-content';
  content.textContent = jsonText;

  const applyFontSize = () => {
    content.style.fontSize = `${fontSize}px`;
  };
  applyFontSize();

  // Controls
  const controls = document.createElement('div');
  controls.className = 'modal-controls';

  const zoomInBtn = document.createElement('button');
  zoomInBtn.textContent = '+';
  zoomInBtn.onclick = () => {
    fontSize += zoomStep;
    applyFontSize();
  };

  const zoomOutBtn = document.createElement('button');
  zoomOutBtn.textContent = '-';
  zoomOutBtn.onclick = () => {
    fontSize = Math.max(minFontSize, fontSize - zoomStep);
    applyFontSize();
  };

  const closeBtn = document.createElement('button');
  closeBtn.textContent = 'Close';
  closeBtn.className = 'modal-close';
  closeBtn.onclick = closeDialog;

  controls.appendChild(zoomInBtn);
  controls.appendChild(zoomOutBtn);
  controls.appendChild(closeBtn);

  // Modal box
  const modal = document.createElement('div');
  modal.className = 'modal-box';
  modal.appendChild(controls);
  modal.appendChild(content);

  overlay.appendChild(modal);
  document.body.appendChild(overlay);
};
179
// Draw a rectangle annotation around one extracted element.
//
// @param {object} instance   - WebViewer instance (uses Core.annotationManager and Core.Annotations).
// @param {number} pageNumber - page the rectangle is drawn on.
// @param {object} item       - extracted element; `item.rect` is read as [x1, y1, x2, y2]
//                              and `item.type` selects the stroke colour from documentStructureMap.
//
// Items without a usable `rect` are skipped, and types missing from
// documentStructureMap are drawn in a neutral gray, so one unexpected element
// cannot abort the drawing of all the others.
const drawAnnotationRectangle = (instance, pageNumber, item) => {
  const rect = item?.rect;
  if (!Array.isArray(rect) || rect.length < 4) {
    return;
  }

  const { annotationManager, Annotations } = instance.Core;
  const fallbackColor = { R: 128, G: 128, B: 128, A: 1 };
  const { color } = documentStructureMap.find((field) => field.type === item.type) ?? { color: fallbackColor };
  const [x1, y1, x2, y2] = rect;

  const annot = new Annotations.RectangleAnnotation({
    PageNumber: pageNumber,
    X: x1,
    Y: y1,
    Width: x2 - x1,
    Height: y2 - y1,
    StrokeColor: new Annotations.Color(color.R, color.G, color.B, color.A),
    StrokeThickness: 2,
  });

  annotationManager.addAnnotation(annot);
  annotationManager.redrawAnnotation(annot);
};
199
// Outline every extracted element of jsonData on the displayed PDF.
// Tables are drawn cell by cell, graphics by their contents when they have
// any, and every other element by its own rectangle.
const drawAnnotations = (instance) => {
  for (const page of jsonData.pages) {
    // Page number is read per call, exactly when a rectangle is drawn.
    const outline = (item) => drawAnnotationRectangle(instance, page.properties.pageNumber, item);

    for (const element of page.elements) {
      if (element.type === 'table') {
        for (const row of element.trs) {
          for (const cell of row.tds) {
            outline(cell);
          }
        }
      } else if (element.type === 'graphic' && element.contents) {
        for (const content of element.contents) {
          outline(content);
        }
      } else {
        outline(element);
      }
    }
  }
};
228
// Remove the trial mode page from the JSON data.
// When a demo license key is used instead of a production one, the received
// JSON contains a page with a message saying that the Apryse SDK is running
// in trial mode.
//
// The page is located by its position in `json.pages` (not by its
// `pageNumber` property), so the right entry is removed even when the page
// numbers do not start at 1. The remaining pages are renumbered from 1.
//
// @param {{pages: object[]}} json - parsed extraction result; modified in place.
// @returns {object} the same `json` object.
const removeJSONTrialPage = (json) => {
  const trialMarker = 'Apryse Data Extraction Module trial mode.';

  const hasTrialText = (element) =>
    Array.isArray(element?.contents) &&
    element.contents.some((content) => typeof content?.text === 'string' && content.text.includes(trialMarker));

  const trialPageIndex = json.pages.findIndex(
    (page) => Array.isArray(page?.elements) && page.elements.some(hasTrialText),
  );

  if (trialPageIndex !== -1) {
    json.pages.splice(trialPageIndex, 1);
    json.pages.forEach((page, index) => {
      page.properties.pageNumber = index + 1;
    });
  }

  return json;
};
265
// Remove the watermark rectangle from the JSON data.
//
// An element is dropped only when ALL four of its `rect` coordinates match the
// known watermark rectangle (within a small tolerance). Elements that merely
// share one coordinate with it, or that have no `rect`, are kept.
//
// @param {{pages: object[]}} json - parsed extraction result; modified in place.
// @returns {object} the same `json` object.
const removeWatermarkRect = (json) => {
  const apryseWatermarkRect = [214.67, 270.59, 397.85, 523.01];
  // Coordinates are given to two decimals; allow for rounding differences.
  const tolerance = 0.01;

  const isWatermark = (element) =>
    Array.isArray(element?.rect) &&
    element.rect.length === apryseWatermarkRect.length &&
    apryseWatermarkRect.every((expected, i) => Math.abs(element.rect[i] - expected) <= tolerance);

  json.pages.forEach((page) => {
    if (Array.isArray(page.elements)) {
      page.elements = page.elements.filter((element) => !isWatermark(element));
    }
  });

  return json;
};
277
// Build the "Color Legend" section: one colored bullet and title per
// documentStructureMap entry.
const createColorLegend = (instance) => {
  const legend = document.createElement('div');
  legend.id = 'colorLegend';
  legend.className = 'listContainer';

  const legendTitle = document.createElement('h3');
  legendTitle.textContent = 'Color Legend';
  legend.appendChild(legendTitle);
  legend.appendChild(document.createElement('p'));

  documentStructureMap.forEach((field) => {
    const { R, G, B, A } = field.color;
    // The Color object is turned into its string form for use as a CSS value.
    const cssColor = String(new instance.Core.Annotations.Color(R, G, B, A));

    const listItem = document.createElement('div');
    listItem.className = 'listItem';
    const itemStyles = {
      '--bullet-color': cssColor,
      color: cssColor,
      'font-weight': 'bold',
      position: 'relative',
      'padding-left': '20px',
      margin: '8px 0',
      'list-style': 'none',
      display: 'block',
      'line-height': '1.5',
      'font-size': '14px',
    };
    Object.entries(itemStyles).forEach(([property, value]) => listItem.style.setProperty(property, value));

    // Round bullet drawn as an inline element in front of the title.
    const bullet = document.createElement('span');
    bullet.style.width = '10px';
    bullet.style.height = '10px';
    bullet.style.borderRadius = '50%';
    bullet.style.backgroundColor = cssColor;
    bullet.style.display = 'inline-block';
    bullet.style.marginRight = '10px';
    bullet.style.verticalAlign = 'middle';

    listItem.appendChild(bullet);
    listItem.appendChild(document.createTextNode(field.title));
    legend.appendChild(listItem);
  });

  return legend;
};

// Build the "JSON Data" section: a scrollable box with the JSON text and a
// button that opens the same text in a dialog.
const createJsonViewer = (jsonText) => {
  const jsonDiv = document.createElement('div');
  jsonDiv.id = 'json';

  const jsonTitle = document.createElement('h3');
  jsonTitle.textContent = 'JSON Data';
  jsonDiv.appendChild(jsonTitle);
  jsonDiv.appendChild(document.createElement('p'));

  const scrollBox = document.createElement('div');
  scrollBox.style.width = '350px';
  scrollBox.style.height = '350px';
  scrollBox.style.border = '2px solid #444';
  scrollBox.style.overflow = 'scroll'; // Enables both vertical and horizontal scroll
  scrollBox.style.whiteSpace = 'nowrap'; // Prevents wrapping for horizontal scroll
  scrollBox.style.padding = '10px';
  scrollBox.style.fontFamily = 'monospace';
  scrollBox.style.backgroundColor = 'black';
  scrollBox.style.color = 'white';

  const jsonContent = document.createElement('pre');
  jsonContent.textContent = jsonText;
  scrollBox.appendChild(jsonContent);
  jsonDiv.appendChild(scrollBox);

  const jsonDataDialogButton = document.createElement('button');
  jsonDataDialogButton.textContent = 'Open in Dialog';
  jsonDataDialogButton.id = 'jsonDataDialogButton';
  jsonDataDialogButton.style.backgroundColor = 'blue';
  jsonDataDialogButton.style.color = 'white';
  jsonDataDialogButton.onclick = () => openJsonDataDialog(jsonText);
  jsonDiv.appendChild(jsonDataDialogButton);
  jsonDiv.appendChild(document.createElement('p'));

  return jsonDiv;
};

// Extract the document structure of the current PDF.
//
// Sends a GET request to the server, which answers with the extracted
// structure as JSON. On success the data is stored in `jsonData`, the color
// legend and the JSON viewer are added to the panel, and the elements are
// outlined on the PDF.
//
// The returned promise always settles: any failure (network error, non-200
// status, malformed or unexpected JSON) is logged, `jsonData` is reset to
// null, and the promise resolves without a value.
//
// @param {object} instance - WebViewer instance.
// @returns {Promise<void>}
const extractDocumentStructure = async (instance) => {
  const doc = instance.Core.documentViewer.getDocument();
  // The filename may contain spaces, '&', '#', ... so it must be encoded.
  const url = `http://localhost:5050/server/handler.js?filename=${encodeURIComponent(doc.filename)}`;

  try {
    const response = await fetch(url, { method: 'GET' });
    if (response.status !== 200) {
      console.error(`❌ Server responded with status: ${response.status}`);
      jsonData = null;
      return;
    }

    const parsed = JSON.parse(await response.text());

    // Check that the received JSON data has the expected shape.
    if (parsed === null || typeof parsed !== 'object' || !Array.isArray(parsed.pages)) {
      console.error('❌ Received invalid JSON data from server');
      jsonData = null;
      return;
    }

    // Drop the trial mode page, then the watermark rectangle.
    jsonData = removeWatermarkRect(removeJSONTrialPage(parsed));

    // Display form of the JSON: escaped line breaks and quotes are shown literally.
    const jsonText = JSON.stringify(jsonData, null, 2)
      .replace(/\\r\\n/g, '\n')
      .replace(/\\"/g, '"');

    documentStructurePanel.render.appendChild(createColorLegend(instance));
    documentStructurePanel.render.appendChild(createJsonViewer(jsonText));
    drawAnnotations(instance);
  } catch (error) {
    console.error('❌ Failed to extract document structure:', error);
    jsonData = null;
  }
};
405
// Switch a button between its enabled look (blue/white) and its disabled
// look (gray/darkgray), and set its `disabled` flag to match.
const enableButton = (button, state) => {
  const [background, foreground] = state ? ['blue', 'white'] : ['gray', 'darkgray'];
  button.disabled = !state;
  button.style.backgroundColor = background;
  button.style.color = foreground;
};
412
// Upload the currently loaded PDF (including its annotations) to the server
// and enable the "Extract Document Structure" button once it is stored.
//
// @param {object} instance - WebViewer instance.
// @returns {Promise<void>} rejects when the upload fails or the server does not answer with 200.
const uploadDocumentToServer = async (instance) => {
  // Preparation of the PDF blob to be sent to the server
  const doc = instance.Core.documentViewer.getDocument();
  const xfdfString = await instance.Core.annotationManager.exportAnnotations(); // annotations in the loaded document
  const data = await doc.getFileData({ xfdfString });
  const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });
  const formData = new FormData();
  formData.append(doc.filename, blob, doc.filename);

  console.log('🚀 Sending PDF to server for initial processing...');

  // The filename may contain spaces, '&', '#', ... so it must be encoded.
  const uploadUrl = `http://localhost:5050/server/handler.js?filename=${encodeURIComponent(doc.filename)}`;

  let response;
  try {
    response = await fetch(uploadUrl, { method: 'POST', body: formData });
  } catch (error) {
    console.error('❌ Failed to connect to server:', error);
    console.error('📍 Attempted URL: http://localhost:5050/server/handler.js');
    console.error('🔍 This likely means the document structure extraction server is not running on port 5050');
    throw error;
  }

  console.log(`📡 Server response status: ${response.status}`);
  if (response.status !== 200) {
    console.error(`❌ Server responded with status: ${response.status}`);
    throw new Error(`Server error: ${response.status}`);
  }
  console.log('✅ PDF successfully sent to server');

  // enable Extract Document Structure button
  const extractButton = documentStructurePanel.render.querySelector('#extractDocumentStructureButton');
  if (extractButton) {
    console.log('🔓 Enabling Extract Document Structure button');
    enableButton(extractButton, true);
  } else {
    console.warn('⚠️ Could not find extractDocumentStructureButton in DOM');
  }
};

WebViewer({
  path: '/lib',
  initialDoc: initialDoc,
  enableFilePicker: true, // Enable file picker to open files. In WebViewer -> menu icon -> Open File
  enableMeasurement: true,
  loadAsPDF: true,
  licenseKey: licenseKey,
}, viewerElement).then((instance) => {

  // Once the PDF document is loaded, send it to the server.
  // The server stores it and extracts the document structure JSON data
  // when the user clicks the "Extract Document Structure" button.
  instance.Core.documentViewer.addEventListener('documentLoaded', async () => {

    // Customize the main webviewer left panel after the load completion
    customizeUI(instance);

    // Reset JSON data
    jsonData = null;

    try {
      await uploadDocumentToServer(instance);
    } catch (error) {
      // Also covers failures while exporting annotations or reading the file data.
      console.error('❌ Error in PDF upload promise:', error);
    }
  });

  console.log('✅ WebViewer loaded successfully.');
}).catch((error) => {
  console.error('❌ Failed to initialize WebViewer:', error);
});
483
<!DOCTYPE html>
<!-- Host page for the demo: loads WebViewer and mounts it into #viewer. -->
<html lang='en' style='height: 100%'>
  <head>
    <!-- index.js contains non-ASCII string literals, so the encoding is declared explicitly. -->
    <meta charset='utf-8'>
    <script src='lib/webviewer.min.js'></script>
    <title>Document Structure Extraction Demo</title>
    <!-- NOTE(review): the .modal-* rules used by the JSON dialog in index.js must also be loaded here;
         the stylesheet's filename is not shown, so confirm it and add the matching <link>. -->
  </head>
  <!-- html and body both need a height so the viewer's 100% height has something to resolve against. -->
  <body style='width: 100%; height: 100%; padding: 0; margin: 0'>
    <div id='viewer' style='width: 100%; height: 100%'></div>
    <script src='index.js'></script>
  </body>
</html>
/* Modal styles for document-structure-extraction demo */

/* Full-viewport dimmed backdrop that centers the dialog. */
.modal-overlay {
  position: fixed;
  top: 0;
  left: 0;
  z-index: 1001;
  width: 100%;
  height: 100%;
  display: flex;
  align-items: center;
  justify-content: center;
  background-color: rgba(0, 0, 0, 0.5);
}

/* The dialog: a column holding the controls row and the JSON content. */
.modal-box {
  position: relative;
  display: flex;
  flex-direction: column;
  width: 80%;
  height: 80%;
  max-width: 800px;
  max-height: 600px;
  padding: 20px;
  border-radius: 8px;
  background: white;
  box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

/* Row of zoom and close buttons. */
.modal-controls {
  display: flex;
  align-items: center;
  gap: 10px;
  margin-bottom: 15px;
}

.modal-controls button {
  padding: 8px 16px;
  border: none;
  border-radius: 4px;
  font-size: 14px;
  cursor: pointer;
  transition: background-color 0.2s ease;
}

/* Zoom buttons (every control except Close). */
.modal-controls button:not(.modal-close) {
  color: white;
  background: #007cba;
}

.modal-controls button:not(.modal-close):hover {
  background: #005a8b;
}

/* Close button, pushed to the right edge of the controls row. */
.modal-close {
  margin-left: auto;
  color: white !important;
  background: #dc3545 !important;
}

.modal-close:hover {
  background: #b02a37 !important;
}

/* Scrollable JSON text area filling the rest of the dialog. */
.modal-content {
  flex: 1;
  margin: 0;
  padding: 15px;
  overflow: auto;
  border: 1px solid #dee2e6;
  border-radius: 4px;
  color: #000000;
  background: #f8f9fa;
  font-family: 'Courier New', monospace;
  white-space: pre-wrap;
  word-wrap: break-word;
}
78
1// ES6 Compliant Syntax
2// Copilot name: GitHub Copilot, version: 1.0.0, model: GPT-4, version: 2024-06, date: 2025-09-15
3// File: server/server.js
4
5const express = require('express');
6const fs = require('fs');
7const bodyParser = require('body-parser');
8const open = (...args) => import('open').then(({ default: open }) => open(...args));
9const handler = require('./handler.js');
10const port = process.env.PORT || 5050;
11const app = express();
12const sentPdfs = 'sentPdfs';
13
// CORS middleware: allow cross-origin requests from the playground.
app.use((req, res, next) => {
  const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'GET, POST, PUT, DELETE, OPTIONS',
    'Access-Control-Allow-Headers': 'Origin, X-Requested-With, Content-Type, Accept, Authorization',
  };
  for (const [name, value] of Object.entries(corsHeaders)) {
    res.header(name, value);
  }

  // Preflight OPTIONS requests are answered here; everything else continues down the chain.
  if (req.method !== 'OPTIONS') {
    next();
    return;
  }
  res.sendStatus(200);
});
27
// Parse text request bodies.
app.use(bodyParser.text());

// Serve the '../client' folder statically under the '/client' URL prefix.
app.use('/client', express.static('../client'));

// Register the PDF upload/extraction routes.
handler(app);

// Run server
const server = app.listen(port, 'localhost', (err) => {
  if (!err) {
    console.info(`Server is listening at http://localhost:${port}`);
    return;
  }
  console.error(err);
});
42
// Set once a shutdown has started, so repeated signals do not run the cleanup twice.
let isShuttingDown = false;

/**
 * Server shutdown and cleanup: close the HTTP server, delete the folder of
 * sent PDFs, then exit the process.
 *
 * @param {number} [exitCode=0] - process exit code; non-zero reports an abnormal shutdown.
 */
function shutdown(exitCode = 0) {
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;
  console.log('Cleanup started...');

  server.close(() => {
    console.log('Server closed.');

    // Remove the sent PDFs folder. `force` makes a missing folder a no-op.
    try {
      fs.rmSync(sentPdfs, { recursive: true, force: true });
    } catch (err) {
      console.error(`Could not remove ${sentPdfs}:`, err);
    }

    process.exit(exitCode);
  });
}

// Handle shutdown signals
process.on('SIGINT', () => shutdown(0)); // Ctrl+C
process.on('SIGTERM', () => shutdown(0)); // kill command or Docker stop
process.on('uncaughtException', (err) => {
  console.error('Uncaught Exception:', err);
  shutdown(1); // abnormal termination must not be reported as success
});
1// ES6 Compliant Syntax
2// Copilot name: GitHub Copilot, version: 1.0.0, model: GPT-4, version: 2024-06, date: 2025-09-15
3// File: server/handler.js
4
5const path = require('path');
6const fs = require('fs');
7const { PDFNet } = require('@pdftron/pdfnet-node');
8
9// **Important**
10// You must get a license key from Apryse for the server to run.
11// A trial key can be obtained from:
12// https://docs.apryse.com/core/guides/get-started/trial-key
13const licenseKey = 'YOUR_LICENSE_KEY';
14const multer = require('multer');
15const { response } = require('express');
16const upload = multer();
17const serverFolder = 'server';
18const sentPdfs = 'sentPdfs';
19const serverHandler = `/${serverFolder}/handler.js`;
20
21module.exports = async (app) => {
22
23 async function initializePDFNet() {
24 // Create folder sentPdfs that will hold the sent PDFs, if it doesn't exist
25 if (!fs.existsSync(sentPdfs))
26 fs.mkdirSync(sentPdfs);
27
28 // Initialize PDFNet
29 await PDFNet.initialize(licenseKey);
30
31 // Specify the data extraction library path
32 await PDFNet.addResourceSearchPath('./node_modules/@pdftron/data-extraction/lib/');
33
34 // Check if the Apryse SDK Structured Output module is available.
35 if (await PDFNet.DataExtractionModule.isModuleAvailable(PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure))
36 console.log('Apryse SDK Structured Output module is available.');
37 else
38 console.log('Unable to run Data Extraction: Apryse SDK Structured Output module not available.');
39 }
40
41 // Removes all sent PDFs
42 async function cleanupSentPdfs() {
43 if (fs.existsSync(sentPdfs)) {
44 await fs.promises.readdir(sentPdfs).then(async files => {
45 for (const file of files) {
46 const filePath = path.join(sentPdfs, file);
47 await fs.promises.unlink(filePath);
48 }
49 });
50 }
51 }
52
53 // Handle POST request sent to '/server/handler.js'
54 // This endpoint receives the currently loaded PDF file in the Apryse webviewer, then saves it to the server
55 app.post(serverHandler, upload.any(), async (request, response) => {
56 try {
57
58 // Removes previous sent PDFs
59 await cleanupSentPdfs();
60
61 const pdf = path.resolve(__dirname, `./${sentPdfs.split('/').pop()}/${request.query.filename}`);
62 fs.writeFileSync(pdf, request.files[0].buffer);
63 response.status(200).send(`Success saving PDF file ${request.query.filename}`);
64 } catch (e) {
65 response.status(500).send(`Error saving PDF file ${request.query.filename}`);
66 }
67 response.end();
68 });
69
70 // Handle GET request sent to '/server/handler.js'
71 // This endpoint extracts JSON data of form fields from the saved PDF file, then sends it back to the client
72 app.get(serverHandler, async (request, response) => {
73 let json = null;
74 response.header('Content-Type', 'application/json');
75 try {
76 const pdf = path.resolve(__dirname, `./${sentPdfs.split('/').pop()}/${request.query.filename}`);
77 if (fs.existsSync(pdf)) {
78
79 // Process the first two pages only.
80 const options = new PDFNet.DataExtractionModule.DataExtractionOptions();
81 options.setPages("1-2"); // Extract from page 1 to 2
82
83 json = await PDFNet.DataExtractionModule.extractDataAsString(pdf, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocStructure, options);
84 }
85 response.status(200).send(json);
86 } catch (e) {
87 response.status(500).send(`Error extracting JSON data from PDF file ${request.query.filename}`);
88 }
89 response.end();
90 });
91
92 // Initialize PDFNet
93 PDFNet.runWithoutCleanup(initializePDFNet, licenseKey).then(
94 function onFulfilled() {
95 response.status(200);
96 },
97 function onRejected(error) {
98 // log error and close response
99 console.error('Error initializing PDFNet', error);
100 response.status(503).send();
101 }
102 );
103};
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales