Classify documents with custom-trained AI. This feature uses a specialized AI model to analyze a document and return JSON output that identifies the document type. It supports multi-page documents and is trained on 19 categories, primarily in English.
This demo allows you to:
Implementation steps
To add Document Classification capability in WebViewer:
Step 1: Choose your preferred web stack
Step 2: Add the ES6 JavaScript sample code provided in this guide
// ES6 Compliant Syntax
// GitHub Copilot - October 22, 2025
// File name: document-classification/client/index.js

// Global variables
const element = document.getElementById('viewer'); // host element for WebViewer and the demo controls
let instance = null; // WebViewer instance; assigned once the WebViewer promise resolves

// Initialize WebViewer
WebViewer({
  path: '/lib',
  initialDoc: 'https://apryse.s3.us-west-1.amazonaws.com/public/files/samples/document_classification_pack.pdf',
  enableFilePicker: true, // Enable file picker to open files. In WebViewer -> menu icon -> Open File
  licenseKey: 'YOUR_LICENSE_KEY',
}, element)
  .then((inst) => {
    instance = inst;
    // `resultArea` is declared later in this file (UI section); this callback only
    // runs after a document has loaded, by which time that declaration has executed.
    instance.Core.documentViewer.addEventListener('documentLoaded', () => {
      resultArea.textContent = "📄 Document loaded. You can now press the 'Classify Document' button to classify it.";
    });
  })
  .catch((error) => {
    // Without a rejection handler a failed start-up would be an unhandled promise rejection.
    console.error('Failed to initialize WebViewer:', error);
  });

// Perform classification by sending the currently loaded PDF to the server
/**
 * Uploads the document that is open in WebViewer (with its current annotations
 * merged in) to the classification server and writes the server's reply, or a
 * readable error message, into the result area.
 *
 * The returned promise settles only after the result area has been updated and
 * the loading modal has been closed, so callers can safely `await` it.
 * Failures are reported in the result area instead of being thrown.
 *
 * @returns {Promise<void>}
 */
const classifyDoc = async () => {
  const serverUrl = 'http://localhost:5050/server/handler.js';

  // The button can be clicked before WebViewer has finished starting up.
  if (!instance) {
    resultArea.textContent = 'WebViewer is still loading, please try again in a moment.';
    return;
  }

  instance.UI.openElements(['loadingModal']);
  resultArea.textContent = '⏳ Classifying document, please wait...';
  let resultText = '';

  try {
    // Preparation of the PDF blob to be sent to the server
    const doc = instance.Core.documentViewer.getDocument();
    if (!doc) {
      throw new Error('No document is loaded.');
    }
    const xfdfString = await instance.Core.annotationManager.exportAnnotations(); // annotations in the loaded document
    const data = await doc.getFileData({ xfdfString });
    const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });
    const formData = new FormData();
    formData.append(doc.filename, blob, doc.filename);

    // Build the query string through URLSearchParams so that spaces, '&', '#', etc.
    // in the file name cannot corrupt the request URL.
    const requestUrl = new URL(serverUrl);
    requestUrl.searchParams.set('filename', doc.filename);

    // Send the PDF blob to the server for processing
    console.log('Sending PDF to server for processing...');
    let response;
    try {
      response = await fetch(requestUrl, { method: 'POST', body: formData });
    } catch (error) {
      // fetch() rejects only when no HTTP response was received at all.
      throw new Error(
        `Failed to connect to server: ${error}`
          + `\n Attempted URL: ${serverUrl}`
          + '\n This likely means the Classification server is not running on port 5050',
        { cause: error },
      );
    }

    const body = await response.text();
    if (response.status !== 200) {
      // Include the response body: the server puts its error details there.
      throw new Error(`Server responded with status: ${response.status}${body ? `\n${body}` : ''}`);
    }
    resultText = body;
  } catch (error) {
    console.error(error);
    resultText = error?.message ?? String(error);
  } finally {
    // Always show the outcome and release the loading modal, whatever happened above.
    resultArea.textContent = resultText;
    instance.UI.closeElements(['loadingModal']);
  }
};

// UI section

// Wrapper that holds the two demo panels
const controlsContainer = document.createElement('div');

// Left and right panels; their CSS classes lay them out side by side
// (display: inline-block, vertical-align: top)
const leftDiv = document.createElement('div');
const rightDiv = document.createElement('div');
leftDiv.className = 'vertical-container left-panel';
rightDiv.className = 'vertical-container right-panel';
controlsContainer.append(leftDiv, rightDiv);

// Left panel: description text followed by a line break
const description = document.createElement('p');
description.textContent = "A demo of Apryse Server SDK's Document Classification, powered by custom trained AI. The document will be analyzed and a resulting JSON will identify the document type. Supports multi page documents.";
leftDiv.append(description, document.createElement('br'));

// Left panel: button that triggers classification, followed by two line breaks
const classifyDocButton = document.createElement('button');
Object.assign(classifyDocButton, {
  className: 'btn',
  textContent: 'Classify Document',
  onclick: () => classifyDoc(),
});
leftDiv.append(
  classifyDocButton,
  document.createElement('br'),
  document.createElement('br'),
);

// Left panel: note about the page limit (static markup, no user data involved)
const note = document.createElement('p');
note.innerHTML = "<b>Note: only the first 2 pages will be processed</b>.";
leftDiv.append(note);

// Right panel: read-only text area that displays status messages and results
const resultArea = document.createElement('textarea');
Object.assign(resultArea, {
  className: 'result-area',
  readOnly: true,
  textContent: "Classification result will appear here.",
});
rightDiv.append(resultArea);

// Place the controls ahead of everything else inside the viewer element
element.prepend(controlsContainer);

/* side-by-side divs */
.vertical-container {
  display: inline-block;
  vertical-align: top;
}

/* Button Styles */
.btn {
  background-color: #007bff;
  margin: 0 10px;
  padding: 5px 10px;
  border: 1px solid #ccc;
  border-radius: 4px;
  cursor: pointer;
  font-size: 14px;
  transition: all 0.2s ease;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  color: white;
}

/* Hover/active feedback is limited to enabled buttons so that a disabled
   button does not move or change shadow when the pointer is over it. */
.btn:hover:not(:disabled) {
  background-color: #0056b3;
  transform: translateY(-1px);
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}

.btn:active:not(:disabled) {
  transform: translateY(1px);
  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.2);
}

.btn:disabled {
  background-color: #ccc;
  cursor: not-allowed;
  box-shadow: none;
}

/* Layout Styles */
.left-panel {
  width: 40%;
}

.right-panel {
  width: 60%; /* right div is wider to accommodate JSON data display */
}

.result-area {
  /* border-box keeps the textarea's padding and border inside the 100% width,
     so it does not overflow the right panel. */
  box-sizing: border-box;
  width: 100%;
  height: 150px;
  white-space: pre-wrap;
  font-family: 'Courier New', monospace;
  font-size: 12px;
  overflow: auto;
  background-color: gray;
  color: white;
}

/* Responsive Design */
@media (max-width: 768px) {
  .btn {
    width: 100%;
    margin: 5px 0;
  }
}

// Module dependencies
const { PDFNet } = require('@pdftron/pdfnet-node');
const path = require('path');
const fs = require('fs');

// **Important**
// You must get a license key from Apryse for the server to run.
// A trial key can be obtained from:
// https://docs.apryse.com/core/guides/get-started/trial-key
const licenseKey = 'YOUR_LICENSE_KEY';
const multer = require('multer');
const { response } = require('express');

// Multipart parser; the route handler reads each uploaded file from its `.buffer`
const upload = multer();

// Route and folder names
const serverFolder = 'server';
const sentDocuments = 'sentDocuments'; // folder that temporarily holds uploaded PDFs
const serverHandler = `/${serverFolder}/handler.js`; // path of the POST endpoint

// Initialize PDFNet once when the module loads.
// `isInitialized` is set to true by initializeServer() on success;
// `initializationError` keeps the failure (if any) so it can be reported to clients.
let isInitialized = false;
let initializationError = null;

/**
 * Verifies that the Data Extraction module and its document-classification
 * engine are available, then marks the server as initialized.
 *
 * Never throws: any failure is logged and stored in `initializationError`,
 * and `isInitialized` is left unchanged.
 *
 * @returns {Promise<void>}
 */
async function initializeServer() {
  try {
    console.log('Initializing PDFNet Data Extraction module...');

    // The module object must exist on PDFNet before anything else can be checked
    const extraction = PDFNet.DataExtractionModule;
    if (!extraction) {
      throw new Error('DataExtractionModule not available');
    }

    // Ask PDFNet whether the document-classification engine can be used
    const classificationEngine = extraction.DataExtractionEngine.e_DocClassification;
    const classifierReady = await extraction.isModuleAvailable(classificationEngine);
    if (!classifierReady) {
      throw new Error('Document Classification module not available');
    }

    isInitialized = true;
  } catch (error) {
    console.error('❌ Error initializing Data Extraction module:', error.message);
    initializationError = error;
  }
}

44module.exports = async (app) => {
45
46 // Handle POST request sent to '/server/handler.js'
47 // This endpoint receives the currently loaded PDF file in the Apryse webviewer and sends
48 // its classification back to the client as JSON data
49 app.post(serverHandler, upload.any(), async (request, response) => {
50 try {
51 // Check if PDFNet is initialized
52 if (!isInitialized) {
53 return response.status(503).json({
54 error: 'Server not properly initialized',
55 details: initializationError?.message
56 });
57 }
58
59 if (!fs.existsSync(sentDocuments))
60 fs.mkdirSync(sentDocuments);
61
62 const sentPdf = path.resolve(__dirname, `./${sentDocuments.split('/').pop()}/${request.query.filename}`);
63 fs.writeFileSync(sentPdf, request.files[0].buffer);
64
65 response.header('Content-Type', 'application/json');
66
67 const opts = new PDFNet.DataExtractionModule.DataExtractionOptions();
68 opts.setPages('1-2'); // process 2 pages
69 console.log(opts);
70 const json = await PDFNet.DataExtractionModule.extractDataAsString(
71 sentPdf, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocClassification, opts);
72
73 await fs.promises.unlink(sentPdf); // delete the document after processing
74 response.status(200).send(json);
75 } catch (e) {
76 console.error('Error processing document:', e);
77 response.status(500).json({
78 error: `Error extracting JSON text from PDF file ${request.query.filename}`,
79 details: e.message
80 });
81 }
82 });
83};
84
// Start PDFNet as soon as this module is loaded. initializeServer() records its own
// failures; the handler below covers a rejection of the runWithoutCleanup() promise itself.
PDFNet.runWithoutCleanup(initializeServer, licenseKey).catch((fatalError) => {
  console.error('❌ Fatal error initializing PDFNet:', fatalError);
  initializationError = fatalError;
});
1
const express = require('express');
const fs = require('fs');
const bodyParser = require('body-parser');
// Wrapper that loads the 'open' package through dynamic import() on first use
const open = (...args) => import('open').then(({ default: open }) => open(...args));
const handler = require('./handler.js');
const port = process.env.PORT || 5050;
const app = express();
// Folder removed on shutdown. It must be the folder handler.js writes uploads to,
// which is 'sentDocuments' next to this file (the previous value 'sentPdfs' is never created).
const sentPdfs = `${__dirname}/sentDocuments`;

// CORS middleware to allow cross-origin requests from the playground
app.use((req, res, next) => {
  res.set({
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'GET, POST, PUT, DELETE, OPTIONS',
    'Access-Control-Allow-Headers': 'Origin, X-Requested-With, Content-Type, Accept, Authorization',
  });

  // Every non-preflight request continues down the middleware chain
  if (req.method !== 'OPTIONS') {
    next();
    return;
  }

  // Preflight OPTIONS requests are answered here
  res.sendStatus(200);
});

app.use(bodyParser.text());
app.use('/client', express.static('../client')); // Statically serves the '../client' folder under '/client'

// Register the classification endpoint
handler(app);

// Run server
const server = app.listen(port, 'localhost', (err) => {
  // Express 4 invokes this callback without arguments once the server is listening;
  // the `err` check is kept for Express versions that pass listen errors here.
  if (err) {
    console.error(err);
  } else {
    console.info(`Server is listening at http://localhost:${port}`);
  }
});

// Listen failures (for example, the port is already in use) are emitted as 'error'
// events on the server. Report them and exit with a failure code instead of letting
// them surface as uncaught exceptions.
server.on('error', (err) => {
  console.error(err);
  process.exit(1);
});

// Server shutdown and cleanup

// Set on the first shutdown request so repeated signals do not restart the cleanup
let isShuttingDown = false;

/**
 * Stops the HTTP server, removes the folder of sent PDFs, and exits the process.
 *
 * @param {number} [exitCode=0] - Process exit code. Any non-integer value (for
 *   example a signal name, when this function is registered directly as a signal
 *   listener) is treated as 0.
 */
function shutdown(exitCode = 0) {
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;
  const code = Number.isInteger(exitCode) ? exitCode : 0;

  console.log('Cleanup started...');

  // Close server
  server.close(() => {
    console.log('Server closed.');

    // Removes sent PDFs folder. fs.rmSync replaces the deprecated recursive
    // fs.rmdirSync; `force` makes a missing folder a no-op.
    try {
      fs.rmSync(sentPdfs, { recursive: true, force: true });
    } catch (cleanupError) {
      console.error('Failed to remove sent PDFs folder:', cleanupError);
    }

    // No async cleanup left, exit directly
    process.exit(code);
  });
}

// Handle shutdown signals
process.on('SIGINT', () => shutdown(0)); // Ctrl+C
process.on('SIGTERM', () => shutdown(0)); // kill command or Docker stop
process.on('uncaughtException', (err) => {
  console.error('Uncaught Exception:', err);
  shutdown(1); // a crash must not be reported as a successful exit
});
{
  "name": "document-classification-server",
  "version": "1.0.0",
  "description": "Document Classification Demo Server Component",
  "main": "server.js",
  "scripts": {
    "start": "node server.js",
    "dev": "node server.js"
  },
  "dependencies": {
    "@pdftron/data-extraction": "^11.8.0",
    "@pdftron/pdfnet-node": "^11.8.0",
    "body-parser": "^1.20.2",
    "express": "^4.18.2",
    "multer": "^1.4.4",
    "open": "^9.1.0"
  },
  "keywords": [
    "document-classification",
    "pdf",
    "server",
    "pdftron",
    "webviewer"
  ],
  "author": "Apryse",
  "license": "MIT"
}

Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales