Classify documents with custom-trained AI. This feature uses a specialized AI model to analyze documents and return a JSON output that identifies the document type. It supports multipage documents and is trained on 19 categories, primarily in English:
This demo allows you to:
Implementation steps
To add Document Classification capability in WebViewer:
Step 1: Choose your preferred web stack.
Step 2: Add the ES6 JavaScript sample code provided in this guide.
Once you generate your license key, it will automatically be included in your sample code below.
Apryse collects some data regarding your usage of the SDK for product improvement.
The data that Apryse collects includes:
For clarity, no other data is collected by the SDK and Apryse has no access to the contents of your documents.
If you wish to continue without data collection, contact us and we will email you a no-tracking trial key for you to get started.
// ES6 Compliant Syntax
// GitHub Copilot - October 22, 2025
// File name: document-classification/client/index.js

// Global variables
// Host element that WebViewer is mounted into.
const element = document.getElementById('viewer');
// WebViewer instance; stays null until the initialization promise below resolves.
let instance = null;

// Initialize WebViewer.
WebViewer({
  path: '/lib',
  initialDoc: 'https://apryse.s3.us-west-1.amazonaws.com/public/files/samples/document_classification_pack.pdf',
  enableFilePicker: true, // Enable file picker to open files. In WebViewer -> menu icon -> Open File
  licenseKey: 'YOUR_LICENSE_KEY',
}, element).then((inst) => {
  instance = inst;
  // Tell the user a document is ready every time one finishes loading.
  instance.Core.documentViewer.addEventListener('documentLoaded', () => {
    resultArea.textContent = "📄 Document loaded. You can now press the 'Classify Document' button to classify it.";
  });
}).catch((error) => {
  // Report initialization failures instead of leaving an unhandled rejection.
  console.error('Failed to initialize WebViewer:', error);
});
21
/**
 * Uploads the currently loaded document (the whole file, with the current
 * annotations merged in) to the classification server and shows the server's
 * response in the result area.
 *
 * The returned promise settles only after the request has finished and the UI
 * has been updated. It does not reject: every failure is written to the result
 * area and to the console, and the loading modal is always closed.
 *
 * @returns {Promise<void>}
 */
const classifyDoc = async () => {
  const endpoint = 'http://localhost:5050/server/handler.js';

  instance.UI.openElements(['loadingModal']);
  resultArea.textContent = '⏳ Classifying document, please wait...';
  let resultText = '';

  try {
    // Preparation of the PDF blob to be sent to the server.
    const doc = instance.Core.documentViewer.getDocument();
    // Export the annotations so they are included in the uploaded file data.
    const xfdfString = await instance.Core.annotationManager.exportAnnotations();
    const data = await doc.getFileData({ xfdfString });
    const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });
    const formData = new FormData();
    formData.append(doc.filename, blob, doc.filename);

    // Send the PDF blob to the server for processing.
    console.log('Sending PDF to server for processing...');
    let response;
    try {
      // Encode the filename so spaces, '&', '#', etc. do not corrupt the query string.
      response = await fetch(`${endpoint}?filename=${encodeURIComponent(doc.filename)}`, {
        method: 'POST',
        body: formData,
      });
    } catch (error) {
      // fetch() only rejects on network-level failures, i.e. the server was unreachable.
      throw new Error(
        `Failed to connect to server: ${error}`
        + `\n Attempted URL: ${endpoint}`
        + '\n This likely means the Classification server is not running on port 5050',
        { cause: error },
      );
    }

    const bodyText = await response.text();
    if (response.status !== 200) {
      // Keep the response body: the server puts its error details there.
      throw new Error(`Server responded with status: ${response.status}\n${bodyText}`);
    }
    resultText = bodyText;
  } catch (error) {
    resultText = ` Error in PDF upload promise: ${error}`;
    console.error(resultText);
  } finally {
    resultArea.textContent = resultText;
    instance.UI.closeElements(['loadingModal']);
  }
};
72
// UI section

// Create a container for the controls.
const controlsContainer = document.createElement('div');

// Create 2 divs inside the container for left and right sections.
const leftDiv = document.createElement('div');
const rightDiv = document.createElement('div');
leftDiv.className = 'vertical-container left-panel'; // side-by-side divs using (display: inline-block) and (vertical-align: top)
rightDiv.className = 'vertical-container right-panel';
controlsContainer.appendChild(leftDiv);
controlsContainer.appendChild(rightDiv);

// Add description text to the left div.
const description = document.createElement('p');
description.textContent = "A demo of Apryse Server SDK's Document Classification, powered by custom trained AI. The document will be analyzed and a resulting JSON will identify the document type. Supports multi page documents.";
leftDiv.appendChild(description);
leftDiv.appendChild(document.createElement('br'));

// Add classify document button to the left div.
const classifyDocButton = document.createElement('button');
classifyDocButton.className = 'btn';
classifyDocButton.textContent = 'Classify Document';
classifyDocButton.onclick = async () => {
  await classifyDoc();
};
leftDiv.appendChild(classifyDocButton);

leftDiv.appendChild(document.createElement('br'));
leftDiv.appendChild(document.createElement('br'));

// Page-limit note: bold sentence followed by a plain full stop.
// Built from DOM nodes rather than innerHTML; the resulting tree is the same.
const note = document.createElement('p');
const noteEmphasis = document.createElement('b');
noteEmphasis.textContent = 'Note: only the first 2 pages will be processed';
note.append(noteEmphasis, '.');
leftDiv.appendChild(note);

// Read-only output box for the classification result, placed in the right div.
// The rest of this file updates it through textContent, so the same property is used here.
const resultArea = document.createElement('textarea');
resultArea.className = 'result-area';
resultArea.readOnly = true;
resultArea.textContent = "Classification result will appear here.";
rightDiv.appendChild(resultArea);

// Place the controls above the viewer content inside the host element.
element.insertBefore(controlsContainer, element.firstChild);
115
/* side-by-side divs */
.vertical-container {
  display: inline-block;
  vertical-align: top;
}

/* Button Styles */
.btn {
  background-color: #007bff;
  margin: 0 10px;
  padding: 5px 10px;
  border: 1px solid #ccc;
  border-radius: 4px;
  cursor: pointer;
  font-size: 14px;
  transition: all 0.2s ease;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  color: white;
}

/* Hover and press effects are limited to enabled buttons so a disabled
   button does not move or gain a shadow under the pointer. */
.btn:hover:not(:disabled) {
  background-color: #0056b3;
  transform: translateY(-1px);
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}

.btn:active:not(:disabled) {
  transform: translateY(1px);
  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.2);
}

.btn:disabled {
  background-color: #ccc;
  cursor: not-allowed;
  box-shadow: none;
}

/* Layout Styles */
.left-panel {
  width: 40%;
}

.right-panel {
  width: 60%; /* Right div is wider to accommodate JSON data display */
}

.result-area {
  /* Include padding and border in the 100% width so the box stays inside its panel. */
  box-sizing: border-box;
  width: 100%;
  height: 150px;
  white-space: pre-wrap;
  font-family: 'Courier New', monospace;
  font-size: 12px;
  overflow: auto;
  background-color: gray;
  color: white;
}

/* Responsive Design */
@media (max-width: 768px) {
  .btn {
    width: 100%;
    margin: 5px 0;
  }
}
65
1const { PDFNet } = require('@pdftron/pdfnet-node');
2const path = require('path');
3const fs = require('fs');
4
5// **Important**
6// You must get a license key from Apryse for the server to run.
7// A trial key can be obtained from:
8// https://docs.apryse.com/core/guides/get-started/trial-key
9const licenseKey = 'YOUR_LICENSE_KEY';
10const multer = require('multer');
11const { response } = require('express');
12const upload = multer();
13const serverFolder = 'server';
14const sentDocuments = 'sentDocuments';
15const serverHandler = `/${serverFolder}/handler.js`;
16
// Initialize PDFNet once when the module loads.
// isInitialized becomes true only after the classification engine was confirmed available;
// initializationError keeps the failure so the request handler can report it.
let isInitialized = false;
let initializationError = null;

/**
 * Verifies that the Data Extraction module and its document classification
 * engine are available, and records the outcome in `isInitialized` /
 * `initializationError`.
 *
 * Never throws: any failure is logged and stored in `initializationError`.
 *
 * @returns {Promise<void>}
 */
async function initializeServer() {
  // Start from a clean state so a repeated run never reports a stale result.
  isInitialized = false;
  initializationError = null;

  try {
    console.log('Initializing PDFNet Data Extraction module...');

    // Check if DataExtractionModule is available.
    const extraction = PDFNet.DataExtractionModule;
    if (!extraction) {
      throw new Error('DataExtractionModule not available');
    }

    // Check if document classification is available.
    const isAvailable = await extraction.isModuleAvailable(
      extraction.DataExtractionEngine.e_DocClassification);
    if (!isAvailable) {
      throw new Error('Document Classification module not available');
    }

    isInitialized = true;
  } catch (error) {
    // `error` may not be an Error instance, so fall back to the raw value for the log.
    console.error('❌ Error initializing Data Extraction module:', error?.message ?? error);
    initializationError = error;
  }
}
43
44module.exports = async (app) => {
45
46 // Handle POST request sent to '/server/handler.js'.
47 // This endpoint receives the currently loaded PDF file in the Apryse webviewer and sends
48 // its classification back to the client as JSON data.
49 app.post(serverHandler, upload.any(), async (request, response) => {
50 try {
51 // Check if PDFNet is initialized.
52 if (!isInitialized) {
53 return response.status(503).json({
54 error: 'Server not properly initialized',
55 details: initializationError?.message
56 });
57 }
58
59 if (!fs.existsSync(sentDocuments))
60 fs.mkdirSync(sentDocuments);
61
62 const sentPdf = path.resolve(__dirname, `./${sentDocuments.split('/').pop()}/${request.query.filename}`);
63 fs.writeFileSync(sentPdf, request.files[0].buffer);
64
65 response.header('Content-Type', 'application/json');
66
67 const opts = new PDFNet.DataExtractionModule.DataExtractionOptions();
68 opts.setPages('1-2'); // Process 2 pages.
69 console.log(opts);
70 const json = await PDFNet.DataExtractionModule.extractDataAsString(
71 sentPdf, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocClassification, opts);
72
73 await fs.promises.unlink(sentPdf); // Delete the document after processing.
74 response.status(200).send(json);
75 } catch (e) {
76 console.error('Error processing document:', e);
77 response.status(500).json({
78 error: `Error extracting JSON text from PDF file ${request.query.filename}`,
79 details: e.message
80 });
81 }
82 });
83};
84
// Initialize PDFNet when module loads.
// A rejection is logged and stored so the request handler can report it to clients.
PDFNet.runWithoutCleanup(initializeServer, licenseKey).catch((error) => {
  console.error('❌ Fatal error initializing PDFNet:', error);
  initializationError = error;
});
1
2const express = require('express');
3const fs = require('fs');
4const bodyParser = require('body-parser');
5const open = (...args) => import('open').then(({ default: open }) => open(...args));
6const handler = require('./handler.js');
7const port = process.env.PORT || 5050;
8const app = express();
9const sentPdfs = 'sentPdfs';
10
// CORS middleware to allow cross-origin requests from the playground.
app.use((req, res, next) => {
  res.header('Access-Control-Allow-Origin', '*');
  res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
  res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept, Authorization');

  // Handle preflight OPTIONS requests: answer directly, nothing else needs to run.
  if (req.method === 'OPTIONS') {
    res.sendStatus(200);
    return;
  }
  next();
});

app.use(bodyParser.text());
app.use('/client', express.static('../client')); // For statically serving 'client' folder at '/client'.

// handler() is async; report a failed route registration instead of leaving the promise floating.
Promise.resolve(handler(app)).catch((error) => {
  console.error('Failed to register the classification handler:', error);
});
29
// Run server.
const server = app.listen(port, 'localhost', (err) => {
  if (err) {
    console.error(err);
    return;
  }
  console.info(`Server is listening at http://localhost:${port}`);
});

// Express 4 does not pass startup failures (e.g. the port is already in use) to
// the listen callback; they are emitted as an 'error' event on the server.
// Handle it here so the failure is reported clearly and the process exits non-zero.
server.on('error', (err) => {
  console.error(`Server error on http://localhost:${port}:`, err);
  process.exit(1);
});
39
// Server shutdown and cleanup

/**
 * Closes the HTTP server, removes the temporary upload folders and exits.
 *
 * @param {unknown} [reason] - What triggered the shutdown: the signal name
 *   Node passes to signal listeners, or the Error from an uncaught exception.
 *   An Error makes the process exit with code 1; anything else exits with 0.
 */
function shutdown(reason) {
  const exitCode = reason instanceof Error ? 1 : 0;
  console.log('Cleanup started...');

  // Example: Close server
  server.close(() => {
    console.log('Server closed.');

    // Removes sent PDFs folders: the one named in this file and the
    // 'sentDocuments' folder that handler.js writes uploads to, next to this script.
    for (const dir of [sentPdfs, `${__dirname}/sentDocuments`]) {
      try {
        // rmSync replaces the deprecated rmdirSync(..., { recursive: true });
        // `force` makes a missing folder a no-op.
        fs.rmSync(dir, { recursive: true, force: true });
      } catch (cleanupError) {
        console.error(`Could not remove ${dir}:`, cleanupError);
      }
    }

    // If no async cleanup, exit directly.
    process.exit(exitCode);
  });
}

// Handle shutdown signals.
process.on('SIGINT', shutdown); // Ctrl+C
process.on('SIGTERM', shutdown); // Kill command or Docker stop
process.on('uncaughtException', (err) => {
  console.error('Uncaught Exception:', err);
  // Pass the error along so the process reports failure through its exit code.
  shutdown(err);
});
1{
2 "name": "document-classification-server",
3 "version": "1.0.0",
4 "description": "Document Classification Demo Server Component",
5 "main": "server.js",
6 "scripts": {
7 "start": "node server.js",
8 "dev": "node server.js"
9 },
10 "dependencies": {
11 "@pdftron/data-extraction": "^11.8.0",
12 "@pdftron/pdfnet-node": "^11.8.0",
13 "body-parser": "^1.20.2",
14 "express": "^4.18.2",
15 "multer": "^1.4.4",
16 "open": "^9.1.0"
17 },
18 "keywords": [
19 "document-classification",
20 "pdf",
21 "server",
22 "pdftron",
23 "webviewer"
24 ],
25 "author": "Apryse",
26 "license": "MIT"
27}
28
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales