Classify documents with custom-trained AI. This feature uses a specialized AI model to analyze documents and return a JSON output that identifies the document type. It supports multipage documents and is trained on 19 categories, primarily in English:
This demo allows you to:
Implementation steps
To add Document Classification capability in WebViewer:
Step 1: Choose your preferred web stack
Step 2: Add the ES6 JavaScript sample code provided in this guide
Once you generate your license key, it will automatically be included in your sample code below.
Apryse collects some data regarding your usage of the SDK for product improvement.
The data that Apryse collects include:
For clarity, no other data is collected by the SDK and Apryse has no access to the contents of your documents.
If you wish to continue without data collection, contact us and we will email you a no-tracking trial key for you to get started.
// ES6 Compliant Syntax
// GitHub Copilot - October 22, 2025
// File name: document-classification/client/index.js

// Global variables
// Host element for WebViewer; the demo controls are inserted into it as well.
const element = document.getElementById('viewer');
// WebViewer instance; stays null until the WebViewer() promise below resolves.
let instance = null;

// Initialize WebViewer
WebViewer({
  path: '/lib',
  initialDoc: 'https://apryse.s3.us-west-1.amazonaws.com/public/files/samples/document_classification_pack.pdf',
  enableFilePicker: true, // Enable file picker to open files. In WebViewer -> menu icon -> Open File
  licenseKey: 'YOUR_LICENSE_KEY',
}, element).then((inst) => {
  instance = inst;
  // resultArea is declared in the UI section of this file; this callback only
  // runs after a document has loaded, by which time it has been initialized.
  instance.Core.documentViewer.addEventListener('documentLoaded', () => {
    resultArea.textContent = "📄 Document loaded. You can now press the 'Classify Document' button to classify it.";
  });
}).catch((error) => {
  // Without this handler a failed start-up would be an unhandled rejection.
  console.error('Failed to initialize WebViewer:', error);
});

/**
 * Sends the currently loaded document (with its annotations merged in) to the
 * classification server and shows the server's response in the result area.
 *
 * The returned promise settles only after the request has finished and the
 * loading modal has been closed. It never rejects: every failure is reported
 * through the result area and the console instead.
 *
 * @returns {Promise<void>}
 */
const classifyDoc = async () => {
  const endpoint = 'http://localhost:5050/server/handler.js';

  instance.UI.openElements(['loadingModal']);
  resultArea.textContent = '⏳ Classifying document, please wait...';
  let resultText = '';

  try {
    // Preparation of the PDF blob to be sent to the server
    const doc = instance.Core.documentViewer.getDocument();
    if (!doc) {
      throw new Error('No document is loaded in the viewer');
    }
    const xfdfString = await instance.Core.annotationManager.exportAnnotations(); // obtaining annotations in the loaded document
    const data = await doc.getFileData({ xfdfString });
    const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });
    const formData = new FormData();
    formData.append(doc.filename, blob, doc.filename);

    // Send the PDF blob to the server for processing
    console.log('Sending PDF to server for processing...');
    let response;
    try {
      // The filename is encoded so characters such as '&', '#' or spaces
      // cannot truncate or corrupt the query string.
      response = await fetch(`${endpoint}?filename=${encodeURIComponent(doc.filename)}`, {
        method: 'POST',
        body: formData,
      });
    } catch (error) {
      // fetch() rejects only when the request itself could not be made.
      throw new Error(
        `Failed to connect to server: ${error}` +
        `\n Attempted URL: ${endpoint}` +
        '\n This likely means the Classification server is not running on port 5050'
      );
    }

    const body = await response.text();
    if (response.status !== 200) {
      // Include the response body: the server puts its error details there.
      throw new Error(`Server responded with status: ${response.status}${body ? `\n${body}` : ''}`);
    }
    resultText = body;
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    resultText = `Error in PDF upload: ${reason}`;
    console.error(resultText);
  } finally {
    // Runs on every path, so the loading modal can never be left open.
    resultArea.textContent = resultText;
    instance.UI.closeElements(['loadingModal']);
  }
};

// UI section

// Create a container for the controls
const controlsContainer = document.createElement('div');

// Create 2 divs inside the container for left and right sections
const leftDiv = document.createElement('div');
const rightDiv = document.createElement('div');
leftDiv.className = 'vertical-container left-panel'; // side-by-side divs using (display: inline-block) and (vertical-align: top)
rightDiv.className = 'vertical-container right-panel';
controlsContainer.appendChild(leftDiv);
controlsContainer.appendChild(rightDiv);

// Add description text to the left div
const description = document.createElement('p');
description.textContent = "A demo of Apryse Server SDK's Document Classification, powered by custom trained AI. The document will be analyzed and a resulting JSON will identify the document type. Supports multi page documents.";
leftDiv.appendChild(description);
leftDiv.appendChild(document.createElement('br'));

// Add classify document button to the left div
const classifyDocButton = document.createElement('button');
classifyDocButton.className = 'btn';
classifyDocButton.textContent = 'Classify Document';
// Return the promise directly; the extra async/await wrapper added nothing.
classifyDocButton.onclick = () => classifyDoc();
leftDiv.appendChild(classifyDocButton);

leftDiv.appendChild(document.createElement('br'));
leftDiv.appendChild(document.createElement('br'));

// Note about the page limit, built from DOM nodes instead of an HTML string.
const note = document.createElement('p');
const noteEmphasis = document.createElement('b');
noteEmphasis.textContent = 'Note: only the first 2 pages will be processed';
note.appendChild(noteEmphasis);
note.appendChild(document.createTextNode('.'));
leftDiv.appendChild(note);

// Read-only text area in the right div that displays status messages and results
const resultArea = document.createElement('textarea');
resultArea.className = 'result-area';
resultArea.readOnly = true;
resultArea.textContent = "Classification result will appear here.";
rightDiv.appendChild(resultArea);

// Place the controls above the viewer content inside the host element
element.insertBefore(controlsContainer, element.firstChild);

/* side-by-side divs */
.vertical-container {
  display: inline-block;
  vertical-align: top;
}

/* Button Styles */
.btn {
  background-color: #007bff;
  margin: 0 10px;
  padding: 5px 10px;
  border: 1px solid #ccc;
  border-radius: 4px;
  cursor: pointer;
  font-size: 14px;
  transition: all 0.2s ease;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  color: white;
}

.btn:hover {
  background-color: #0056b3;
  transform: translateY(-1px);
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}

.btn:active {
  transform: translateY(1px);
  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.2);
}

/* Declared after :hover/:active so that, at equal specificity, it wins. */
.btn:disabled {
  background-color: #ccc;
  cursor: not-allowed;
  box-shadow: none;
  transform: none; /* cancel the hover/active nudge on a disabled button */
}

/* Layout Styles */
.left-panel {
  width: 40%;
}

.right-panel {
  width: 60%; /* right div is wider to accommodate JSON data display */
}

.result-area {
  width: 100%;
  height: 150px;
  white-space: pre-wrap;
  font-family: 'Courier New', monospace;
  font-size: 12px;
  overflow: auto;
  background-color: gray;
  color: white;
}

/* Responsive Design */
@media (max-width: 768px) {
  .btn {
    width: 100%;
    margin: 5px 0;
  }
}

const { PDFNet } = require('@pdftron/pdfnet-node');
const path = require('path');
const fs = require('fs');
const multer = require('multer');
// NOTE(review): this binding is not referenced anywhere in the visible code
// (the route callback declares its own `response` parameter) — confirm it can go.
const { response } = require('express');

// **Important**
// You must get a license key from Apryse for the server to run.
// A trial key can be obtained from:
// https://docs.apryse.com/core/guides/get-started/trial-key
const licenseKey = 'YOUR_LICENSE_KEY';

// multer() is created without options; the route reads each upload from `file.buffer`.
const upload = multer();
const serverFolder = 'server';
// Name of the folder that temporarily holds uploaded documents.
const sentDocuments = 'sentDocuments';
// Route path the client posts to: '/server/handler.js'.
const serverHandler = `/${serverFolder}/handler.js`;

// Initialize PDFNet once when the module loads
// True once the classification engine has been confirmed available.
let isInitialized = false;
// Last initialization failure, or null; reported to clients in the 503 response.
let initializationError = null;

/**
 * Verifies that the Data Extraction module and its document-classification
 * engine are available, and records the outcome in `isInitialized` /
 * `initializationError`. Failures are logged and stored, never thrown.
 *
 * @returns {Promise<void>}
 */
async function initializeServer() {
  try {
    console.log('Initializing PDFNet Data Extraction module...');

    // Check if DataExtractionModule is available
    if (!PDFNet.DataExtractionModule) {
      throw new Error('DataExtractionModule not available');
    }

    // Check if document classification is available
    const isAvailable = await PDFNet.DataExtractionModule.isModuleAvailable(
      PDFNet.DataExtractionModule.DataExtractionEngine.e_DocClassification
    );
    if (!isAvailable) {
      throw new Error('Document Classification module not available');
    }

    isInitialized = true;
    // Drop any error left over from an earlier failed attempt.
    initializationError = null;
  } catch (error) {
    console.error('❌ Error initializing Data Extraction module:', error.message);
    initializationError = error;
  }
}

44module.exports = async (app) => {
45
46 // Handle POST request sent to '/server/handler.js'
47 // This endpoint receives the currently loaded PDF file in the Apryse webviewer and sends
48 // its classification back to the client as JSON data
49 app.post(serverHandler, upload.any(), async (request, response) => {
50 try {
51 // Check if PDFNet is initialized
52 if (!isInitialized) {
53 return response.status(503).json({
54 error: 'Server not properly initialized',
55 details: initializationError?.message
56 });
57 }
58
59 if (!fs.existsSync(sentDocuments))
60 fs.mkdirSync(sentDocuments);
61
62 const sentPdf = path.resolve(__dirname, `./${sentDocuments.split('/').pop()}/${request.query.filename}`);
63 fs.writeFileSync(sentPdf, request.files[0].buffer);
64
65 response.header('Content-Type', 'application/json');
66
67 const opts = new PDFNet.DataExtractionModule.DataExtractionOptions();
68 opts.setPages('1-2'); // process 2 pages
69 console.log(opts);
70 const json = await PDFNet.DataExtractionModule.extractDataAsString(
71 sentPdf, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocClassification, opts);
72
73 await fs.promises.unlink(sentPdf); // delete the document after processing
74 response.status(200).send(json);
75 } catch (e) {
76 console.error('Error processing document:', e);
77 response.status(500).json({
78 error: `Error extracting JSON text from PDF file ${request.query.filename}`,
79 details: e.message
80 });
81 }
82 });
83};
84
// Initialize PDFNet when module loads
// initializeServer records its own failures; this catch covers a rejection
// from runWithoutCleanup itself.
PDFNet.runWithoutCleanup(initializeServer, licenseKey).catch((error) => {
  console.error('❌ Fatal error initializing PDFNet:', error);
  initializationError = error;
});

const path = require('path');
const express = require('express');
const fs = require('fs');
const bodyParser = require('body-parser');
const open = (...args) => import('open').then(({ default: open }) => open(...args));
const handler = require('./handler.js');
const port = process.env.PORT || 5050;
const app = express();
// Folder that handler.js writes uploaded documents into (it resolves
// 'sentDocuments' against its own directory, which is this directory too).
const sentPdfs = path.resolve(__dirname, 'sentDocuments');

// CORS middleware to allow cross-origin requests from the playground
app.use((req, res, next) => {
  res.header('Access-Control-Allow-Origin', '*');
  res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
  res.header('Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept, Authorization');

  // Handle preflight OPTIONS requests
  if (req.method === 'OPTIONS') {
    res.sendStatus(200);
  } else {
    next();
  }
});

app.use(bodyParser.text());
// Statically serve the 'client' folder at '/client'; resolved against this
// file so it does not depend on the directory the server is started from.
app.use('/client', express.static(path.join(__dirname, '../client')));

handler(app);

// Run server
const server = app.listen(port, 'localhost', () => {
  console.info(`Server is listening at http://localhost:${port}`);
});

// In Express 4 the listen callback is only a 'listening' listener and never
// receives an error; failures such as a busy port arrive as an 'error' event.
server.on('error', (err) => {
  console.error(err);
  shutdown(1);
});

// Server shutdown and cleanup
let isShuttingDown = false;

/**
 * Closes the HTTP server, removes the uploaded-documents folder and exits.
 *
 * @param {number} [exitCode=0] - Process exit code to report.
 */
function shutdown(exitCode = 0) {
  // A second signal while closing must not start a second cleanup.
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;
  console.log('Cleanup started...');

  server.close(() => {
    console.log('Server closed.');

    // Removes sent PDFs folder; `force` makes a missing folder a no-op.
    try {
      fs.rmSync(sentPdfs, { recursive: true, force: true });
    } catch (err) {
      console.error('Could not remove uploaded documents folder:', err);
    }

    process.exit(exitCode);
  });
}

// Handle shutdown signals
// Wrapped in arrows because the signal name is passed to the listener and
// must not be mistaken for the exit code.
process.on('SIGINT', () => shutdown(0)); // Ctrl+C
process.on('SIGTERM', () => shutdown(0)); // kill command or Docker stop
process.on('uncaughtException', (err) => {
  console.error('Uncaught Exception:', err);
  shutdown(1); // report failure instead of a clean exit
});
{
  "name": "document-classification-server",
  "version": "1.0.0",
  "description": "Document Classification Demo Server Component",
  "main": "server.js",
  "scripts": {
    "start": "node server.js",
    "dev": "node server.js"
  },
  "dependencies": {
    "@pdftron/data-extraction": "^11.8.0",
    "@pdftron/pdfnet-node": "^11.8.0",
    "body-parser": "^1.20.2",
    "express": "^4.18.2",
    "multer": "^1.4.4",
    "open": "^9.1.0"
  },
  "keywords": [
    "document-classification",
    "pdf",
    "server",
    "pdftron",
    "webviewer"
  ],
  "author": "Apryse",
  "license": "MIT"
}

Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales