Classify documents with custom-trained AI. This feature uses a specialized AI model to analyze documents and return a JSON output that identifies the document type. It supports multipage documents and is trained on 19 categories, primarily in English:
This demo allows you to:
Implementation steps
To add Document Classification capability in WebViewer:
Step 1: Choose your preferred web stack.
Step 2: Add the ES6 JavaScript sample code provided in this guide.
Once you generate your license key, it will automatically be included in your sample code below.
Apryse collects some data regarding your usage of the SDK for product improvement.
The data that Apryse collects include:
For clarity, no other data is collected by the SDK and Apryse has no access to the contents of your documents.
If you wish to continue without data collection, contact us and we will email you a no-tracking trial key for you to get started.
1// ES6 Compliant Syntax
2// GitHub Copilot - October 22, 2025
3// File name: document-classification/client/index.js
4
5const licenseKey = 'YOUR_WEBVIEWER_LICENSE_KEY';
6
7// Global variables
8const element = document.getElementById('viewer');
9let instance = null;
10
// Initialize WebViewer and keep a module-level reference to the created
// instance so the classification code and the UI handlers can reach it later.
WebViewer({
  path: '/lib',
  initialDoc: 'https://apryse.s3.us-west-1.amazonaws.com/public/files/samples/document_classification_pack.pdf',
  enableFilePicker: true, // Enable file picker to open files. In WebViewer -> menu icon -> Open File
  licenseKey: licenseKey,
}, element)
  .then((inst) => {
    instance = inst;
    // Tell the user when a (new) document is ready to be classified.
    instance.Core.documentViewer.addEventListener('documentLoaded', () => {
      resultArea.textContent = "📄 Document loaded. You can now press the 'Classify Document' button to classify it.";
    });
  })
  .catch((error) => {
    // Without a rejection handler a failed initialization would only surface
    // as an unhandled promise rejection.
    console.error('Failed to initialize WebViewer:', error);
  });
23
/**
 * Sends the currently loaded document (with its annotations merged in) to the
 * classification server and shows the server's reply in the result area.
 *
 * The whole document is uploaded, not just the current page.
 *
 * The returned promise settles only after the request has finished and the UI
 * has been updated. Failures are logged and written to the result area; they
 * are not rethrown, and the loading modal is always closed.
 *
 * @returns {Promise<void>}
 */
const classifyDoc = async () => {
  const serverUrl = 'http://localhost:5050/server/handler.js';

  instance.UI.openElements(['loadingModal']);
  resultArea.textContent = '⏳ Classifying document, please wait...';
  let resultText = '';

  try {
    // Preparation of the PDF blob to be sent to the server
    const doc = instance.Core.documentViewer.getDocument();
    // Export the annotations so they are included in the uploaded file data
    const xfdfString = await instance.Core.annotationManager.exportAnnotations();
    const data = await doc.getFileData({ xfdfString });
    const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });
    const formData = new FormData();
    formData.append(doc.filename, blob, doc.filename);

    // Send the PDF blob to the server for processing
    console.log('Sending PDF to server for processing...');
    let response;
    try {
      // The filename is user-controlled text: encode it so characters such as
      // '&', '#' or spaces cannot corrupt the query string.
      response = await fetch(`${serverUrl}?filename=${encodeURIComponent(doc.filename)}`, {
        method: 'POST',
        body: formData,
      });
    } catch (error) {
      // fetch() rejects only when no response was obtained at all
      let errorText = `Failed to connect to server: ${error}`;
      errorText += '\n Attempted URL: http://localhost:5050/server/handler.js';
      errorText += '\n This likely means the Classification server is not running on port 5050';
      throw new Error(errorText, { cause: error });
    }

    if (response.status !== 200) {
      throw new Error(`Server responded with status: ${response.status}`);
    }
    resultText = await response.text();
  } catch (error) {
    resultText = ` Error in PDF upload promise: ${error.message}`;
    console.error(resultText);
  } finally {
    // Runs on success and on every failure path, so the modal never stays open
    resultArea.textContent = resultText;
    instance.UI.closeElements(['loadingModal']);
  }
};
74
// UI section
// Builds the demo controls (description, button, note, result box) and places
// them above the viewer inside the viewer's host element.

// Create a container for the controls
const controlsContainer = document.createElement('div');

// Create 2 divs inside the container for left and right sections
const leftDiv = document.createElement('div');
const rightDiv = document.createElement('div');
leftDiv.className = 'vertical-container left-panel'; // side-by-side divs using (display: inline-block) and (vertical-align: top)
rightDiv.className = 'vertical-container right-panel';
controlsContainer.appendChild(leftDiv);
controlsContainer.appendChild(rightDiv);

// Add description text to the left div
const description = document.createElement('p');
description.textContent = "A demo of Apryse Server SDK's Document Classification, powered by custom trained AI. The document will be analyzed and a resulting JSON will identify the document type. Supports multi page documents.";
leftDiv.appendChild(description);
leftDiv.appendChild(document.createElement('br'));

// Add classify document button to the left div
const classifyDocButton = document.createElement('button');
classifyDocButton.className = 'btn';
classifyDocButton.textContent = 'Classify Document';
classifyDocButton.onclick = async () => {
  // Disable the button while a classification is running so repeated clicks
  // cannot start several uploads in parallel.
  classifyDocButton.disabled = true;
  try {
    await classifyDoc();
  } finally {
    classifyDocButton.disabled = false;
  }
};
leftDiv.appendChild(classifyDocButton);

leftDiv.appendChild(document.createElement('br'));
leftDiv.appendChild(document.createElement('br'));

// Note about the page limit: bold text followed by a plain full stop,
// built from DOM nodes instead of an HTML string.
const note = document.createElement('p');
const noteText = document.createElement('b');
noteText.textContent = 'Note: only the first 2 pages will be processed';
note.append(noteText, '.');
leftDiv.appendChild(note);

// Read-only box on the right that shows status messages and the result
const resultArea = document.createElement('textarea');
resultArea.className = 'result-area';
resultArea.readOnly = true;
resultArea.textContent = "Classification result will appear here.";
rightDiv.appendChild(resultArea);

// Place the controls before the viewer content
element.insertBefore(controlsContainer, element.firstChild);
117
/* side-by-side divs */
.vertical-container {
  display: inline-block;
  vertical-align: top;
}

/* Button Styles */
.btn {
  background-color: #007bff;
  margin: 0 10px;
  padding: 5px 10px;
  border: 1px solid #ccc;
  border-radius: 4px;
  cursor: pointer;
  font-size: 14px;
  transition: all 0.2s ease;
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
  color: white;
}

/* Hover/press feedback only for buttons that can actually be clicked */
.btn:hover:not(:disabled) {
  background-color: #0056b3;
  transform: translateY(-1px);
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
}

.btn:active:not(:disabled) {
  transform: translateY(1px);
  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.2);
}

.btn:disabled {
  background-color: #ccc;
  cursor: not-allowed;
  box-shadow: none;
}

/* Layout Styles */
.left-panel {
  width: 40%;
}

.right-panel {
  width: 60%; /* right div is wider to accommodate JSON data display */
}

.result-area {
  box-sizing: border-box; /* keep padding and border inside the 100% width */
  width: 100%;
  height: 150px;
  white-space: pre-wrap;
  font-family: 'Courier New', monospace;
  font-size: 12px;
  overflow: auto;
  background-color: gray;
  color: white;
}

/* Responsive Design */
@media (max-width: 768px) {
  .btn {
    width: 100%;
    margin: 5px 0;
  }
}
65
1const { PDFNet } = require('@pdftron/pdfnet-node');
2const path = require('path');
3const fs = require('fs');
4
5// **Important**
6// You must get a license key from Apryse for the server to run.
7// A trial key can be obtained from:
8// https://docs.apryse.com/core/guides/get-started/trial-key
9const licenseKey = 'YOUR_SERVER_LICENSE_KEY';
10const multer = require('multer');
11const { response } = require('express');
12const upload = multer();
13const serverFolder = 'server';
14const sentDocuments = 'sentDocuments';
15const serverHandler = `/${serverFolder}/handler.js`;
16
17// Initialize PDFNet once when the module loads
18let isInitialized = false;
19let initializationError = null;
20
/**
 * Checks that the Data Extraction module and its document classification
 * engine are available.
 *
 * On success sets `isInitialized` to true. Any failure is logged and stored
 * in `initializationError` instead of being thrown.
 */
async function initializeServer() {
  try {
    console.log('Initializing PDFNet Data Extraction module...');

    // The Data Extraction API must be present on PDFNet
    const extraction = PDFNet.DataExtractionModule;
    if (!extraction) {
      throw new Error('DataExtractionModule not available');
    }

    // The document classification engine must be available as well
    const classificationEngine = extraction.DataExtractionEngine.e_DocClassification;
    const engineReady = await extraction.isModuleAvailable(classificationEngine);
    if (!engineReady) {
      throw new Error('Document Classification module not available');
    }

    isInitialized = true;
  } catch (error) {
    console.error('❌ Error initializing Data Extraction module:', error.message);
    initializationError = error;
  }
}
43
44module.exports = async (app) => {
45
46 // Handle POST request sent to '/server/handler.js'
47 // This endpoint receives the currently loaded PDF file in the Apryse webviewer and sends
48 // its classification back to the client as JSON data
49 app.post(serverHandler, upload.any(), async (request, response) => {
50 try {
51 // Check if PDFNet is initialized
52 if (!isInitialized) {
53 return response.status(503).json({
54 error: 'Server not properly initialized',
55 details: initializationError?.message
56 });
57 }
58
59 if (!fs.existsSync(sentDocuments))
60 fs.mkdirSync(sentDocuments);
61
62 const sentPdf = path.resolve(__dirname, `./${sentDocuments.split('/').pop()}/${request.query.filename}`);
63 fs.writeFileSync(sentPdf, request.files[0].buffer);
64
65 response.header('Content-Type', 'application/json');
66
67 const opts = new PDFNet.DataExtractionModule.DataExtractionOptions();
68 opts.setPages('1-2'); // process 2 pages
69 console.log(opts);
70 const json = await PDFNet.DataExtractionModule.extractDataAsString(
71 sentPdf, PDFNet.DataExtractionModule.DataExtractionEngine.e_DocClassification, opts);
72
73 await fs.promises.unlink(sentPdf); // delete the document after processing
74 response.status(200).send(json);
75 } catch (e) {
76 console.error('Error processing document:', e);
77 response.status(500).json({
78 error: `Error extracting JSON text from PDF file ${request.query.filename}`,
79 details: e.message
80 });
81 }
82 });
83};
84
// Start PDFNet initialization as soon as this module is loaded. A rejection
// is logged and stored in `initializationError`.
const recordFatalInitError = (error) => {
  console.error('❌ Fatal error initializing PDFNet:', error);
  initializationError = error;
};

PDFNet.runWithoutCleanup(initializeServer, licenseKey).catch(recordFatalInitError);
1
2const express = require('express');
3const fs = require('fs');
4const bodyParser = require('body-parser');
5const open = (...args) => import('open').then(({ default: open }) => open(...args));
6const handler = require('./handler.js');
7const port = process.env.PORT || 5050;
8const app = express();
9const sentPdfs = 'sentPdfs';
10
// CORS middleware: lets the playground (served from another origin) call
// this server. The same headers are attached to every response.
const corsHeaders = [
  ['Access-Control-Allow-Origin', '*'],
  ['Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS'],
  ['Access-Control-Allow-Headers', 'Origin, X-Requested-With, Content-Type, Accept, Authorization'],
];

app.use((req, res, next) => {
  for (const [headerName, headerValue] of corsHeaders) {
    res.header(headerName, headerValue);
  }

  // Everything except a preflight request continues down the chain
  if (req.method !== 'OPTIONS') {
    next();
    return;
  }

  // Preflight OPTIONS requests are answered right away
  res.sendStatus(200);
});
24
app.use(bodyParser.text());
app.use('/client', express.static('../client')); // Statically serves the 'client' folder at '/client'

// Register the classification endpoint on the app
handler(app);

// Run server
const server = app.listen(port, 'localhost', () => {
  console.info(`Server is listening at http://localhost:${port}`);
});

// In Express 4 the listen callback is only a 'listening' listener and never
// receives an error. Failures to bind (e.g. the port is already in use) are
// emitted as an 'error' event on the server, so they are handled here.
server.on('error', (err) => {
  console.error(err);
  process.exit(1);
});
39
// Server shutdown and cleanup

/**
 * Stops the HTTP server, removes the folders used for uploaded PDFs and then
 * exits the process.
 *
 * @param {number} [exitCode=0] - Process exit code; pass a non-zero value
 *   when shutting down because of an error.
 */
function shutdown(exitCode = 0) {
  console.log('Cleanup started...');

  server.close(() => {
    console.log('Server closed.');

    // The upload handler stores PDFs in 'sentDocuments' next to the server
    // files; `sentPdfs` is kept as well so that folder is still cleaned up
    // should it exist.
    const uploadFolders = [sentPdfs, `${__dirname}/sentDocuments`];
    for (const folder of uploadFolders) {
      try {
        // force: a folder that does not exist is not an error
        fs.rmSync(folder, { recursive: true, force: true });
      } catch (err) {
        // A failed cleanup must not prevent the process from exiting
        console.error(`Could not remove folder ${folder}:`, err);
      }
    }

    process.exit(exitCode);
  });
}

// Handle shutdown signals. The listeners are wrapped so the signal name that
// Node passes to them is not mistaken for an exit code.
process.on('SIGINT', () => shutdown()); // Ctrl+C
process.on('SIGTERM', () => shutdown()); // kill command or Docker stop
process.on('uncaughtException', (err) => {
  console.error('Uncaught Exception:', err);
  shutdown(1); // report the failure through a non-zero exit code
});
1{
2 "name": "document-classification-server",
3 "version": "1.0.0",
4 "description": "Document Classification Demo Server Component",
5 "main": "server.js",
6 "scripts": {
7 "start": "node server.js",
8 "dev": "node server.js"
9 },
10 "dependencies": {
11 "@pdftron/data-extraction": "^11.8.0",
12 "@pdftron/pdfnet-node": "^11.8.0",
13 "body-parser": "^1.20.2",
14 "express": "^4.18.2",
15 "multer": "^1.4.4",
16 "open": "^9.1.0"
17 },
18 "keywords": [
19 "document-classification",
20 "pdf",
21 "server",
22 "pdftron",
23 "webviewer"
24 ],
25 "author": "Apryse",
26 "license": "MIT"
27}
28
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales