This JavaScript sample lets you select elements in a PDF by extracting PDF path points. A user can click an element in a PDF to see its bounding box and path outline. This sample works on all browsers (including IE11) and mobile devices without using plug-ins. To see an example, visit our Element Selection demo. Learn more about our Web SDK.
1(exports => {
2 const PDFNet = exports.Core.PDFNet;
3 // Stores information of the elements of each page so that we don't have to recompute them on subsequent clicks
4 let pageElementDataList = [];
5
6 // prevListenerFunc required to clean up mouse event listeners after switching documents
7 let prevListenerFunc;
8 // keep track of previously created annotations so that they can be cleaned up
9 let prevAnnotations = [];
10
11 const runCustomScript = (pdfDoc, layersContextID, windowCoord, pageNum, documentViewer, Annotations, annotationManager) => {
12 const displayModeManager = documentViewer.getDisplayModeManager();
13 const displayMode = displayModeManager.getDisplayMode();
14 const docCore = documentViewer.getDocument();
15
/**
 * Adds a rectangle annotation to the viewer that spans the two given corner points.
 *
 * Both corners are converted with docCore.getViewerCoordinates before the
 * annotation rectangle is built. The created annotation is also pushed onto
 * prevAnnotations so that it can be removed again later.
 *
 * @param {number} pageNumber - page the annotation is placed on
 * @param {number} x1 - x of the first corner
 * @param {number} y1 - y of the first corner
 * @param {number} x2 - x of the opposite corner
 * @param {number} y2 - y of the opposite corner
 * @returns {Promise<void>}
 */
const DrawRectangleAnnot = async (pageNumber, x1, y1, x2, y2) => {
  const p1 = docCore.getViewerCoordinates(pageNumber, x1, y1);
  const p2 = docCore.getViewerCoordinates(pageNumber, x2, y2);

  // The coordinate conversion may flip either axis, so order both the x and
  // the y values instead of assuming p1 is left of p2.
  const left = Math.min(p1.x, p2.x);
  const top = Math.min(p1.y, p2.y);
  const right = Math.max(p1.x, p2.x);
  const bottom = Math.max(p1.y, p2.y);

  const displayAnnot = new Annotations.RectangleAnnotation();
  displayAnnot.setPageNumber(pageNumber);
  displayAnnot.setRect(new Core.Math.Rect(left, top, right, bottom));
  annotationManager.addAnnotation(displayAnnot);
  prevAnnotations.push(displayAnnot);
};
26
/**
 * Marks a single point with a small filled rectangle annotation.
 *
 * The point is converted with docCore.getViewerCoordinates and the marker
 * extends 2 units from it in every direction. The annotation gets a yellow
 * fill and a red stroke, and is recorded in prevAnnotations.
 *
 * @param {number} pageNumber - page the marker is placed on
 * @param {number} x - x of the point to mark
 * @param {number} y - y of the point to mark
 * @returns {Promise<void>}
 */
const DrawPointAnnot = async (pageNumber, x, y) => {
  // Two separate conversions of the same point: one becomes the upper-left
  // corner of the marker, the other the lower-right corner.
  const upperLeft = docCore.getViewerCoordinates(pageNumber, x, y);
  const lowerRight = docCore.getViewerCoordinates(pageNumber, x, y);
  upperLeft.x -= 2;
  upperLeft.y -= 2;
  lowerRight.x += 2;
  lowerRight.y += 2;

  const marker = new Annotations.RectangleAnnotation();
  marker.setPageNumber(pageNumber);
  marker.FillColor = new Annotations.Color(255, 255, 0, 1);
  marker.StrokeColor = new Annotations.Color(255, 0, 0, 1);

  const top = Math.min(upperLeft.y, lowerRight.y);
  const bottom = Math.max(upperLeft.y, lowerRight.y);
  marker.setRect(new Core.Math.Rect(upperLeft.x, top, lowerRight.x, bottom));

  annotationManager.addAnnotation(marker);
  prevAnnotations.push(marker);
};
44
/**
 * Draws a point marker for the points of every segment in a path.
 *
 * The operator list is walked in order while a cursor consumes the matching
 * numbers from pointList: two for moveto/lineto, six for cubicto (only the
 * last pair is drawn) and four for rect (x, y, width, height; all four
 * corners are drawn). Other operators consume nothing. Each point is passed
 * through currTransMtx.mult before being handed to DrawPointAnnot.
 *
 * @param {ArrayLike<number>} opr - path operators
 * @param {ArrayLike<number>} pointList - flat list of numbers consumed by the operators
 * @param {object} currTransMtx - matrix exposing an async mult(x, y)
 * @param {number} pageNumber - page the markers are drawn on
 * @returns {Promise<void>}
 */
const ProcessPaths = async (opr, pointList, currTransMtx, pageNumber) => {
  const operatorCount = opr.length;
  if (operatorCount > 4000) {
    console.log(`Processing ${operatorCount} points. This will take significant time.`);
  } else if (operatorCount > 500) {
    console.log(`Processing ${operatorCount} points. This may take some time.`);
  }

  const SegmentType = PDFNet.Element.PathSegmentType;
  let cursor = 0;
  const readNumber = () => pointList[cursor++];

  // Transform one point and draw its marker.
  const markPoint = async (x, y) => {
    const mapped = await currTransMtx.mult(x, y);
    await DrawPointAnnot(pageNumber, mapped.x, mapped.y);
  };

  for (const segment of opr) {
    if (segment === SegmentType.e_moveto || segment === SegmentType.e_lineto) {
      const x = readNumber();
      const y = readNumber();
      await markPoint(x, y);
    } else if (segment === SegmentType.e_cubicto) {
      // the first two pairs are consumed but not drawn
      cursor += 4;
      const endX = readNumber();
      const endY = readNumber();
      await markPoint(endX, endY);
    } else if (segment === SegmentType.e_rect) {
      const originX = readNumber();
      const originY = readNumber();
      const width = readNumber();
      const height = readNumber();
      const farX = originX + width;
      const farY = originY + height;
      const corners = [
        [originX, originY],
        [farX, originY],
        [farX, farY],
        [originX, farY],
      ];
      // transform every corner first, then draw them in the same order
      const mappedCorners = [];
      for (const [cornerX, cornerY] of corners) {
        mappedCorners.push(await currTransMtx.mult(cornerX, cornerY));
      }
      for (const corner of mappedCorners) {
        await DrawPointAnnot(pageNumber, corner.x, corner.y);
      }
    }
    // e_closepath and any other operator: nothing to read or draw
  }

  // ensure that we update the view
  annotationManager.drawAnnotations(pageNumber);
};
142
/**
 * Highlights the stored elements whose bounding box contains the click.
 *
 * The click is first multiplied by the inverse of the page's default matrix.
 * Elements are then visited from last to first (the last stored element is
 * the top-most one). Every hit gets a rectangle annotation around its
 * bounding box and, for 'path' elements, markers on its path points.
 *
 * @param {Array<object>} pageElementData - elements stored for the page
 * @param {object} pageBuilder - not used by this function
 * @param {object} doc - not used by this function
 * @param {object} page - page exposing an async getDefaultMatrix()
 * @param {number} pageNumber - page the annotations are drawn on
 * @param {{x: number, y: number}} pdfMousePoint - click position
 * @param {boolean} selectTopElementOnly - stop after the first hit when true
 * @returns {Promise<void>}
 */
const ProcessElements = async (pageElementData, pageBuilder, doc, page, pageNumber, pdfMousePoint, selectTopElementOnly) => {
  const defaultMtx = await page.getDefaultMatrix();
  const inverseMtx = await defaultMtx.inverse();
  const click = await inverseMtx.mult(pdfMousePoint.x, pdfMousePoint.y);
  // (optional) display mouse point
  // await DrawPointAnnot(pageNumber, click.x, click.y);

  // Strictly-inside test; a click exactly on an edge does not count as a hit.
  const containsClick = box => box.x1 < click.x && box.x2 > click.x && box.y1 < click.y && box.y2 > click.y;

  let index = pageElementData.length;
  while (index-- > 0) {
    const candidate = pageElementData[index];
    const box = candidate.bbox;
    if (!containsClick(box)) {
      // click is outside this element's bounding box
      continue;
    }
    console.log('bounding box detected');

    await DrawRectangleAnnot(pageNumber, box.x1, box.y1, box.x2, box.y2);
    if (candidate.name === 'path') {
      await ProcessPaths(candidate.operators, candidate.points, candidate.ctm, pageNumber);
    }
    if (selectTopElementOnly) {
      return;
    }
  }
};
169
/**
 * Reads every element from the reader and returns plain records for the
 * paths and images it finds, so the page does not have to be read again.
 *
 * Elements that are not OC-visible or that are clipping paths are skipped.
 * Form XObjects are entered with formBegin(), read recursively with this same
 * function, and left with end(); their records are appended in reading order.
 * All other element types (text included) are ignored.
 *
 * @param {object} pageReader - element reader that has already been started
 * @returns {Promise<Array<object>>} records shaped
 *   { name: 'path', type, ctm, operators: Uint8Array, points: Float64Array, bbox } or
 *   { name: 'image', type, num, ctm, bbox }
 */
const ExtractElements = async pageReader => {
  let elementArray = [];
  // Read page contents
  for (let element = await pageReader.next(); element !== null; element = await pageReader.next()) {
    // does not display invisible elements or clipping path elements
    if (!(await element.isOCVisible()) || (await element.isClippingPath())) {
      continue;
    }
    // trace out images and paths (does not include text)
    const ctm = await element.getCTM();
    const elemType = await element.getType();
    switch (elemType) {
      case PDFNet.Element.Type.e_path: {
        // copy the path data into typed arrays for later use
        const pathinfo = await element.getPathData();
        const operators = new Uint8Array(pathinfo.operators);
        const points = new Float64Array(pathinfo.points);
        const bbox = await element.getBBox();
        elementArray.push({ name: 'path', type: elemType, ctm, operators, points, bbox });
        break;
      }
      case PDFNet.Element.Type.e_image: {
        const bbox = await element.getBBox();
        const xObject = await element.getXObject();
        const num = await xObject.getObjNum();
        elementArray.push({ name: 'image', type: elemType, num, ctm, bbox });
        break;
      }
      case PDFNet.Element.Type.e_form: {
        // Await entering and leaving the form so the nested read starts only
        // after the reader has switched streams, and so a failure in either
        // call rejects this function instead of becoming an unhandled rejection.
        await pageReader.formBegin();
        const nestedElements = await ExtractElements(pageReader);
        elementArray = elementArray.concat(nestedElements);
        await pageReader.end();
        break;
      }
      default:
        break;
    }
  }
  return elementArray;
};
232
/**
 * Handles one click: converts the click position, loads (or reuses) the
 * stored element data for the clicked page and highlights the elements under
 * the click.
 *
 * Element data is stored in pageElementDataList[pageNum] the first time a
 * page is clicked and reused afterwards. Any error is caught and logged
 * rather than rethrown, so the returned promise always resolves.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  try {
    const doc = pdfDoc;
    // Await every PDFNet call so the operations run in order and a failure
    // lands in the catch below instead of becoming an unhandled rejection.
    await doc.lock();
    await doc.initSecurityHandler();

    // to select all elements underneath mouse click instead of just the top-most element, change to false.
    const selectTopElementOnly = true;

    const viewerPageCoord = displayMode.windowToPage(windowCoord, pageNum);
    let pdfCoord = docCore.getPDFCoordinates(pageNum, viewerPageCoord.x, viewerPageCoord.y);

    const pageReader = await PDFNet.ElementReader.create();
    const pageBuilder = await PDFNet.ElementBuilder.create();

    const currPage = await doc.getPage(pageNum);
    // making sure mouse position is adjusted for rotations
    const pageRotMtx = await currPage.getDefaultMatrix();
    pdfCoord = await pageRotMtx.mult(pdfCoord.x, pdfCoord.y);

    let pageElementData = pageElementDataList[pageNum];
    // Read from the document and find its relevant elements if we haven't done so before.
    if (pageElementData === undefined) {
      const layersContext = new PDFNet.OCGContext(layersContextID);
      await pageReader.beginOnPage(currPage, layersContext);

      pageElementData = await ExtractElements(pageReader);
      pageElementDataList[pageNum] = pageElementData;
      await pageReader.end();
    }

    // Process the found elements
    await ProcessElements(pageElementData, pageBuilder, doc, currPage, pageNum, pdfCoord, selectTopElementOnly);

    // NOTE(review): PDFNet.Rect is called here without `new`, exactly as in the
    // original sample — confirm against the PDFNet API that this yields a
    // usable rect (otherwise the call below throws and is only logged).
    const sq = await PDFNet.SquareAnnot.create(doc, PDFNet.Rect(10, 200, 800, 300));
    await sq.setColor(await PDFNet.ColorPt.init(0, 0, 0), 3);
    await sq.refreshAppearance();
    await currPage.annotPushBack(sq);
  } catch (err) {
    // Deliberately not rethrown: callers rely on the promise resolving.
    console.error(err);
  }
};
279
280 // add your own license key as the second parameter, e.g. PDFNet.runWithCleanup(main, 'YOUR_LICENSE_KEY')
281 return PDFNet.runWithCleanup(main);
282 };
283
// Whenever a document is loaded, (re)install the mousedown handler that runs
// the element-selection script for the clicked page.
window.addEventListener('documentLoaded', async () => {
  try {
    await PDFNet.initialize();

    // get document
    let stillRunning = false;
    const documentViewer = instance.Core.documentViewer;
    const Annotations = instance.Core.Annotations;
    const doc = documentViewer.getDocument();
    const pdfDoc = await doc.getPDFDoc();

    if (prevListenerFunc) {
      // If we have a previously loaded pdf document, remove any event listeners from that document.
      documentViewer.getViewerElement().removeEventListener('mousedown', prevListenerFunc);
      // Clear out any information about the pdf's elements we may have stored.
      pageElementDataList = [];
    }

    const handleMouseClick = async evt => {
      // Make a check to see if processes are still running to prevent multiple from running at same time.
      if (stillRunning) {
        return;
      }
      stillRunning = true;
      try {
        const annotationManager = instance.Core.documentViewer.getAnnotationManager();
        // remove the annotations drawn for the previous click
        for (const annotation of prevAnnotations) {
          annotationManager.deleteAnnotation(annotation);
        }
        prevAnnotations = [];
        console.log(`MouseClick X: ${evt.pageX}, MouseClick Y: ${evt.pageY}`);

        // Get the Window coordinates
        const scrollContainer = document.querySelector('.DocumentContainer');
        const viewportTop = scrollContainer.scrollTop;
        const viewportLeft = scrollContainer.scrollLeft;
        const windowCoord = { x: evt.pageX + viewportLeft, y: evt.pageY + viewportTop };

        const displayModeManager = documentViewer.getDisplayModeManager();
        const displayMode = displayModeManager.getDisplayMode();
        // Get which page was clicked on
        const pageNumber = displayMode.getSelectedPages(windowCoord, windowCoord).first;

        // Get the context from the doc which is used for properly reading the elements on the pdf document.
        // layers context object, whenever layers changed, want to recalculate.
        await pdfDoc.requirePage(pageNumber);
        const layersContextID = await doc.extractPDFNetLayersContext();
        // running custom PDFNetJS script
        await runCustomScript(pdfDoc, layersContextID, windowCoord, pageNumber, documentViewer, Annotations, annotationManager);

        console.log('finished script');
        // refresh information on viewer and update appearance
        documentViewer.updateView();
      } catch (err) {
        console.error(err);
      } finally {
        // Always release the guard; otherwise a single failed click would
        // block every later click for this document.
        stillRunning = false;
      }
    };

    prevListenerFunc = handleMouseClick;
    documentViewer.getViewerElement().addEventListener('mousedown', handleMouseClick);
  } catch (err) {
    console.error(err);
  }
});
342})(window);
343// eslint-disable-next-line spaced-comment
344//# sourceURL=config.js
Did you find this helpful?
Trial setup questions?
Ask experts on Discord
Need other help?
Contact Support
Pricing or product questions?
Contact Sales