Text position

This JavaScript sample lets you search and highlight text in PDF, DOCX, PPTX and XLSX documents by extracting text positions (no servers or other external dependencies required). Users can search through a PDF for a whole word or look for case-sensitive words. If a word is found in the document, it is highlighted, and the user can jump to the position of the word in the document. This sample works on all browsers (including IE11) and mobile devices without using plug-ins. To view an example, visit our Text Search & Highlight demo. Learn more about our Web SDK.

// Host element that the viewer is mounted into.
const viewerElement = document.getElementById('viewer');
// Pick the web-component build or the classic build of the viewer constructor.
// `isWebComponent` and `WebViewer` are not defined in this listing; presumably
// they are globals provided by the samples page — confirm against the host page.
// eslint-disable-next-line no-undef
const WebViewerConstructor = isWebComponent() ? WebViewer.WebComponent : WebViewer;
4
5WebViewerConstructor(
6 {
7 path: '../../../lib',
8 initialDoc: 'https://pdftron.s3.amazonaws.com/downloads/pl/legal-contract.pdf',
9 },
10 viewerElement
11).then(instance => {
12 samplesSetup(instance);
13 const { documentViewer, annotationManager, Annotations } = instance.Core;
14
/**
 * Creates one unchecked checkbox per page, each followed by a "Page N" label
 * and a line break, and appends them to the element with id "pages".
 *
 * @param {number} pageCount - Number of pages; checkboxes are created for pages 1..pageCount.
 * @returns {HTMLInputElement[]} The created checkbox inputs, in page order.
 */
const renderCheckBoxes = pageCount => {
  const pagesDiv = document.getElementById('pages');
  const checkboxes = [];

  for (let pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
    const checkboxId = `page-${pageNumber}`;

    const input = document.createElement('input');
    input.id = checkboxId;
    input.type = 'checkbox';
    input.checked = false;
    input.value = pageNumber;

    const label = document.createElement('label');
    label.htmlFor = checkboxId;
    // The label is plain text, so set it as text rather than parsing it as HTML.
    label.textContent = `Page ${pageNumber}`;

    pagesDiv.appendChild(input);
    pagesDiv.appendChild(label);
    pagesDiv.appendChild(document.createElement('br'));

    checkboxes.push(input);
  }

  return checkboxes;
};
43
/**
 * Highlights every occurrence of `searchText` on one page by adding a text
 * highlight annotation over each match and then selecting those annotations.
 *
 * @param {string} searchText - Exact (case-sensitive) text to look for.
 * @param {number} pageNumber - Page to search.
 * @returns {Promise<Array>} Resolves with the annotations that were added; resolves
 *   with an empty array when `searchText` is empty or when loading the text fails.
 */
const highlightText = (searchText, pageNumber) => {
  // An empty needle matches at every index without advancing the scan position,
  // so the search loop below would never terminate. Nothing to highlight anyway.
  if (!searchText) {
    return Promise.resolve([]);
  }

  const doc = documentViewer.getDocument();

  // gets all text on the requested page
  // see https://docs.apryse.com/api/web/Core.Document.html#loadPageText__anchor
  return doc
    .loadPageText(pageNumber)
    .then(text => {
      const annotationPromises = [];
      let textIndex = text.indexOf(searchText);

      // find each position of the searched text and build a highlight for it
      while (textIndex > -1) {
        const textEndIndex = textIndex + searchText.length;

        // gets quads for each of the characters from start to end index
        // see https://docs.apryse.com/api/web/Core.Document.html#getTextPosition__anchor
        annotationPromises.push(
          doc.getTextPosition(pageNumber, textIndex, textEndIndex).then(quads => {
            const annotation = new Annotations.TextHighlightAnnotation();
            annotation.Author = annotationManager.getCurrentUser();
            annotation.PageNumber = pageNumber;
            annotation.Quads = quads;
            annotation.StrokeColor = new Annotations.Color(0, 255, 255);
            return annotation;
          })
        );

        // continue after the current match so matches never overlap
        textIndex = text.indexOf(searchText, textEndIndex);
      }

      // Wait for all annotations to be resolved.
      return Promise.all(annotationPromises);
    })
    .then(annotations => {
      annotationManager.addAnnotations(annotations);
      annotationManager.selectAnnotations(annotations);
      return annotations;
    })
    .catch(error => {
      // Callers fire and forget, so report the failure here instead of
      // leaving an unhandled rejection.
      console.error(`Failed to highlight text on page ${pageNumber}`, error);
      return [];
    });
};
78
/**
 * Deletes the text highlight annotations on the given page.
 *
 * Only `TextHighlightAnnotation` instances are removed, so other annotations on
 * the same page (links, form widgets, user markup) are left in place.
 *
 * @param {number} pageNumber - Page whose text highlights should be removed.
 */
const removeHighlightedText = pageNumber => {
  const highlights = annotationManager
    .getAnnotationsList()
    .filter(
      annotation =>
        annotation.PageNumber === pageNumber &&
        annotation instanceof Annotations.TextHighlightAnnotation
    );

  annotationManager.deleteAnnotations(highlights);
};
85
// Once the document has loaded, build the per-page checkboxes and wire the
// search box and checkboxes to the highlight/remove helpers.
documentViewer.addEventListener('documentLoaded', () => {
  const textInput = document.getElementById('text');
  const checkboxes = renderCheckBoxes(documentViewer.getPageCount());

  // Toggling a page: highlight when it is checked and there is something to
  // search for; otherwise clear that page's highlights.
  checkboxes.forEach(checkbox => {
    checkbox.addEventListener('change', () => {
      const pageNumber = Number(checkbox.value);

      if (checkbox.checked && textInput.value) {
        highlightText(textInput.value, pageNumber);
      } else {
        removeHighlightedText(pageNumber);
      }
    });
  });

  // Editing the search text: refresh the highlights on every checked page.
  textInput.addEventListener(
    'input',
    // debounce loaded elsewhere
    // eslint-disable-next-line
    debounce(() => {
      checkboxes.forEach(checkbox => {
        if (!checkbox.checked) {
          return;
        }

        const pageNumber = Number(checkbox.value);
        removeHighlightedText(pageNumber);

        if (textInput.value) {
          highlightText(textInput.value, pageNumber);
        }
      });
    }, 200)
  );

  // highlight search text in the first page by default
  const firstCheckbox = checkboxes[0];
  // No checkbox exists when the page count is zero.
  if (firstCheckbox) {
    firstCheckbox.checked = true;

    // Skip the search when the box is blank; there is nothing to highlight.
    if (textInput.value) {
      highlightText(textInput.value, 1);
    }
  }
});
125});

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales