Search and Redact PDF on Server/Desktop

Requirements

View Demo

Package: Redaction

This sample code searches a PDF document for all instances of a search pattern and redacts these instances.

1const { PDFNet } = require('@pdftron/pdfnet-node');
2const ApryseLicense = require('../LicenseKey/LicenseKey');
3
/**
 * Searches a PDF for every whole-word occurrence of `pattern` and redacts each hit.
 *
 * Steps: open the document, run a highlighting text search, turn every
 * highlighted quad into a bounding-box redaction, apply the redactions with a
 * red overlay, and save the result as 'textsearch_redacted.pdf'.
 *
 * Every PDFNet call is awaited so the returned promise settles only after the
 * document has actually been redacted and saved, and so any failure rejects
 * that promise instead of becoming an unhandled rejection.
 *
 * @returns {Promise<void>} Resolves once the redacted document has been saved.
 */
const main = async () => {
  // Location of the PDF to redact — replace with your own input document.
  // NOTE(review): the call below is createFromURL; confirm whether a local
  // file path should go through a file-path based factory instead.
  const filename = 'input.pdf';
  const pattern = 'features'; // expression to search for
  const redactText = 'Top Secret'; // text to show in place of redacted content. Can be empty string.

  const doc = await PDFNet.PDFDoc.createFromURL(filename);
  const txtSearch = await PDFNet.TextSearch.create();
  // Search-mode flags are OR-ed together: whole-word matching plus highlight output.
  const mode = PDFNet.TextSearch.Mode.e_whole_word | PDFNet.TextSearch.Mode.e_highlight;

  await txtSearch.setMode(mode);
  await txtSearch.setPattern(pattern);

  // call begin() to initialize the text search.
  await txtSearch.begin(doc, pattern, mode);

  const redactions = []; // array to hold redaction objects
  let result = await txtSearch.run();
  // loop to find all instances of the pattern
  while (result.code === PDFNet.TextSearch.ResultCode.e_found) {
    // add a redaction object based on the location of the found instance
    const highlights = result.highlights;
    await highlights.begin(doc);
    while (await highlights.hasNext()) {
      const pageNumber = await highlights.getCurrentPageNumber();
      const quads = await highlights.getCurrentQuads();
      for (const quad of quads) {
        // Reduce the quad's four corners to an axis-aligned bounding box.
        const xs = [quad.p1x, quad.p2x, quad.p3x, quad.p4x];
        const ys = [quad.p1y, quad.p2y, quad.p3y, quad.p4y];
        const rect = await PDFNet.Rect.init(
          Math.min(...xs),
          Math.min(...ys),
          Math.max(...xs),
          Math.max(...ys),
        );
        redactions.push(await PDFNet.Redactor.redactionCreate(pageNumber, rect, false, redactText));
      }
      // Must finish advancing before hasNext() is asked again.
      await highlights.next();
    }
    result = await txtSearch.run();
  }

  const appearance = {
    redaction_overlay: true,
    border: false,
    positive_overlay_color: await PDFNet.ColorPt.init(1, 0.2, 0.2, 0), // red
    show_redacted_content_regions: true,
  };

  // Redaction has to complete before saving, and saving before main() resolves.
  await PDFNet.Redactor.redact(doc, redactions, appearance, false, false);
  await doc.save('textsearch_redacted.pdf', PDFNet.SDFDoc.SaveOptions.e_linearized);
};
46
// Run `main` through PDFNet.runWithCleanup, report any failure, then shut PDFNet down.
PDFNet.runWithCleanup(main, ApryseLicense.key) // provide your license key here
  .catch((error) => {
    // JSON.stringify(new Error('x')) yields '{}', so Error instances are
    // reported via their stack/message; other rejection values are serialized.
    const detail = error instanceof Error ? (error.stack || error.message) : JSON.stringify(error);
    console.error(`Error: ${detail}`);
    process.exitCode = 1; // let callers/CI see that the sample failed
  })
  // Return the shutdown promise so the chain settles only after shutdown completes.
  .then(() => PDFNet.shutdown());
54

1import com.pdftron.common.PDFNetException;
2import com.pdftron.pdf.*;
3import com.pdftron.pdf.Redactor.Redaction;
4import com.pdftron.sdf.SDFDoc;
5import java.util.ArrayList;
6import java.util.List;
7
8class SearchAndRedact {
9    public static void main(String[] args) {
10        PDFNet.initialize(PDFTronLicense.Key());
11        String pattern = "features"; // expression to search for
12        String redact_text = "Top Secret"; // text to show in place of redacted content. Can be empty string.
13
14        try (PDFDoc doc = new PDFDoc(FileNames.input)) {
15            TextSearch txt_search = new TextSearch();
16            int mode = TextSearch.e_whole_word | TextSearch.e_highlight; // Use whole word search
17            txt_search.setMode(mode);
18            txt_search.setPattern(pattern);
19            // call Begin() method to initialize the text search.
20            txt_search.begin(doc, pattern, mode, -1, -1);
21
22            // List to hold redaction objects
23            List<Redactor.Redaction> redactions = new ArrayList<Redactor.Redaction>();
24            TextSearchResult result;
25            while ((result = txt_search.run()).getCode() == TextSearchResult.e_found) {
26                // add a redaction object based on the location of the found instance
27                Highlights highlights = result.getHighlights();
28                highlights.begin(doc);
29                while (highlights.hasNext()) {
30                    int page_number = highlights.getCurrentPageNumber();
31                    double[] quads = highlights.getCurrentQuads();
32                    int quad_count = quads.length / 8;
33                    for (int i = 0; i < quad_count; ++i) {
34                        //assume each quad is an axis-aligned rectangle
35                        int offset = 8 * i;
36                        double x1 = Math.min(Math.min(Math.min(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
37                        double x2 = Math.max(Math.max(Math.max(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
38                        double y1 = Math.min(Math.min(Math.min(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);
39                        double y2 = Math.max(Math.max(Math.max(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);
40                        Rect rect = new Rect(x1, y1, x2, y2);
41                        redactions.add(new Redactor.Redaction(page_number, rect, false, redact_text));
42                    }
43                    highlights.next();
44                }
45            }
46            Redactor.Appearance appearance = new Redactor.Appearance();
47            appearance.redactionOverlay = true;
48            appearance.border = false;
49            appearance.positiveOverlayColor = new ColorPt(1, 0.2, 0.2, 0); // red
50            appearance.showRedactedContentRegions = true;
51            Redaction[] redactions_array = new Redaction[redactions.size()];
52            redactions.toArray(redactions_array);
53            Redactor.redact(doc, redactions_array, appearance, false, false);
54            doc.save(FileNames.output, SDFDoc.SaveMode.LINEARIZED, null);
55        } catch (PDFNetException e) {
56            System.out.println(e);
57        }
58        PDFNet.terminate();
59    }
60}
61

1using System;
2using System.Collections;
3
4using pdftron;
5using pdftron.Common;
6using pdftron.SDF;
7using pdftron.PDF;
8
namespace SearchAndRedactCS
{
   /// <summary>
   /// Sample that finds every whole-word match of a pattern in a PDF and redacts it,
   /// replacing each match with overlay text and saving a linearized copy.
   /// </summary>
   class Class1
   {
      private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();
      static Class1() { }

      /// <summary>
      /// Initializes PDFNet, redacts all matches in FileNames.input, saves the result to
      /// FileNames.output, and terminates PDFNet.
      /// </summary>
      /// <param name="args">Unused command-line arguments.</param>
      static void Main(string[] args)
      {
         PDFNet.Initialize(PDFTronLicense.Key);
         string pattern = "features"; // expression to search for
         string redact_text = "Top Secret"; // text to show in place of redacted content. Can be empty string.
         try
         {
            // Opening the document is inside the try so a failure to open is reported
            // like any other PDFNetException.
            using (PDFDoc doc = new PDFDoc(FileNames.input))
            {
               TextSearch txt_search = new TextSearch();
               int mode = (int)(TextSearch.SearchMode.e_whole_word | TextSearch.SearchMode.e_highlight); // Use whole word search
               // call Begin() method to initialize the text search.
               txt_search.Begin(doc, pattern, mode, -1, -1);
               // List to hold redaction objects
               ArrayList redactions = new ArrayList();
               int page_num = 0;
               string result_str = "", ambient_string = "";
               Highlights highlights = new Highlights();
               while (txt_search.Run(ref page_num, ref result_str, ref ambient_string, highlights) == TextSearch.ResultCode.e_found)
               {
                  // add a redaction object based on the location of the found instance
                  highlights.Begin(doc);
                  while (highlights.HasNext())
                  {
                     int page_number = highlights.GetCurrentPageNumber();
                     double[] quads = highlights.GetCurrentQuads();
                     // Eight doubles per quad: four (x, y) corner pairs.
                     int quad_count = quads.Length / 8;
                     for (int i = 0; i < quad_count; ++i)
                     {
                        //assume each quad is an axis-aligned rectangle
                        int offset = 8 * i;
                        double x1 = Math.Min(Math.Min(Math.Min(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
                        double x2 = Math.Max(Math.Max(Math.Max(quads[offset + 0], quads[offset + 2]), quads[offset + 4]), quads[offset + 6]);
                        double y1 = Math.Min(Math.Min(Math.Min(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);
                        double y2 = Math.Max(Math.Max(Math.Max(quads[offset + 1], quads[offset + 3]), quads[offset + 5]), quads[offset + 7]);
                        redactions.Add(new Redactor.Redaction(page_number, new Rect(x1, y1, x2, y2), false, redact_text));
                     }
                     highlights.Next();
                  }
               }
               Redactor.Appearance appearance = new Redactor.Appearance();
               appearance.RedactionOverlay = true;
               appearance.Border = false;
               appearance.PositiveOverlayColor = new ColorPt(1, 0.2, 0.2, 0); // red
               appearance.ShowRedactedContentRegions = true;
               Redactor.Redact(doc, redactions, appearance, false, false);
               doc.Save(FileNames.output, SDFDoc.SaveOptions.e_linearized);
            }
         }
         catch (PDFNetException e)
         {
            Console.WriteLine(e.Message);
         }
         finally
         {
            // Terminate only after the using block has disposed the document, and
            // do so even when an exception escapes the redaction code.
            PDFNet.Terminate();
         }
      }
   }
}
74

PDF redaction
Full code sample which shows how to use Apryse's PDFNet.Redactor to remove potentially sensitive content within PDF documents.

About redactor

Apryse Redactor ensures that when any portion of an image, text, or vector graphic falls inside a redaction region, that portion of the image or path data is destroyed rather than merely hidden with clipping or image masks. The Apryse SDK API can also be used to review and remove metadata and other content that can exist in a PDF document, including XML Forms Architecture (XFA) content and Extensible Metadata Platform (XMP) content.

The redaction process in Apryse SDK consists of two steps:

1. Content identification
A user applies redact annotations that specify the pieces or regions of content that should be removed. This example uses PDFNet.TextSearch to identify the content for redaction programmatically, but it can also be identified in other ways such as using PDFNet.TextExtractor, or interactively (e.g. using WebViewer). Up until the next step is performed, the user can see, move and redefine these annotations.

2. Content removal
Using PDFNet.Redactor.redact(), the user instructs Apryse SDK to apply the redact regions, after which the content in the areas specified by the redact annotations is removed. The redaction function includes a number of options to control the style of the redaction overlay (including color, text, font, border, transparency, etc.).

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

Product:

Search and Redact PDF on Server/Desktop

About redactor

On this page