Extract Image from PDFs - Go Sample Code

Sample code for using Apryse SDK to extract images from PDF files, along with their positioning information and DPI; provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby and VB. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample).

Learn more about our full PDF Data Extraction SDK Capabilities.

To start your free trial, get started with the Server SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8 "fmt"
9 "strconv"
10 . "pdftron"
11)
12
13import "pdftron/Samples/LicenseKey/GO"
14
15//-----------------------------------------------------------------------------------
16// This sample illustrates one approach to PDF image extraction
17// using PDFNet.
18//
19// Note: Besides direct image export, you can also convert PDF images
20// to GDI+ Bitmap, or extract uncompressed/compressed image data directly
21// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv
22// sample project).
23//-----------------------------------------------------------------------------------
24
var (
	// imageCounter is the running number of images reported so far; it is
	// used both in the console output and in the exported file names.
	imageCounter int

	// inputPath and outputPath are relative paths to the folder holding the
	// test files and to the folder that receives the exported images.
	inputPath  = "../../TestFiles/"
	outputPath = "../../TestFiles/Output/"
)
30
31func ImageExtract(reader ElementReader){
32 element := reader.Next()
33
34 for element.GetMp_elem().Swigcptr() != 0{
35 if (element.GetType() == ElementE_image ||
36 element.GetType() == ElementE_inline_image){
37 imageCounter += 1
38 fmt.Println("--> Image: " + strconv.Itoa(imageCounter))
39 fmt.Println(" Width: " + strconv.Itoa(element.GetImageWidth()))
40 fmt.Println(" Height: " + strconv.Itoa(element.GetImageHeight()))
41 fmt.Println(" BPC: " + strconv.Itoa(element.GetBitsPerComponent()))
42
43 ctm := element.GetCTM()
44 x2 := 1
45 y2 := 1
46 pt := NewPoint(float64(x2), float64(y2))
47 point := ctm.Mult(pt)
48 fmt.Println(" Coords: x1=%.2f, y1=%.2f, x2=%.2f, y2=%.2f", ctm.GetM_h(), ctm.GetM_v(), point.GetX(), point.GetY())
49
50 if element.GetType() == ElementE_image{
51 image := NewImage(element.GetXObject())
52
53 fname := "image_extract1_" + strconv.Itoa(imageCounter)
54
55 path := outputPath + fname
56 image.Export(path)
57
58 //path = outputPath + fname + ".tif"
59 //image.ExportAsTiff(path)
60
61 //path = outputPath + fname + ".png"
62 //image.ExportAsPng(path)
63 }
64 }else if element.GetType() == ElementE_form{
65 reader.FormBegin()
66 ImageExtract(reader)
67 reader.End()
68 }
69 element = reader.Next()
70 }
71}
72
73func main(){
74 // Initialize PDFNet
75 PDFNetInitialize(PDFTronLicense.Key)
76
77 // Example 1:
78 // Extract images by traversing the display list for
79 // every page. With this approach it is possible to obtain
80 // image positioning information and DPI.
81
82 doc := NewPDFDoc(inputPath + "newsletter.pdf")
83 doc.InitSecurityHandler()
84
85 reader := NewElementReader()
86
87 // Read every page
88 itr := doc.GetPageIterator()
89 for itr.HasNext(){
90 reader.Begin(itr.Current())
91 ImageExtract(reader)
92 reader.End()
93 itr.Next()
94 }
95
96 doc.Close()
97 fmt.Println("Done.")
98
99 fmt.Println("----------------------------------------------------------------")
100
101 // Example 2:
102 // Extract images by scanning the low-level document.
103
104 doc = NewPDFDoc(inputPath + "newsletter.pdf")
105 doc.InitSecurityHandler()
106 imageCounter= 0
107
108 cosDoc := doc.GetSDFDoc()
109 numObjs := cosDoc.XRefSize()
110 i := uint(1)
111 for i < numObjs{
112 obj := cosDoc.GetObj(i)
113 if(obj != nil && !obj.IsFree() && obj.IsStream()){
114
115 // Process only images
116 itr := obj.Find("Type")
117
118 if (!itr.HasNext()) || (itr.Value().GetName() != "XObject"){
119 i = i + 1
120 continue
121 }
122 itr = obj.Find("Subtype")
123 if (!itr.HasNext()) || (itr.Value().GetName() != "Image"){
124 i = i + 1
125 continue
126 }
127 image := NewImage(obj)
128
129 imageCounter = imageCounter + 1
130 fmt.Println("--> Image: " + strconv.Itoa(imageCounter))
131 fmt.Println(" Width: " + strconv.Itoa(image.GetImageWidth()))
132 fmt.Println(" Height: " + strconv.Itoa(image.GetImageHeight()))
133 fmt.Println(" BPC: " + strconv.Itoa(image.GetBitsPerComponent()))
134
135 fname := "image_extract2_" + strconv.Itoa(imageCounter)
136
137 path := outputPath + fname
138 image.Export(path)
139
140 //path = outputPath + fname + ".tif"
141 //image.ExportAsTiff(path)
142
143 //path = outputPath + fname + ".png"
144 //image.ExportAsPng(path)
145 }
146 i = i + 1
147 }
148 doc.Close()
149 PDFNetTerminate()
150 fmt.Println("Done.")
151}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales