Create Unicode Text, Embed CID in PDFs - C++ Sample Code

Sample code for using the Apryse SDK to create Unicode text and embed composite (CID) fonts in PDF files. Samples are provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go, and VB. Learn more about our Server SDK.

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5#include <PDF/PDFNet.h>
6#include <PDF/PDFDoc.h>
7#include <PDF/ElementBuilder.h>
8#include <PDF/ElementWriter.h>
9#include <PDF/ColorSpace.h>
10#include <PDF/ShapedText.h>
11#include <PDF/Rect.h>
12
13#include <Filters/MappedFile.h>
14#include <Filters/FilterReader.h>
15
16#include <fstream>
17#include <iostream>
18#include "../../LicenseKey/CPP/LicenseKey.h"
19
20using namespace std;
21
22using namespace pdftron;
23using namespace SDF;
24using namespace PDF;
25
26std::vector<UString> TextFileToStringList(const UString& file_path);
27
28/**
29 * This example illustrates how to create Unicode text and how to embed composite fonts.
30 *
31 * Note: This demo attempts to make use of 'arialuni.ttf' in the '/Samples/TestFiles'
32 * directory. Arial Unicode MS is about 24MB in size and used to come together with Windows and
33 * MS Office.
34 *
35 * In case you don't have access to Arial Unicode MS you can use another wide coverage
36 * font, like Google Noto, GNU UniFont, or cyberbit. Many of these are freely available,
37 * and there is a list maintained at https://en.wikipedia.org/wiki/Unicode_font
38 *
39 * If no specific font file can be loaded, the demo will fall back to system specific font
40 * substitution routines, and the result will depend on which fonts are available.
41 *
42 */
43int main(int argc, char *argv[])
44{
45 int ret = 0;
46 PDFNet::Initialize(LicenseKey);
47
48 // Relative path to the folder containing test files.
49 string input_path = "../../TestFiles/";
50 string output_path = "../../TestFiles/Output/";
51
52 try
53 {
54 PDFDoc doc;
55
56 ElementBuilder eb;
57 ElementWriter writer;
58
59 // Start a new page ------------------------------------
60 Page page = doc.PageCreate(Rect(0, 0, 612, 794));
61
62 writer.Begin(page); // begin writing to this page
63
64 string font_program = input_path + "ARIALUNI.TTF";
65
66 // RAII block for ifstream
67 {
68 std::ifstream ifs(font_program.c_str(), ios_base::in);
69#if defined(_WIN32)
70 if (!ifs.is_open()) {
71 font_program = string("C:/Windows/Fonts/ARIALUNI.TTF");
72 ifs.open(font_program.c_str(), ios_base::in);
73 }
74#endif
75 if (!ifs.is_open()) {
76 font_program.clear();
77 }
78 }
79
80 Font fnt;
81 if(font_program.size())
82 {
83 cout << "Note: using " << font_program << " for unshaped unicode text" << endl;
84 // if we can find a specific wide-coverage font file, then use that directly
85 fnt = Font::CreateCIDTrueTypeFont(doc, font_program.c_str(), true, true);
86 }
87 else
88 {
89 cout << "Note: using system font substitution for unshaped unicode text" << endl;
90 // if we can't find a specific file, then use system font subsitution
91 // as a fallback, using "Helvetica" as a hint
92 UString empty_temp;
93 fnt = Font::Create(doc, "Helvetica", empty_temp);
94 }
95
96 Element element = eb.CreateTextBegin(fnt, 1);
97 element.SetTextMatrix(10, 0, 0, 10, 50, 600);
98 element.GetGState().SetLeading(2); // Set the spacing between lines
99 writer.WriteElement(element);
100
101 // Hello World!
102 Unicode hello[] = { 'H','e','l','l','o',' ','W','o','r','l','d','!'};
103 writer.WriteElement(eb.CreateUnicodeTextRun(hello, sizeof(hello)/sizeof(Unicode)));
104 writer.WriteElement(eb.CreateTextNewLine());
105
106 // Latin
107 Unicode latin[] = {
108 'a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0,
109 0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2' // etc.
110 };
111 writer.WriteElement(eb.CreateUnicodeTextRun(latin, sizeof(latin)/sizeof(Unicode)));
112 writer.WriteElement(eb.CreateTextNewLine());
113
114 // Greek
115 Unicode greek[] = {
116 0x039E, 0x039F, 0x03A0, 0x03A1,0x03A3, 0x03A6, 0x03A8, 0x03A9 // etc.
117 };
118 writer.WriteElement(eb.CreateUnicodeTextRun(greek, sizeof(greek)/sizeof(Unicode)));
119 writer.WriteElement(eb.CreateTextNewLine());
120
121 // Cyrillic
122 Unicode cyrillic[] = {
123 0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
124 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419 // etc.
125 };
126 writer.WriteElement(eb.CreateUnicodeTextRun(cyrillic, sizeof(cyrillic)/sizeof(Unicode)));
127 writer.WriteElement(eb.CreateTextNewLine());
128
129 // Hebrew
130 Unicode hebrew[] = {
131 0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
132 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1 // etc.
133 };
134 writer.WriteElement(eb.CreateUnicodeTextRun(hebrew, sizeof(hebrew)/sizeof(Unicode)));
135 writer.WriteElement(eb.CreateTextNewLine());
136
137 // Arabic
138 Unicode arabic[] = {
139 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
140 0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635 // etc.
141 };
142 writer.WriteElement(eb.CreateUnicodeTextRun(arabic, sizeof(arabic)/sizeof(Unicode)));
143 writer.WriteElement(eb.CreateTextNewLine());
144
145 // Thai
146 Unicode thai[] = {
147 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09,
148 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12 // etc.
149 };
150 writer.WriteElement(eb.CreateUnicodeTextRun(thai, sizeof(thai)/sizeof(Unicode)));
151 writer.WriteElement(eb.CreateTextNewLine());
152
153 // Hiragana - Japanese
154 Unicode hiragana[] = {
155 0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
156 0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052 // etc.
157 };
158 writer.WriteElement(eb.CreateUnicodeTextRun(hiragana, sizeof(hiragana)/sizeof(Unicode)));
159 writer.WriteElement(eb.CreateTextNewLine());
160
161 // CJK Unified Ideographs
162 Unicode cjk_uni[] = {
163 0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849,
164 0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852 // etc.
165 };
166 writer.WriteElement(eb.CreateUnicodeTextRun(cjk_uni, sizeof(cjk_uni)/sizeof(Unicode)));
167 writer.WriteElement(eb.CreateTextNewLine());
168
169 // Simplified Chinese
170 Unicode chinese_simplified[] = {
171 0x4e16, 0x754c, 0x60a8, 0x597d
172 };
173 writer.WriteElement(eb.CreateUnicodeTextRun(chinese_simplified, sizeof(chinese_simplified)/sizeof(Unicode)));
174 writer.WriteElement(eb.CreateTextNewLine());
175
176 // Finish the block of text
177 writer.WriteElement(eb.CreateTextEnd());
178
179 cout << "Now using text shaping logic to place text" << endl;
180
181 // Create a font in indexed encoding mode
182 // normally this would mean that we are required to provide glyph indices
183 // directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
184 // method to take care of this detail for us.
185 Font indexed_font = Font::CreateCIDTrueTypeFont(doc, input_path + "NotoSans_with_hindi.ttf", true, true, Font::e_Indices);
186 element = eb.CreateTextBegin(indexed_font, 10);
187 writer.WriteElement(element);
188
189 double line_pos = 350.0;
190 double line_space = 20.0;
191
192 // Transform unicode text into an abstract collection of glyph indices and positioning info
193 ShapedText shaped_text = indexed_font.GetShapedText(UString("Shaped Hindi Text:"));
194
195 // transform the shaped text info into a PDF element and write it to the page
196 element = eb.CreateShapedTextRun(shaped_text);
197 element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos);
198 writer.WriteElement(element);
199
200 // read in unicode text lines from a file
201 std::vector<UString> hindi_text = TextFileToStringList(input_path + "hindi_sample_utf16le.txt");
202
203 cout << "Read in " << hindi_text.size() << " lines of Unicode text from file" << endl;
204 for (size_t i = 0; i < hindi_text.size(); ++i)
205 {
206 shaped_text = indexed_font.GetShapedText(hindi_text[i]);
207 element = eb.CreateShapedTextRun(shaped_text);
208 element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos-line_space*(i+1));
209 writer.WriteElement(element);
210 cout << "Wrote shaped line to page" << endl;
211 }
212
213 // Finish the shaped block of text
214 writer.WriteElement(eb.CreateTextEnd());
215
216 writer.End(); // save changes to the current page
217 doc.PagePushBack(page);
218
219 doc.Save((output_path + "unicodewrite.pdf").c_str(), SDFDoc::e_remove_unused | SDFDoc::e_hex_strings , NULL);
220 cout << "Done. Result saved in unicodewrite.pdf..." << endl;
221 }
222 catch(Common::Exception& e)
223 {
224 cout << e << endl;
225 ret = 1;
226 }
227 catch(...)
228 {
229 cout << "Unknown Exception" << endl;
230 ret = 1;
231 }
232
233 PDFNet::Terminate();
234 return ret;
235}
236
237std::vector<UString> TextFileToStringList(const UString& file_path)
238{
239 Filters::MappedFile utf_text_filter(file_path);
240 size_t file_size = utf_text_filter.FileSize();
241 Filters::FilterReader utf_reader(utf_text_filter);
242 std::vector<unsigned char> data = utf_reader.Read(file_size);
243 data.push_back(0);
244 data.push_back(0);
245 std::vector<UString> ret;
246 size_t line_start = 0;
247 for (size_t i = 0; i+1 < data.size(); i +=2)
248 {
249 bool has_newline = false;
250 size_t end_index = i;
251 while(i+1 < data.size() && data[i+1] == 0 && (data[i] == '\n' || data[i] == '\r'))
252 {
253 i+=2;
254 has_newline = true;
255 }
256 if(has_newline || (i + 2 >= data.size() && line_start < data.size()))
257 {
258 ret.push_back(UString(reinterpret_cast<Unicode*>(&data[line_start]), (end_index - line_start)/2));
259 line_start = i;
260 }
261 }
262 return ret;
263}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales