Create Unicode Text, Embed CID Fonts in PDFs - UnicodeWrite

Sample code for using Apryse SDK to create Unicode text and embed composite fonts in PDF files. Samples provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our Server SDK.

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using System.IO;
7using System.Text;
8using pdftron;
9using pdftron.Common;
10using pdftron.Filters;
11using pdftron.SDF;
12using pdftron.PDF;
13
namespace UnicodeWriteTestCS
{
    /// <summary>
    /// This example illustrates how to create Unicode text and how to embed composite fonts.
    /// </summary>
    class Class1
    {
        private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();
        static Class1() {}

        // Note: This demo assumes that 'arialuni.ttf' is present in '/Samples/TestFiles'
        // directory. Arial Unicode MS is about 24MB in size and it comes together with Windows and
        // MS Office.
        //
        // For more information about Arial Unicode MS, please consult the following Microsoft Knowledge
        // Base Article: WD2002: General Information About the Arial Unicode MS Font
        // http://support.microsoft.com/support/kb/articles/q287/2/47.asp
        //
        // For more information consult:
        // http://office.microsoft.com/search/results.aspx?Scope=DC&Query=font&CTT=6&Origin=EC010331121033
        // http://www.microsoft.com/downloads/details.aspx?FamilyID=1F0303AE-F055-41DA-A086-A65F22CB5593
        //
        // In case you don't have access to Arial Unicode MS you can use cyberbit.ttf
        // (ftp://ftp.netscape.com/pub/communicator/extras/fonts/windows/) instead.
        //

        /// <summary>
        /// Builds a single-page PDF containing (1) unshaped Unicode text runs in several scripts and
        /// (2) shaped text runs produced via <c>GetShapedText</c>, then saves it as 'unicodewrite.pdf'
        /// in the output folder.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            PDFNet.Initialize(PDFTronLicense.Key);

            // Relative path to the folder containing test files.
            string input_path = "../../../../TestFiles/";
            string output_path = "../../../../TestFiles/Output/";

            try
            {
                using (PDFDoc doc = new PDFDoc())
                using (ElementBuilder eb = new ElementBuilder())
                using (ElementWriter writer = new ElementWriter())
                {
                    // Start a new page ------------------------------------
                    Page page = doc.PageCreate(new Rect(0, 0, 612, 794));

                    writer.Begin(page); // begin writing to this page

                    Font fnt = CreateUnshapedTextFont(doc, input_path);

                    Element element = eb.CreateTextBegin(fnt, 1);
                    element.SetTextMatrix(10, 0, 0, 10, 50, 600);
                    element.GetGState().SetLeading(2); // Set the spacing between lines
                    writer.WriteElement(element);

                    // Hello World!!!
                    WriteUnicodeLine(writer, eb, "Hello World!");

                    // Latin
                    char[] latin = {
                        'a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', '\x45', '\x0046', '\x00C0',
                        '\x00C1', '\x00C2', '\x0143', '\x0144', '\x0145', '\x0152', '1', '2' // etc.
                    };
                    WriteUnicodeLine(writer, eb, new string(latin));

                    // Greek
                    char[] greek = {
                        (char)0x039E, (char)0x039F, (char)0x03A0, (char)0x03A1, (char)0x03A3,
                        (char)0x03A6, (char)0x03A8, (char)0x03A9 // etc.
                    };
                    WriteUnicodeLine(writer, eb, new string(greek));

                    // Cyrillic
                    char[] cyrillic = {
                        (char)0x0409, (char)0x040A, (char)0x040B, (char)0x040C, (char)0x040E, (char)0x040F, (char)0x0410, (char)0x0411,
                        (char)0x0412, (char)0x0413, (char)0x0414, (char)0x0415, (char)0x0416, (char)0x0417, (char)0x0418, (char)0x0419 // etc.
                    };
                    WriteUnicodeLine(writer, eb, new string(cyrillic));

                    // Hebrew
                    // The third entry is U+05D2; the list previously repeated U+05D3 and skipped it.
                    char[] hebrew = {
                        (char)0x05D0, (char)0x05D1, (char)0x05D2, (char)0x05D3, (char)0x05D4, (char)0x05D5, (char)0x05D6, (char)0x05D7, (char)0x05D8,
                        (char)0x05D9, (char)0x05DA, (char)0x05DB, (char)0x05DC, (char)0x05DD, (char)0x05DE, (char)0x05DF, (char)0x05E0, (char)0x05E1 // etc.
                    };
                    WriteUnicodeLine(writer, eb, new string(hebrew));

                    // Arabic
                    char[] arabic = {
                        (char)0x0624, (char)0x0625, (char)0x0626, (char)0x0627, (char)0x0628, (char)0x0629, (char)0x062A, (char)0x062B, (char)0x062C,
                        (char)0x062D, (char)0x062E, (char)0x062F, (char)0x0630, (char)0x0631, (char)0x0632, (char)0x0633, (char)0x0634, (char)0x0635 // etc.
                    };
                    WriteUnicodeLine(writer, eb, new string(arabic));

                    // Thai
                    char[] thai = {
                        (char)0x0E01, (char)0x0E02, (char)0x0E03, (char)0x0E04, (char)0x0E05, (char)0x0E06, (char)0x0E07, (char)0x0E08, (char)0x0E09,
                        (char)0x0E0A, (char)0x0E0B, (char)0x0E0C, (char)0x0E0D, (char)0x0E0E, (char)0x0E0F, (char)0x0E10, (char)0x0E11, (char)0x0E12 // etc.
                    };
                    WriteUnicodeLine(writer, eb, new string(thai));

                    // Hiragana - Japanese
                    // U+3050 follows U+304F; the list previously repeated U+3051 and skipped it.
                    char[] hiragana = {
                        (char)0x3041, (char)0x3042, (char)0x3043, (char)0x3044, (char)0x3045, (char)0x3046, (char)0x3047, (char)0x3048, (char)0x3049,
                        (char)0x304A, (char)0x304B, (char)0x304C, (char)0x304D, (char)0x304E, (char)0x304F, (char)0x3050, (char)0x3051, (char)0x3052 // etc.
                    };
                    WriteUnicodeLine(writer, eb, new string(hiragana));

                    // CJK Unified Ideographs
                    char[] cjk_uni = {
                        (char)0x5841, (char)0x5842, (char)0x5843, (char)0x5844, (char)0x5845, (char)0x5846, (char)0x5847, (char)0x5848, (char)0x5849,
                        (char)0x584A, (char)0x584B, (char)0x584C, (char)0x584D, (char)0x584E, (char)0x584F, (char)0x5850, (char)0x5851, (char)0x5852 // etc.
                    };
                    WriteUnicodeLine(writer, eb, new string(cjk_uni));

                    // Simplified Chinese
                    char[] chinese_simplified = {
                        (char)0x4e16, (char)0x754c, (char)0x60a8, (char)0x597d
                    };
                    WriteUnicodeLine(writer, eb, new string(chinese_simplified));

                    // Finish the block of text
                    writer.WriteElement(eb.CreateTextEnd());
                    Console.WriteLine("Now using text shaping logic to place text");

                    // Create a font in indexed encoding mode
                    // normally this would mean that we are required to provide glyph indices
                    // directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
                    // method to take care of this detail for us.
                    Font indexedFont = Font.CreateCIDTrueTypeFont(doc, input_path + "NotoSans_with_hindi.ttf", true, true, Font.Encoding.e_Indices);
                    element = eb.CreateTextBegin(indexedFont, 10.0);
                    writer.WriteElement(element);

                    double linePos = 350.0;
                    double lineSpace = 20.0;

                    // Heading line for the shaped-text section.
                    WriteShapedLine(writer, eb, indexedFont, "Shaped Hindi Text:", linePos);
                    linePos -= lineSpace;

                    // Read the Unicode text lines from the sample file. The file name indicates
                    // UTF-16LE, so decode with Encoding.Unicode (UTF-16 little-endian) rather than UTF-8.
                    String[] hindiTextLines = File.ReadAllLines(input_path + "hindi_sample_utf16le.txt", Encoding.Unicode);

                    Console.WriteLine("Read in " + hindiTextLines.Length + " lines of Unicode text from file");
                    foreach (String textLine in hindiTextLines)
                    {
                        WriteShapedLine(writer, eb, indexedFont, textLine, linePos);
                        linePos -= lineSpace;
                        Console.WriteLine("Wrote shaped line to page");
                    }

                    // Finish the shaped block of text
                    writer.WriteElement(eb.CreateTextEnd());

                    writer.End(); // save changes to the current page
                    doc.PagePushBack(page);
                    doc.Save(output_path + "unicodewrite.pdf", SDFDoc.SaveOptions.e_remove_unused | SDFDoc.SaveOptions.e_hex_strings);
                    Console.WriteLine("Done. Result saved in unicodewrite.pdf...");
                }
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }
            finally
            {
                // Always pair Initialize with Terminate, even when an exception other than
                // PDFNetException (for example an I/O error while reading the sample text) escapes.
                PDFNet.Terminate();
            }
        }

        // Picks the font for the unshaped text: tries the installed "Arial Unicode MS" font, then
        // ARIALUNI.TTF in the test-files folder, then ARIALUNI.TTF in C:/Windows/Fonts, and finally
        // falls back to Helvetica. Each failed attempt prints the exception message.
        private static Font CreateUnshapedTextFont(PDFDoc doc, string input_path)
        {
            Font fnt = null;
            try
            {
                // Full font embedding
                System.Drawing.Font myfont = new System.Drawing.Font("Arial Unicode MS", 12);
                fnt = Font.CreateCIDTrueTypeFont(doc, myfont, true, true);
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
            }

            if (fnt == null)
            {
                fnt = TryCreateCIDFontFromFile(doc, input_path + "ARIALUNI.TTF");
            }

            if (fnt == null)
            {
                fnt = TryCreateCIDFontFromFile(doc, "C:/Windows/Fonts/ARIALUNI.TTF");
            }

            if (fnt == null)
            {
                Console.WriteLine("Note: using system font substitution for unshaped unicode text");
                fnt = Font.Create(doc, "Helvetica", "");
            }
            else
            {
                Console.WriteLine("Note: using Arial Unicode for unshaped unicode text");
            }

            return fnt;
        }

        // Creates an embedded CID TrueType font from a font file; prints the exception message
        // and returns null when PDFNet reports a failure.
        private static Font TryCreateCIDFontFromFile(PDFDoc doc, string fontPath)
        {
            try
            {
                return Font.CreateCIDTrueTypeFont(doc, fontPath, true, true);
            }
            catch (PDFNetException e)
            {
                Console.WriteLine(e.Message);
                return null;
            }
        }

        // Writes one Unicode text run followed by a text new-line element.
        private static void WriteUnicodeLine(ElementWriter writer, ElementBuilder eb, string text)
        {
            writer.WriteElement(eb.CreateUnicodeTextRun(text));
            writer.WriteElement(eb.CreateTextNewLine());
        }

        // Shapes 'text' with 'font', positions the resulting run at x = 50, y = 'linePos'
        // with a 1.5x scale, and writes it to the page.
        private static void WriteShapedLine(ElementWriter writer, ElementBuilder eb, Font font, string text, double linePos)
        {
            // Transform unicode text into an abstract collection of glyph indices and positioning info
            ShapedText shapedText = font.GetShapedText(text);

            // transform the shaped text info into a PDF element and write it to the page
            Element element = eb.CreateShapedTextRun(shapedText);
            element.SetTextMatrix(1.5, 0, 0, 1.5, 50, linePos);
            writer.WriteElement(element);
        }
    }
}

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales
Create Unicode Text, Embed CID Fonts in PDF - Sample Code | Apryse documentation