Create Unicode Text, Embed CID in PDFs - Python Sample Code

Sample code for using Apryse SDK to create Unicode text and embed composite fonts in PDF files. Samples provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our Server SDK.

1//
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3//
4
5using System;
6using System.IO;
7using System.Text;
8using pdftron;
9using pdftron.Common;
10using pdftron.Filters;
11using pdftron.SDF;
12using pdftron.PDF;
13
namespace UnicodeWriteTestCS
{
	/// <summary>
	/// This example illustrates how to create Unicode text and how to embed composite fonts.
	/// </summary>
	class Class1
	{
		private static pdftron.PDFNetLoader pdfNetLoader = pdftron.PDFNetLoader.Instance();
		static Class1() {}

		/// <summary>
		/// Tries to create an embedded CID TrueType font for Arial Unicode MS.
		/// </summary>
		/// <param name="doc">Document the font is created in.</param>
		/// <param name="input_path">Folder that is probed for 'ARIALUNI.TTF'.</param>
		/// <returns>
		/// The first font that could be created, or null when every attempt failed.
		/// Each failed attempt prints the PDFNetException message to the console.
		/// </returns>
		private static Font LoadArialUnicode(PDFDoc doc, string input_path)
		{
			Font fnt = null;
			try
			{
				// Full font embedding, starting from the installed system font.
				System.Drawing.Font myfont = new System.Drawing.Font("Arial Unicode MS", 12);
				fnt = Font.CreateCIDTrueTypeFont(doc, myfont, true, true);
			}
			catch (PDFNetException e)
			{
				Console.WriteLine(e.Message);
			}

			// Fall back to explicit font files, in the same order as before:
			// the test files folder first, then the Windows font folder.
			string[] fontFiles = { input_path + "ARIALUNI.TTF", "C:/Windows/Fonts/ARIALUNI.TTF" };
			foreach (string fontFile in fontFiles)
			{
				if (fnt != null)
				{
					break;
				}

				try
				{
					fnt = Font.CreateCIDTrueTypeFont(doc, fontFile, true, true);
				}
				catch (PDFNetException e)
				{
					Console.WriteLine(e.Message);
				}
			}

			return fnt;
		}

		/// <summary>
		/// Writes <paramref name="text"/> as one Unicode text run followed by a line break.
		/// </summary>
		private static void WriteUnicodeLine(ElementWriter writer, ElementBuilder eb, string text)
		{
			writer.WriteElement(eb.CreateUnicodeTextRun(text));
			writer.WriteElement(eb.CreateTextNewLine());
		}

		// Note: This demo assumes that 'arialuni.ttf' is present in '/Samples/TestFiles' 
		// directory. Arial Unicode MS is about 24MB in size and it comes together with Windows and 
		// MS Office.
		// 
		// For more information about Arial Unicode MS, please consult the following Microsoft Knowledge 
		// Base Article: WD2002: General Information About the Arial Unicode MS Font
		//  http://support.microsoft.com/support/kb/articles/q287/2/47.asp
		//
		// For more information consult: 
		//    http://office.microsoft.com/search/results.aspx?Scope=DC&Query=font&CTT=6&Origin=EC010331121033
		//    http://www.microsoft.com/downloads/details.aspx?FamilyID=1F0303AE-F055-41DA-A086-A65F22CB5593
		// 
		// In case you don't have access to Arial Unicode MS you can use cyberbit.ttf 
		// (ftp://ftp.netscape.com/pub/communicator/extras/fonts/windows/) instead.
		//
		static void Main(string[] args)
		{
			PDFNet.Initialize(PDFTronLicense.Key);

			// Relative path to the folder containing test files.
			string input_path =  "../../../../TestFiles/";
			string output_path = "../../../../TestFiles/Output/";

			try
			{
				using (PDFDoc doc = new PDFDoc())
				using (ElementBuilder eb = new ElementBuilder())
				using (ElementWriter writer = new ElementWriter())
				{
					// Start a new page ------------------------------------
					Page page = doc.PageCreate(new Rect(0, 0, 612, 794));

					writer.Begin(page); // begin writing to this page

					Font fnt = LoadArialUnicode(doc, input_path);
					if (fnt == null)
					{
						Console.WriteLine("Note: using system font substitution for unshaped unicode text");
						fnt = Font.Create(doc, "Helvetica", "");
					}
					else
					{
						Console.WriteLine("Note: using Arial Unicode for unshaped unicode text");
					}

					Element element = eb.CreateTextBegin(fnt, 1);
					element.SetTextMatrix(10, 0, 0, 10, 50, 600);
					element.GetGState().SetLeading(2);		 // Set the spacing between lines
					writer.WriteElement(element);

					// Hello World!
					WriteUnicodeLine(writer, eb, "Hello World!");

					// Latin
					char[] latin = {
						'a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', '\x45', '\x0046', '\x00C0',
						'\x00C1', '\x00C2', '\x0143', '\x0144', '\x0145', '\x0152', '1', '2' // etc.
					};
					WriteUnicodeLine(writer, eb, new string(latin));

					// Greek
					char[] greek = {
						(char)0x039E, (char)0x039F, (char)0x03A0, (char)0x03A1, (char)0x03A3,
						(char)0x03A6, (char)0x03A8, (char)0x03A9  // etc.
					};
					WriteUnicodeLine(writer, eb, new string(greek));

					// Cyrillic
					char[] cyrillic = {
						(char)0x0409, (char)0x040A, (char)0x040B, (char)0x040C, (char)0x040E, (char)0x040F, (char)0x0410, (char)0x0411,
						(char)0x0412, (char)0x0413, (char)0x0414, (char)0x0415, (char)0x0416, (char)0x0417, (char)0x0418, (char)0x0419 // etc.
					};
					WriteUnicodeLine(writer, eb, new string(cyrillic));

					// Hebrew
					// NOTE(review): 0x05D3 appears twice and 0x05D2 is skipped - possibly a typo; kept as-is.
					char[] hebrew = {
						(char)0x05D0, (char)0x05D1, (char)0x05D3, (char)0x05D3, (char)0x05D4, (char)0x05D5, (char)0x05D6, (char)0x05D7, (char)0x05D8,
						(char)0x05D9, (char)0x05DA, (char)0x05DB, (char)0x05DC, (char)0x05DD, (char)0x05DE, (char)0x05DF, (char)0x05E0, (char)0x05E1 // etc.
					};
					WriteUnicodeLine(writer, eb, new string(hebrew));

					// Arabic
					char[] arabic = {
						(char)0x0624, (char)0x0625, (char)0x0626, (char)0x0627, (char)0x0628, (char)0x0629, (char)0x062A, (char)0x062B, (char)0x062C,
						(char)0x062D, (char)0x062E, (char)0x062F, (char)0x0630, (char)0x0631, (char)0x0632, (char)0x0633, (char)0x0634, (char)0x0635 // etc.
					};
					WriteUnicodeLine(writer, eb, new string(arabic));

					// Thai
					char[] thai = {
						(char)0x0E01, (char)0x0E02, (char)0x0E03, (char)0x0E04, (char)0x0E05, (char)0x0E06, (char)0x0E07, (char)0x0E08, (char)0x0E09,
						(char)0x0E0A, (char)0x0E0B, (char)0x0E0C, (char)0x0E0D, (char)0x0E0E, (char)0x0E0F, (char)0x0E10, (char)0x0E11, (char)0x0E12 // etc.
					};
					WriteUnicodeLine(writer, eb, new string(thai));

					// Hiragana - Japanese
					// NOTE(review): 0x3051 appears twice and 0x3050 is skipped - possibly a typo; kept as-is.
					char[] hiragana = {
						(char)0x3041, (char)0x3042, (char)0x3043, (char)0x3044, (char)0x3045, (char)0x3046, (char)0x3047, (char)0x3048, (char)0x3049,
						(char)0x304A, (char)0x304B, (char)0x304C, (char)0x304D, (char)0x304E, (char)0x304F, (char)0x3051, (char)0x3051, (char)0x3052 // etc.
					};
					WriteUnicodeLine(writer, eb, new string(hiragana));

					// CJK Unified Ideographs
					char[] cjk_uni = {
						(char)0x5841, (char)0x5842, (char)0x5843, (char)0x5844, (char)0x5845, (char)0x5846, (char)0x5847, (char)0x5848, (char)0x5849,
						(char)0x584A, (char)0x584B, (char)0x584C, (char)0x584D, (char)0x584E, (char)0x584F, (char)0x5850, (char)0x5851, (char)0x5852 // etc.
					};
					WriteUnicodeLine(writer, eb, new string(cjk_uni));

					// Simplified Chinese
					char[] chinese_simplified = {
						(char)0x4e16, (char)0x754c, (char)0x60a8, (char)0x597d
					};
					WriteUnicodeLine(writer, eb, new string(chinese_simplified));

					// Finish the block of text
					writer.WriteElement(eb.CreateTextEnd());
					Console.WriteLine("Now using text shaping logic to place text");

					// Create a font in indexed encoding mode 
					// normally this would mean that we are required to provide glyph indices
					// directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
					// method to take care of this detail for us.
					Font indexedFont = Font.CreateCIDTrueTypeFont(doc, input_path + "NotoSans_with_hindi.ttf", true, true, Font.Encoding.e_Indices);
					element = eb.CreateTextBegin(indexedFont, 10.0);
					writer.WriteElement(element);

					double linePos = 350.0;
					double lineSpace = 20.0;

					// Transform unicode text into an abstract collection of glyph indices and positioning info 
					ShapedText shapedText = indexedFont.GetShapedText("Shaped Hindi Text:");

					// transform the shaped text info into a PDF element and write it to the page
					element = eb.CreateShapedTextRun(shapedText);
					element.SetTextMatrix(1.5, 0, 0, 1.5, 50, linePos);
					linePos -= lineSpace;
					writer.WriteElement(element);

					// Read the sample lines from the text file. The file is UTF-16LE encoded
					// (see its name), so it is decoded with Encoding.Unicode rather than UTF-8.
					String[] hindiTextLines = File.ReadAllLines(input_path + "hindi_sample_utf16le.txt", Encoding.Unicode);

					Console.WriteLine("Read in " + hindiTextLines.Length + " lines of Unicode text from file");
					foreach (String textLine in hindiTextLines)
					{
						shapedText = indexedFont.GetShapedText(textLine);
						element = eb.CreateShapedTextRun(shapedText);
						element.SetTextMatrix(1.5, 0, 0, 1.5, 50, linePos);
						linePos -= lineSpace;
						writer.WriteElement(element);
						Console.WriteLine("Wrote shaped line to page");
					}

					// Finish the shaped block of text
					writer.WriteElement(eb.CreateTextEnd());

					writer.End();  // save changes to the current page
					doc.PagePushBack(page);
					doc.Save(output_path + "unicodewrite.pdf", SDFDoc.SaveOptions.e_remove_unused | SDFDoc.SaveOptions.e_hex_strings);
					Console.WriteLine("Done. Result saved in unicodewrite.pdf...");
				}
			}
			catch (PDFNetException e)
			{
				Console.WriteLine(e.Message);
			}
			catch (IOException e)
			{
				// File.ReadAllLines reports a missing or unreadable text file this way.
				Console.WriteLine(e.Message);
			}
			finally
			{
				// Release the library even when the sample fails part-way through.
				PDFNet.Terminate();
			}
		}
	}
}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5#include <PDF/PDFNet.h>
6#include <PDF/PDFDoc.h>
7#include <PDF/ElementBuilder.h>
8#include <PDF/ElementWriter.h>
9#include <PDF/ColorSpace.h>
10#include <PDF/ShapedText.h>
11#include <PDF/Rect.h>
12
13#include <Filters/MappedFile.h>
14#include <Filters/FilterReader.h>
15
16#include <fstream>
17#include <iostream>
18#include "../../LicenseKey/CPP/LicenseKey.h"
19
20using namespace std;
21
22using namespace pdftron;
23using namespace SDF;
24using namespace PDF;
25
26std::vector<UString> TextFileToStringList(const UString& file_path);
27
28/**
29 * This example illustrates how to create Unicode text and how to embed composite fonts.
30 * 
31 * Note: This demo attempts to make use of 'arialuni.ttf' in the '/Samples/TestFiles' 
32 * directory. Arial Unicode MS is about 24MB in size and used to come together with Windows and 
33 * MS Office.
34 * 
35 * In case you don't have access to Arial Unicode MS you can use another wide coverage
36 * font, like Google Noto, GNU UniFont, or cyberbit. Many of these are freely available,
37 * and there is a list maintained at https://en.wikipedia.org/wiki/Unicode_font
38 * 
39 * If no specific font file can be loaded, the demo will fall back to system specific font
40 * substitution routines, and the result will depend on which fonts are available.
41 * 
42 */
43int main(int argc, char *argv[])
44{
45	int ret = 0;
46	PDFNet::Initialize(LicenseKey);
47
48	// Relative path to the folder containing test files.
49	string input_path =  "../../TestFiles/";
50	string output_path = "../../TestFiles/Output/";
51
52	try  
53	{	 
54		PDFDoc doc;
55
56		ElementBuilder eb;		
57		ElementWriter writer;	
58
59		// Start a new page ------------------------------------
60		Page page = doc.PageCreate(Rect(0, 0, 612, 794));
61
62		writer.Begin(page);	// begin writing to this page
63
64		string font_program = input_path + "ARIALUNI.TTF";
65
66		// RAII block for ifstream
67		{
68			std::ifstream ifs(font_program.c_str(), ios_base::in);
69#if defined(_WIN32)
70			if (!ifs.is_open()) {
71				font_program = string("C:/Windows/Fonts/ARIALUNI.TTF");
72				ifs.open(font_program.c_str(), ios_base::in);
73			}
74#endif
75			if (!ifs.is_open()) {
76				font_program.clear();
77			}
78		}
79
80		Font fnt;
81		if(font_program.size())
82		{
83			cout << "Note: using " << font_program << " for unshaped unicode text" << endl;
84			// if we can find a specific wide-coverage font file, then use that directly
85			fnt = Font::CreateCIDTrueTypeFont(doc, font_program.c_str(), true, true);
86		}
87		else
88		{
89			cout << "Note: using system font substitution for unshaped unicode text" << endl;
90			// if we can't find a specific file, then use system font subsitution 
91			// as a fallback, using "Helvetica" as a hint
92			UString empty_temp;
93			fnt = Font::Create(doc, "Helvetica", empty_temp);
94		}
95		
96		Element element = eb.CreateTextBegin(fnt, 1);
97		element.SetTextMatrix(10, 0, 0, 10, 50, 600);
98		element.GetGState().SetLeading(2);		 // Set the spacing between lines
99		writer.WriteElement(element);
100
101		// Hello World!
102		Unicode hello[] = { 'H','e','l','l','o',' ','W','o','r','l','d','!'};
103		writer.WriteElement(eb.CreateUnicodeTextRun(hello, sizeof(hello)/sizeof(Unicode)));
104		writer.WriteElement(eb.CreateTextNewLine());
105
106		// Latin
107		Unicode latin[] = {   
108			'a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0, 
109			0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2' // etc.
110		};
111		writer.WriteElement(eb.CreateUnicodeTextRun(latin, sizeof(latin)/sizeof(Unicode)));
112		writer.WriteElement(eb.CreateTextNewLine());
113
114		// Greek
115		Unicode greek[] = {   
116			0x039E, 0x039F, 0x03A0, 0x03A1,0x03A3, 0x03A6, 0x03A8, 0x03A9  // etc.
117		};
118		writer.WriteElement(eb.CreateUnicodeTextRun(greek, sizeof(greek)/sizeof(Unicode)));
119		writer.WriteElement(eb.CreateTextNewLine());
120
121		// Cyrillic
122		Unicode cyrillic[] = {   
123			0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
124			0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419 // etc.
125		};
126		writer.WriteElement(eb.CreateUnicodeTextRun(cyrillic, sizeof(cyrillic)/sizeof(Unicode)));
127		writer.WriteElement(eb.CreateTextNewLine());
128
129		// Hebrew
130		Unicode hebrew[] = {
131			0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 
132			0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1 // etc. 
133		};
134		writer.WriteElement(eb.CreateUnicodeTextRun(hebrew, sizeof(hebrew)/sizeof(Unicode)));
135		writer.WriteElement(eb.CreateTextNewLine());
136
137		// Arabic
138		Unicode arabic[] = {
139			0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 
140			0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635 // etc. 
141		};
142		writer.WriteElement(eb.CreateUnicodeTextRun(arabic, sizeof(arabic)/sizeof(Unicode)));
143		writer.WriteElement(eb.CreateTextNewLine());
144
145		// Thai 
146		Unicode thai[] = {
147			0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 
148			0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12 // etc. 
149		};
150		writer.WriteElement(eb.CreateUnicodeTextRun(thai, sizeof(thai)/sizeof(Unicode)));
151		writer.WriteElement(eb.CreateTextNewLine());
152
153		// Hiragana - Japanese 
154		Unicode hiragana[] = {
155			0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049, 
156			0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052 // etc. 
157		};
158		writer.WriteElement(eb.CreateUnicodeTextRun(hiragana, sizeof(hiragana)/sizeof(Unicode)));
159		writer.WriteElement(eb.CreateTextNewLine());
160
161		// CJK Unified Ideographs
162		Unicode cjk_uni[] = {
163			0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849, 
164			0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852 // etc. 
165		};
166		writer.WriteElement(eb.CreateUnicodeTextRun(cjk_uni, sizeof(cjk_uni)/sizeof(Unicode)));
167		writer.WriteElement(eb.CreateTextNewLine());
168
169		// Simplified Chinese
170		Unicode chinese_simplified[] = {
171			0x4e16, 0x754c, 0x60a8, 0x597d
172		};
173		writer.WriteElement(eb.CreateUnicodeTextRun(chinese_simplified, sizeof(chinese_simplified)/sizeof(Unicode)));
174		writer.WriteElement(eb.CreateTextNewLine());
175
176		// Finish the block of text
177		writer.WriteElement(eb.CreateTextEnd());
178
179		cout << "Now using text shaping logic to place text" << endl;
180
181		// Create a font in indexed encoding mode 
182		// normally this would mean that we are required to provide glyph indices
183		// directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
184		// method to take care of this detail for us.
185		Font indexed_font = Font::CreateCIDTrueTypeFont(doc, input_path + "NotoSans_with_hindi.ttf", true, true, Font::e_Indices);
186		element = eb.CreateTextBegin(indexed_font, 10);
187		writer.WriteElement(element);
188
189		double line_pos = 350.0;
190		double line_space = 20.0;
191
192		// Transform unicode text into an abstract collection of glyph indices and positioning info 
193		ShapedText shaped_text = indexed_font.GetShapedText(UString("Shaped Hindi Text:"));
194
195		// transform the shaped text info into a PDF element and write it to the page
196		element = eb.CreateShapedTextRun(shaped_text);
197		element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos);
198		writer.WriteElement(element);
199
200		// read in unicode text lines from a file 
201		std::vector<UString> hindi_text = TextFileToStringList(input_path + "hindi_sample_utf16le.txt");
202
203		cout << "Read in " << hindi_text.size() << " lines of Unicode text from file" << endl;
204		for (size_t i = 0; i < hindi_text.size(); ++i)
205		{
206			shaped_text = indexed_font.GetShapedText(hindi_text[i]);
207			element = eb.CreateShapedTextRun(shaped_text);
208			element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos-line_space*(i+1));
209			writer.WriteElement(element);
210			cout << "Wrote shaped line to page" << endl;
211		}
212		
213		// Finish the shaped block of text
214		writer.WriteElement(eb.CreateTextEnd());
215
216		writer.End();  // save changes to the current page
217		doc.PagePushBack(page);
218
219		doc.Save((output_path + "unicodewrite.pdf").c_str(), SDFDoc::e_remove_unused | SDFDoc::e_hex_strings , NULL);
220		cout << "Done. Result saved in unicodewrite.pdf..." << endl;
221	}
222	catch(Common::Exception& e)
223	{
224		cout << e << endl;
225		ret = 1;
226	}
227	catch(...)
228	{
229		cout << "Unknown Exception" << endl;
230		ret = 1;
231	}
232	
233	PDFNet::Terminate();
234	return ret;
235}
236
237std::vector<UString> TextFileToStringList(const UString& file_path)
238{
239	Filters::MappedFile utf_text_filter(file_path);
240	size_t file_size = utf_text_filter.FileSize();
241	Filters::FilterReader utf_reader(utf_text_filter);
242	std::vector<unsigned char> data = utf_reader.Read(file_size);
243	data.push_back(0);
244	data.push_back(0);
245	std::vector<UString> ret;
246	size_t line_start = 0;
247	for (size_t i = 0; i+1 < data.size(); i +=2)
248	{
249		bool has_newline = false;
250		size_t end_index = i;
251		while(i+1 < data.size() && data[i+1] == 0 && (data[i] == '\n' || data[i] == '\r'))
252		{
253			i+=2;
254			has_newline = true;
255		}
256		if(has_newline || (i + 2 >=  data.size() && line_start < data.size()))
257		{
258			ret.push_back(UString(reinterpret_cast<Unicode*>(&data[line_start]), (end_index - line_start)/2));
259			line_start = i;
260		}
261	}
262	return ret;
263}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6import com.pdftron.pdf.*;
7import com.pdftron.sdf.SDFDoc;
8import java.util.List;
9import java.nio.file.Files;
10import java.nio.file.Paths;
11import java.nio.charset.StandardCharsets;
12
13/**
14 * This example illustrates how to create Unicode text and how to embed composite fonts.
15 * <p>
16 * Note: This demo attempts to make use of 'arialuni.ttf' in the '/Samples/TestFiles' 
17 * directory. Arial Unicode MS is about 24MB in size and used to come together with Windows and 
18 * MS Office.
19 * <p>
20 * In case you don't have access to Arial Unicode MS you can use another wide coverage
21 * font, like Google Noto, GNU UniFont, or cyberbit. Many of these are freely available,
22 * and there is a list maintained at https://en.wikipedia.org/wiki/Unicode_font
23 * <p>
24 * If no specific font file can be loaded, the demo will fall back to system specific font
25 * substitution routines, and the result will depend on which fonts are available.
26 * 
27 */
28public class UnicodeWriteTest {
29    public static void main(String[] args) {
30        PDFNet.initialize(PDFTronLicense.Key());
31
32        // Relative path to the folder containing test files.
33        String input_path = "../../TestFiles/";
34        String output_path = "../../TestFiles/Output/";
35
36        try (PDFDoc doc = new PDFDoc()) {
37            ElementBuilder eb = new ElementBuilder();
38            ElementWriter writer = new ElementWriter();
39
40            // Start a new page ------------------------------------
41            Page page = doc.pageCreate(new Rect(0, 0, 612, 794));
42
43            writer.begin(page);    // begin writing to this page
44
45            String fontLocation = input_path + "ARIALUNI.TTF";
46
47            Font fnt = null;
48            try {
49                // Embed and subset the font
50                fnt = Font.createCIDTrueTypeFont(doc, fontLocation, true, true);
51            } catch (Exception e) {
52                fontLocation = "C:/Windows/Fonts/ARIALUNI.TTF";
53                try {
54                     fnt = Font.createCIDTrueTypeFont(doc, fontLocation, true, true);
55                }
56                catch (Exception e2) {
57                    fontLocation = null;
58                }
59            }
60
61            if(fnt != null) {
62                System.out.println("Note: using " + fontLocation + " for unshaped unicode text");
63            }
64            else {
65                System.out.println("Note: using system font substitution for unshaped unicode text");
66                fnt = Font.create(doc, "Helvetica", "");
67            }
68
69            Element element = eb.createTextBegin(fnt, 1);
70            element.setTextMatrix(10, 0, 0, 10, 50, 600);
71            element.getGState().setLeading(2);         // Set the spacing between lines
72            writer.writeElement(element);
73
74            // Hello World!
75            char hello[] = {'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', '!'};
76            writer.writeElement(eb.createUnicodeTextRun(new String(hello)));
77            writer.writeElement(eb.createTextNewLine());
78
79            // Latin
80            char latin[] = {
81                    'a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0,
82                    0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2' // etc.
83            };
84            writer.writeElement(eb.createUnicodeTextRun(new String(latin)));
85            writer.writeElement(eb.createTextNewLine());
86
87            // Greek
88            char greek[] = {
89                    0x039E, 0x039F, 0x03A0, 0x03A1, 0x03A3, 0x03A6, 0x03A8, 0x03A9  // etc.
90            };
91            writer.writeElement(eb.createUnicodeTextRun(new String(greek)));
92            writer.writeElement(eb.createTextNewLine());
93
94            // Cyrillic
95            char cyrilic[] = {
96                    0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
97                    0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419 // etc.
98            };
99            writer.writeElement(eb.createUnicodeTextRun(new String(cyrilic)));
100            writer.writeElement(eb.createTextNewLine());
101
102            // Hebrew
103            char hebrew[] = {
104                    0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
105                    0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1 // etc.
106            };
107            writer.writeElement(eb.createUnicodeTextRun(new String(hebrew)));
108            writer.writeElement(eb.createTextNewLine());
109
110            // Arabic
111            char arabic[] = {
112                    0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
113                    0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635 // etc.
114            };
115            writer.writeElement(eb.createUnicodeTextRun(new String(arabic)));
116            writer.writeElement(eb.createTextNewLine());
117
118            // Thai
119            char thai[] = {
120                    0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09,
121                    0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12 // etc.
122            };
123            writer.writeElement(eb.createUnicodeTextRun(new String(thai)));
124            writer.writeElement(eb.createTextNewLine());
125
126            // Hiragana - Japanese
127            char hiragana[] = {
128                    0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
129                    0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052 // etc.
130            };
131            writer.writeElement(eb.createUnicodeTextRun(new String(hiragana)));
132            writer.writeElement(eb.createTextNewLine());
133
134            // CJK Unified Ideographs
135            char cjk_uni[] = {
136                    0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849,
137                    0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852 // etc.
138            };
139            writer.writeElement(eb.createUnicodeTextRun(new String(cjk_uni)));
140            writer.writeElement(eb.createTextNewLine());
141
142            // Simplified Chinese
143            char chinese_simplified[] = {
144              0x4e16, 0x754c, 0x60a8, 0x597d
145            };
146            writer.writeElement(eb.createUnicodeTextRun(new String(chinese_simplified)));
147            writer.writeElement(eb.createTextNewLine());
148
149            // Finish the block of text
150            writer.writeElement(eb.createTextEnd());
151
152            System.out.println("Now using text shaping logic to place text");
153
154            // Create a font in indexed encoding mode 
155            // normally this would mean that we are required to provide glyph indices
156            // directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
157            // method to take care of this detail for us.
158            Font indexedFont = Font.createCIDTrueTypeFont(doc, input_path + "NotoSans_with_hindi.ttf", true, true, Font.e_Indices);
159            element = eb.createTextBegin(indexedFont, 10.0);
160            writer.writeElement(element);
161
162            double linePos = 350.0;
163            double lineSpace = 20.0;
164
165            // Transform unicode text into an abstract collection of glyph indices and positioning info 
166            ShapedText shapedText = indexedFont.getShapedText("Shaped Hindi Text:");
167
168            // transform the shaped text info into a PDF element and write it to the page
169            element = eb.createShapedTextRun(shapedText);
170            element.setTextMatrix(1.5, 0, 0, 1.5, 50, linePos);
171            linePos -= lineSpace;
172            writer.writeElement(element);
173
174            // read in unicode text lines from a file 
175            List<String> hindiTextLines = Files.readAllLines(Paths.get(input_path + "hindi_sample_utf16le.txt"), StandardCharsets.UTF_16LE);
176
177            System.out.println("Read in " + hindiTextLines.size() + " lines of Unicode text from file");
178            for (String textLine : hindiTextLines)  
179            {
180                shapedText = indexedFont.getShapedText(textLine);
181                element = eb.createShapedTextRun(shapedText);
182                element.setTextMatrix(1.5, 0, 0, 1.5, 50, linePos);
183                linePos -= lineSpace;
184                writer.writeElement(element);
185                System.out.println("Wrote shaped line to page");
186            }
187        
188            // Finish the shaped block of text
189            writer.writeElement(eb.createTextEnd());
190
191
192            writer.end();  // save changes to the current page
193            doc.pagePushBack(page);
194
195            doc.save(output_path + "unicodewrite.pdf", new SDFDoc.SaveMode[]{SDFDoc.SaveMode.REMOVE_UNUSED, SDFDoc.SaveMode.HEX_STRINGS}, null);
196            System.out.println("Done. Result saved in unicodewrite.pdf...");
197        } catch (Exception e) {
198            e.printStackTrace();
199        }
200
201        PDFNet.terminate();
202    }
203
204}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2021 by PDFTron Systems Inc. All Rights Reserved.
3// Consult LICENSE.txt regarding license information.
4//---------------------------------------------------------------------------------------
5
6package main
7import (
8	"fmt"
9    "os"
10    "bufio"
11    "strconv"
12    "runtime"
13	. "pdftron"
14    "golang.org/x/text/encoding/unicode"
15    "golang.org/x/text/transform"
16)
17
18import  "pdftron/Samples/LicenseKey/GO"
19
20// Relative path to the folder containing the test files.
21var inputPath = "../../TestFiles/"
22var outputPath = "../../TestFiles/Output/"
23
24// This example illustrates how to create Unicode text and how to embed composite fonts.
25// 
26// Note: This demo attempts to make use of 'arialuni.ttf' in the '/Samples/TestFiles' 
27// directory. Arial Unicode MS is about 24MB in size and used to come together with Windows and 
28// MS Office.
29// 
30// In case you don't have access to Arial Unicode MS you can use another wide coverage
31// font, like Google Noto, GNU UniFont, or cyberbit. Many of these are freely available,
32// and there is a list maintained at https://en.wikipedia.org/wiki/Unicode_font
33// 
34// If no specific font file can be loaded, the demo will fall back to system specific font
35// substitution routines, and the result will depend on which fonts are available.
36//
37// Run "go get golang.org/x/text/encoding/unicode" and "go get golang.org/x/text/transform" to install, 
38// if these two packages are not already installed.
39 
40func ReadUnicodeTextLinesFromFile(  writer ElementWriter, 
41                                    indexedFont Font, 
42                                    eb ElementBuilder, 
43                                    linePos float64, 
44                                    lineSpace float64, 
45                                    showNumOfLines bool, 
46                                    readLines bool){
47    file, err := os.Open(inputPath + "hindi_sample_utf16le.txt")
48    if err != nil {
49        fmt.Println(err)
50    }
51    defer file.Close()
52    scanner := bufio.NewScanner(transform.NewReader(file, unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()))
53    i := 0
54    if(showNumOfLines){
55        for scanner.Scan() {
56            i++
57        }
58        fmt.Println("Read in " + strconv.Itoa(i) + " lines of Unicode text from file")
59    }else if(readLines){
60        for scanner.Scan() {
61            shapedText := indexedFont.GetShapedText(scanner.Text())
62            element := eb.CreateShapedTextRun(shapedText)
63            element.SetTextMatrix(1.5, 0.0, 0.0, 1.5, 50.0, linePos-lineSpace*(float64(i+1)))
64            writer.WriteElement(element)
65            fmt.Println("Wrote shaped line to page")  
66            i++
67        }
68    }
69    if err := scanner.Err(); err != nil {
70        fmt.Println(err)
71    }
72}
73
74func main(){
75    PDFNetInitialize(PDFTronLicense.Key)
76    
77    doc := NewPDFDoc()
78    eb := NewElementBuilder()
79    writer := NewElementWriter()
80    
81    // Start a new page ------------------------------------
82    page := doc.PageCreate(NewRect(0.0, 0.0, 612.0, 794.0))
83    
84    writer.Begin(page)    // begin writing to this page
85       
86    // Embed and subset the font
87    fontProgram := inputPath + "ARIALUNI.TTF"
88    fnt := FontCreate(doc.GetSDFDoc(), "Helvetica", "")
89    if _, err := os.Stat(fontProgram); err == nil{
90      // fontProgram exists
91      fnt = FontCreateCIDTrueTypeFont(doc.GetSDFDoc(), fontProgram, true, true)
92      fmt.Println("Note: using " + fontProgram + " for unshaped unicode text")
93    }else if os.IsNotExist(err){
94        if runtime.GOOS == "windows"{
95            fontProgram = "C:/Windows/Fonts/ARIALUNI.TTF"
96            if _, err := os.Stat(fontProgram); err == nil{
97              // fontProgram exists
98                fnt = FontCreateCIDTrueTypeFont(doc.GetSDFDoc(), fontProgram, true, true)
99                fmt.Println("Note: using " + fontProgram + " for unshaped unicode text")
100            }else if os.IsNotExist(err){
101                fmt.Println("Note: using system font substitution for unshaped unicode text")
102            }else{
103                fmt.Println(err)
104            }
105        }
106    }else{
107        fmt.Println(err)
108    }
109
110    element := eb.CreateTextBegin(fnt, 1.0)
111    element.SetTextMatrix(10.0, 0.0, 0.0, 10.0, 50.0, 600.0)
112    element.GetGState().SetLeading(2)         // Set the spacing between lines
113    writer.WriteElement(element)
114
115    // Hello World!
116    hello := []uint16{'H','e','l','l','o',' ','W','o','r','l','d','!'}
117    fmt.Println(hello)
118    writer.WriteElement(eb.CreateUnicodeTextRun(&hello[0], uint(len(hello))))
119    writer.WriteElement(eb.CreateTextNewLine())
120    
121    // Latin
122    latin := []uint16{'a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0, 
123            0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2' }// etc.
124    writer.WriteElement(eb.CreateUnicodeTextRun(&latin[0], uint(len(latin))))
125    writer.WriteElement(eb.CreateTextNewLine())
126
127    // Greek
128    greek := []uint16{0x039E, 0x039F, 0x03A0, 0x03A1,0x03A3, 0x03A6, 0x03A8, 0x03A9}
129    writer.WriteElement(eb.CreateUnicodeTextRun(&greek[0], uint(len(greek))))
130    writer.WriteElement(eb.CreateTextNewLine())
131    
132    // Cyrillic
133    cyrillic := []uint16{0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
134                0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419}
135    writer.WriteElement(eb.CreateUnicodeTextRun(&cyrillic[0], uint(len(cyrillic))))
136    writer.WriteElement(eb.CreateTextNewLine())
137    
138    // Hebrew
139    hebrew := []uint16{0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
140              0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1}
141    writer.WriteElement(eb.CreateUnicodeTextRun(&hebrew[0], uint(len(hebrew))))
142    writer.WriteElement(eb.CreateTextNewLine())
143    
144    // Arabic
145    arabic := []uint16{0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
146              0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635}
147    writer.WriteElement(eb.CreateUnicodeTextRun(&arabic[0], uint(len(arabic))))
148    writer.WriteElement(eb.CreateTextNewLine())
149    
150    // Thai
151    thai := []uint16{0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 
152            0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12}
153    writer.WriteElement(eb.CreateUnicodeTextRun(&thai[0], uint(len(thai))))
154    writer.WriteElement(eb.CreateTextNewLine())
155    
156    // Hiragana - Japanese 
157    hiragana := []uint16{0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
158                0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052}
159    writer.WriteElement(eb.CreateUnicodeTextRun(&hiragana[0], uint(len(hiragana))))
160    writer.WriteElement(eb.CreateTextNewLine())
161    
162    // CJK Unified Ideographs 
163    cjk_uni := []uint16{0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849, 
164               0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852}
165    writer.WriteElement(eb.CreateUnicodeTextRun(&cjk_uni[0], uint(len(cjk_uni))))
166    writer.WriteElement(eb.CreateTextNewLine())
167    
168    // Simplified Chinese
169    chineseSimplified := []uint16{0x4e16, 0x754c, 0x60a8, 0x597d}
170    writer.WriteElement(eb.CreateUnicodeTextRun(&chineseSimplified[0], uint(len(chineseSimplified))))
171    writer.WriteElement(eb.CreateTextNewLine())
172
173    // Finish the block of text
174    writer.WriteElement(eb.CreateTextEnd())
175
176    fmt.Println("Now using text shaping logic to place text")
177
178    // Create a font in indexed encoding mode 
179    // normally this would mean that we are required to provide glyph indices
180    // directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
181    // method to take care of this detail for us.
182    indexedFont := FontCreateCIDTrueTypeFont(doc.GetSDFDoc(), inputPath + "NotoSans_with_hindi.ttf", true, true, FontE_Indices)
183    element = eb.CreateTextBegin(indexedFont, 10.0)
184    writer.WriteElement(element)
185
186    linePos := 350.0
187    lineSpace := 20.0
188
189    // Transform unicode text into an abstract collection of glyph indices and positioning info 
190    shapedText := indexedFont.GetShapedText("Shaped Hindi Text:")
191
192    // transform the shaped text info into a PDF element and write it to the page
193    element = eb.CreateShapedTextRun(shapedText)
194    element.SetTextMatrix(1.5, 0.0, 0.0, 1.5, 50.0, linePos)
195    writer.WriteElement(element)
196    // read in unicode text lines from a file
197    ReadUnicodeTextLinesFromFile(writer, indexedFont, eb, linePos, lineSpace, true, false)
198    ReadUnicodeTextLinesFromFile(writer, indexedFont, eb, linePos, lineSpace, false, true)
199    
200    // Finish the block of text
201    writer.WriteElement(eb.CreateTextEnd())
202
203    writer.End()    // save changes to the current page
204    doc.PagePushBack(page)
205    
206    doc.Save(outputPath + "unicodewrite.pdf", uint(SDFDocE_remove_unused | SDFDocE_hex_strings))
207    fmt.Println("Done. Result saved in unicodewrite.pdf...")
208    
209    doc.Close()
210    PDFNetTerminate()
211}

1//---------------------------------------------------------------------------------------
2// Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3// Consult legal.txt regarding legal and license information.
4//---------------------------------------------------------------------------------------
5
6const fs = require('fs')
7const process = require('process');
8const { PDFNet } = require('@pdftron/pdfnet-node');
9const PDFTronLicense = require('../LicenseKey/LicenseKey');
10
11((exports) => {
12
13  exports.runUnicodeWriteTest = () => {
14
15    const main = async () => {
16      try {
17        // Relative path to the folder containing test files.
18        const inputPath = '../TestFiles/';
19        const outputPath = '../TestFiles/Output/';
20
21        const doc = await PDFNet.PDFDoc.create();
22        doc.initSecurityHandler();
23
24        const eb = await PDFNet.ElementBuilder.create(); // ElementBuilder, used to build new element Objects
25        const writer = await PDFNet.ElementWriter.create(); // ElementWriter, used to write elements to the page
26
27        // Start a new page ------------------------------------
28        let page = await doc.pageCreate(new PDFNet.Rect(0, 0, 612, 794));
29
30        await writer.beginOnPage(page);
31
32        let font_program = inputPath + 'ARIALUNI.TTF';
33
34        if (!fs.existsSync(font_program)) {
35          font_program = 'C:/Windows/Fonts/ARIALUNI.TTF';
36          if (process.platform !== 'win32' || !fs.existsSync(font_program)) {
37            font_program = '';
38          }
39        }
40
41        let fnt;
42        if (font_program.length) {
43          console.log('Note: using ' + font_program + ' for unshaped unicode text');
44          // if we can find a specific wide-coverage font file, then use that directly
45          fnt = await PDFNet.Font.createCIDTrueTypeFont(doc, font_program, true, true);
46        } else {
47          console.log('Note: using system font substitution for unshaped unicode text');
48          // if we can't find a specific file, then use system font subsitution 
49          // as a fallback, using 'Helvetica' as a hint
50          fnt = await PDFNet.Font.createFromName(doc, 'Helvetica', '');
51        }
52
53        let element = await eb.createTextBeginWithFont(fnt, 1);
54        await element.setTextMatrixEntries(10, 0, 0, 10, 50, 600);
55        await (await element.getGState()).setLeading(2);		 // Set the spacing between lines
56        await writer.writeElement(element);
57
58        // Hello World!
59        const hello = 'Hello World!';
60        await writer.writeElement(await eb.createUnicodeTextRun(hello));
61        await writer.writeElement(await eb.createTextNewLine());
62
63        // Latin
64        const latin = 'aAbBcCdD' + String.fromCharCode(0x45, 0x0046, 0x00C0, 0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152) + '12';
65        await writer.writeElement(await eb.createUnicodeTextRun(latin));
66        await writer.writeElement(await eb.createTextNewLine());
67
68        // Greek
69        const greek = String.fromCharCode(0x039E, 0x039F, 0x03A0, 0x03A1, 0x03A3, 0x03A6, 0x03A8, 0x03A9);
70        await writer.writeElement(await eb.createUnicodeTextRun(greek));
71        await writer.writeElement(await eb.createTextNewLine());
72
73        // Cyrillic
74        const cyrillic = String.fromCharCode(
75          0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
76          0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419);
77        await writer.writeElement(await eb.createUnicodeTextRun(cyrillic));
78        await writer.writeElement(await eb.createTextNewLine());
79
80        // Hebrew
81        const hebrew = String.fromCharCode(
82          0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
83          0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1);
84        await writer.writeElement(await eb.createUnicodeTextRun(hebrew));
85        await writer.writeElement(await eb.createTextNewLine());
86
87        // Arabic
88        const arabic = String.fromCharCode(
89          0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
90          0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635);
91        await writer.writeElement(await eb.createUnicodeTextRun(arabic));
92        await writer.writeElement(await eb.createTextNewLine());
93
94        // Thai 
95        const thai = String.fromCharCode(
96          0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09,
97          0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12);
98        await writer.writeElement(await eb.createUnicodeTextRun(thai));
99        await writer.writeElement(await eb.createTextNewLine());
100
101        // Hiragana - Japanese 
102        const hiragana = String.fromCharCode(
103          0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
104          0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052);
105        await writer.writeElement(await eb.createUnicodeTextRun(hiragana));
106        await writer.writeElement(await eb.createTextNewLine());
107
108        // CJK Unified Ideographs
109        const cjk_uni = String.fromCharCode(
110          0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849,
111          0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852);
112        await writer.writeElement(await eb.createUnicodeTextRun(cjk_uni));
113        await writer.writeElement(await eb.createTextNewLine());
114
115        // Simplified Chinese
116        const chinese_simplified = String.fromCharCode(0x4e16, 0x754c, 0x60a8, 0x597d);
117        await writer.writeElement(await eb.createUnicodeTextRun(chinese_simplified));
118        await writer.writeElement(await eb.createTextNewLine());
119
120        // Finish the block of text
121        await writer.writeElement(await eb.createTextEnd());
122
123        console.log('Now using text shaping logic to place text');
124
125        // Create a font in indexed encoding mode 
126        // normally this would mean that we are required to provide glyph indices
127        // directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
128        // method to take care of this detail for us.
129        const indexed_font = await PDFNet.Font.createCIDTrueTypeFont(doc, inputPath + 'NotoSans_with_hindi.ttf', true, true, PDFNet.Font.Encoding.e_Indices);
130        element = await eb.createTextBeginWithFont(indexed_font, 10);
131        await writer.writeElement(element);
132
133        const line_pos = 350.0;
134        const line_space = 20.0;
135
136        // Transform unicode text into an abstract collection of glyph indices and positioning info 
137        let shaped_text = await indexed_font.getShapedText('Shaped Hindi Text:');
138
139        // transform the shaped text info into a PDF element and write it to the page
140        element = await eb.createShapedTextRun(shaped_text);
141        await element.setTextMatrixEntries(1.5, 0, 0, 1.5, 50, line_pos);
142        await writer.writeElement(element);
143
144        // read in unicode text lines from a file 
145        const hindi_text = fs.readFileSync(inputPath + 'hindi_sample_utf16le.txt', 'utf16le').toString().split(/\n/);
146
147        console.log('Read in ' + hindi_text.length + ' lines of Unicode text from file');
148        for (let i = 0; i < hindi_text.length; ++i) {
149          shaped_text = await indexed_font.getShapedText(hindi_text[i]);
150          element = await eb.createShapedTextRun(shaped_text);
151          await element.setTextMatrixEntries(1.5, 0, 0, 1.5, 50, line_pos - line_space * (i + 1));
152          await writer.writeElement(element);
153          console.log('Wrote shaped line to page');
154        }
155
156        // Finish the shaped block of text
157        await writer.writeElement(await eb.createTextEnd());
158
159        await writer.end();  // save changes to the current page
160        await doc.pagePushBack(page);
161
162        await doc.save(outputPath + 'unicodewrite.pdf', PDFNet.SDFDoc.SaveOptions.e_remove_unused | PDFNet.SDFDoc.SaveOptions.e_hex_strings);
163
164        console.log('Done. Result saved in unicodewrite.pdf...');
165      } catch (err) {
166        console.log(err);
167      }
168    };
169    PDFNet.runWithCleanup(main, PDFTronLicense.Key).catch(function (error) { console.log('Error: ' + JSON.stringify(error)); }).then(function () { return PDFNet.shutdown(); });
170  };
171  exports.runUnicodeWriteTest();
172})(exports);
173// eslint-disable-next-line spaced-comment
174//# sourceURL=UnicodeWriteTest.js

1<?php
2//---------------------------------------------------------------------------------------
3// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
4// Consult LICENSE.txt regarding license information.
5//---------------------------------------------------------------------------------------
6if(file_exists("../../../PDFNetC/Lib/PDFNetPHP.php"))
7include("../../../PDFNetC/Lib/PDFNetPHP.php");
8include("../../LicenseKey/PHP/LicenseKey.php");
9
// Folder with the sample input files, resolved against the current working
// directory; generated files go to its Output/ subfolder.
$input_path = getcwd() . "/../../TestFiles/";
$output_path = "{$input_path}Output/";
13
14//---------------------------------------------------------------------------------------
15// This example illustrates how to create Unicode text and how to embed composite fonts.
16// 
17// Note: This demo assumes that 'arialuni.ttf' is present in '/Samples/TestFiles' 
18// directory. Arial Unicode MS is about 24MB in size and it comes together with Windows and 
19// MS Office.
20//---------------------------------------------------------------------------------------
/**
 * Builds a single-page PDF and saves it as $output_path."unicodewrite.pdf".
 *
 * The page gets two text blocks: Unicode runs in several scripts written with
 * ARIALUNI.TTF (or a font created from the name "Helvetica" when that file
 * cannot be loaded), followed by shaped runs produced with
 * NotoSans_with_hindi.ttf loaded in Font::e_Indices mode, including the lines
 * of hindi_sample_utf16le.txt.
 *
 * Reads the globals $input_path, $output_path and $LicenseKey.
 */
function main()
{
	global $input_path, $output_path, $LicenseKey;

	PDFNet::Initialize($LicenseKey);
	PDFNet::GetSystemFontList();    // Wait for fonts to be loaded if they haven't already. This is done because PHP can run into errors when shutting down if font loading is still in progress.

	$doc = new PDFDoc();

	$builder = new ElementBuilder();
	$writer = new ElementWriter();

	// Start a new page ------------------------------------
	$page = $doc->PageCreate(new Rect(0.0, 0.0, 612.0, 794.0));

	$writer->Begin($page);	// begin writing to this page

	// Embed and subset the font
	$font_program = $input_path."ARIALUNI.TTF";
	if (!file_exists($font_program)) {
		if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
			$font_program = "C:/Windows/Fonts/ARIALUNI.TTF";
		}
	}
	$fnt = NULL;
	try {
		$fnt = Font::CreateCIDTrueTypeFont($doc->GetSDFDoc(), $font_program, true, true);
	}
	catch(Exception $e){
		// Deliberately ignored: a font file that cannot be loaded is an
		// expected situation, handled by the substitution branch below.
	}
	if($fnt)
	{
		echo(nl2br("Note: using " . $font_program . " for unshaped unicode text\n"));
	}
	else
	{
		echo(nl2br("Note: using system font substitution for unshaped unicode text\n"));
		$fnt = Font::Create($doc->GetSDFDoc(), "Helvetica", "");
	}

	$element = $builder->CreateTextBegin($fnt, 1.0);
	$element->SetTextMatrix(10.0, 0.0, 0.0, 10.0, 50.0, 600.0);
	$element->GetGState()->SetLeading(2);		 // Set the spacing between lines
	$writer->WriteElement($element);

	// Writes one Unicode text run followed by a line break.
	$write_line = function($chars) use ($writer, $builder) {
		$writer->WriteElement($builder->CreateUnicodeTextRun($chars, count($chars)));
		$writer->WriteElement($builder->CreateTextNewLine());
	};

	// Hello World!
	$hello = array( 'H','e','l','l','o',' ','W','o','r','l','d','!');
	$write_line($hello);

	// Latin
	$latin = array(
		'a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0,
		0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2' // etc.
	);
	$write_line($latin);

	// Greek
	$greek = array(
		0x039E, 0x039F, 0x03A0, 0x03A1,0x03A3, 0x03A6, 0x03A8, 0x03A9  // etc.
	);
	$write_line($greek);

	// Cyrillic
	$cyrillic = array(
		0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
		0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419 // etc.
	);
	$write_line($cyrillic);

	// Hebrew
	$hebrew = array(
		0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
		0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1 // etc.
	);
	$write_line($hebrew);

	// Arabic
	$arabic = array(
		0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
		0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635 // etc.
	);
	$write_line($arabic);

	// Thai
	$thai = array(
		0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09,
		0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12 // etc.
	);
	$write_line($thai);

	// Hiragana - Japanese
	$hiragana = array(
		0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
		0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052 // etc.
	);
	$write_line($hiragana);

	// CJK Unified Ideographs
	$cjk_uni = array(
		0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849,
		0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852 // etc.
	);
	$write_line($cjk_uni);

	// Simplified Chinese
	$chinese_simplified = array(
		0x4e16, 0x754c, 0x60a8, 0x597d
	);
	$write_line($chinese_simplified);

	// Finish the block of text. Every CreateTextBegin needs a matching
	// CreateTextEnd before the next text block is started below.
	$writer->WriteElement($builder->CreateTextEnd());

	echo("Now using text shaping logic to place text\n");

	// Create a font in indexed encoding mode 
	// normally this would mean that we are required to provide glyph indices
	// directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
	// method to take care of this detail for us.
	$indexed_font = Font::CreateCIDTrueTypeFont($doc->GetSDFDoc(), $input_path . "NotoSans_with_hindi.ttf", true, true, Font::e_Indices);
	$element = $builder->CreateTextBegin($indexed_font, 10.0);
	$writer->WriteElement($element);

	$line_pos = 350.0;
	$line_space = 20.0;

	// Transform unicode text into an abstract collection of glyph indices and positioning info 
	$shaped_text = $indexed_font->GetShapedText("Shaped Hindi Text:");

	// transform the shaped text info into a PDF element and write it to the page
	$element = $builder->CreateShapedTextRun($shaped_text);
	$element->SetTextMatrix(1.5, 0.0, 0.0, 1.5, 50.0, $line_pos);
	$writer->WriteElement($element);

	// read in unicode text lines from a file.
	// The file is UTF-16LE, so it is decoded as a whole before being split
	// into lines: splitting the raw bytes on "\n" (as fgets does) can cut
	// through the middle of a two-byte code unit.
	$hindi_lines = array();
	$raw = file_get_contents($input_path . "hindi_sample_utf16le.txt");
	$decoded = ($raw === false) ? false : iconv("UTF-16LE", "UTF-8", $raw);
	if ($decoded === false) {
		echo("Could not read or decode hindi_sample_utf16le.txt\n");
	} else {
		// Drop a leading byte-order mark (EF BB BF once converted to UTF-8).
		if (substr($decoded, 0, 3) === "\xEF\xBB\xBF") {
			$decoded = substr($decoded, 3);
		}
		$hindi_lines = preg_split('/\r?\n/', $decoded);
		// A final newline leaves an empty trailing entry; it is not a line.
		if (end($hindi_lines) === '') {
			array_pop($hindi_lines);
		}
	}
	echo("Read in " . count($hindi_lines) . " lines of Unicode text from file\n");

	foreach ($hindi_lines as $i => $hindi_text) {
		$shaped_text = $indexed_font->GetShapedText($hindi_text);
		$element = $builder->CreateShapedTextRun($shaped_text);
		// Each successive line is moved one $line_space further down the page.
		$element->SetTextMatrix(1.5, 0.0, 0.0, 1.5, 50.0, $line_pos-$line_space*($i+1));
		$writer->WriteElement($element);
		echo("Wrote shaped line to page\n");
	}

	// Finish the block of text
	$writer->WriteElement($builder->CreateTextEnd());

	$writer->End();  // save changes to the current page
	$doc->PagePushBack($page);

	$doc->Save($output_path."unicodewrite.pdf", SDFDoc::e_remove_unused | SDFDoc::e_hex_strings);
	PDFNet::Terminate();
	echo "Done. Result saved in unicodewrite.pdf...\n";
}

main();
200?>

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6import site
7site.addsitedir("../../../PDFNetC/Lib")
8import sys
9from PDFNetPython import *
10import os, io
11
12sys.path.append("../../LicenseKey/PYTHON")
13from LicenseKey import *
14
# Sample input files live in TestFiles/ (relative to the working directory);
# generated PDFs are written to its Output/ subfolder.
input_path = "../../TestFiles/"
output_path = input_path + "Output/"
18
19# This example illustrates how to create Unicode text and how to embed composite fonts.
20# 
21# Note: This demo assumes that 'arialuni.ttf' is present in '/Samples/TestFiles' 
22# directory. Arial Unicode MS is about 24MB in size and it comes together with Windows and 
23# MS Office.
24# 
25# For more information about Arial Unicode MS, please consult the following Microsoft Knowledge 
26# Base Article: WD2002: General Information About the Arial Unicode MS Font
27#    http://support.microsoft.com/support/kb/articles/q287/2/47.asp
28# 
29# For more information consult: 
30#    http://office.microsoft.com/search/results.aspx?Scope=DC&Query=font&CTT=6&Origin=EC010331121033
31#    http://www.microsoft.com/downloads/details.aspx?FamilyID=1F0303AE-F055-41DA-A086-A65F22CB5593
32# 
33# In case you don't have access to Arial Unicode MS you can use cyberbit.ttf 
34# (http://ftp.netscape.com/pub/communicator/extras/fonts/windows/) instead.
def main():
    """Build a single-page PDF and save it as output_path + "unicodewrite.pdf".

    The page gets two text blocks: Unicode runs in several scripts written
    with ARIALUNI.TTF (or a font created from the name "Helvetica" when that
    file cannot be loaded), followed by shaped runs produced with
    NotoSans_with_hindi.ttf loaded in Font.e_Indices mode, including the
    lines of hindi_sample_utf16le.txt.
    """
    PDFNet.Initialize(LicenseKey)

    doc = PDFDoc()
    eb = ElementBuilder()
    writer = ElementWriter()

    # Start a new page ------------------------------------
    page = doc.PageCreate(Rect(0, 0, 612, 794))

    writer.Begin(page)    # begin writing to this page

    # Embed and subset the font
    font_program = input_path + "ARIALUNI.TTF"
    if not os.path.isfile(font_program):
        if sys.platform == 'win32':
            font_program = "C:/Windows/Fonts/ARIALUNI.TTF"
    fnt = None
    try:
        fnt = Font.CreateCIDTrueTypeFont(doc.GetSDFDoc(), font_program, True, True)
    except Exception:
        # A font file that cannot be loaded is expected here and handled by the
        # substitution branch below. Catching Exception (not a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        pass

    if fnt:
        print("Note: using " + font_program + " for unshaped unicode text")
    else:
        print("Note: using system font substitution for unshaped unicode text")
        fnt = Font.Create(doc.GetSDFDoc(), "Helvetica", "")

    element = eb.CreateTextBegin(fnt, 1)
    element.SetTextMatrix(10, 0, 0, 10, 50, 600)
    element.GetGState().SetLeading(2)         # Set the spacing between lines
    writer.WriteElement(element)

    def write_line(chars):
        # Emit one Unicode text run followed by a line break.
        writer.WriteElement(eb.CreateUnicodeTextRun(chars, len(chars)))
        writer.WriteElement(eb.CreateTextNewLine())

    # Hello World!
    hello = ['H','e','l','l','o',' ','W','o','r','l','d','!']
    write_line(hello)

    # Latin
    latin = ['a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0,
            0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2' ]# etc.
    write_line(latin)

    # Greek
    greek = [0x039E, 0x039F, 0x03A0, 0x03A1,0x03A3, 0x03A6, 0x03A8, 0x03A9]
    write_line(greek)

    # Cyrillic
    cyrillic = [0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
                0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419]
    write_line(cyrillic)

    # Hebrew
    hebrew = [0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
              0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1]
    write_line(hebrew)

    # Arabic
    arabic = [0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
              0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635]
    write_line(arabic)

    # Thai
    thai = [0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09,
            0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12]
    write_line(thai)

    # Hiragana - Japanese
    hiragana = [0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
                0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052]
    write_line(hiragana)

    # CJK Unified Ideographs
    cjk_uni = [0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849,
               0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852]
    write_line(cjk_uni)

    # Simplified Chinese
    chinese_simplified = [0x4e16, 0x754c, 0x60a8, 0x597d]
    write_line(chinese_simplified)

    # Finish the block of text
    writer.WriteElement(eb.CreateTextEnd())

    print("Now using text shaping logic to place text")

    # Create a font in indexed encoding mode 
    # normally this would mean that we are required to provide glyph indices
    # directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
    # method to take care of this detail for us.
    indexed_font = Font.CreateCIDTrueTypeFont(doc.GetSDFDoc(), input_path + "NotoSans_with_hindi.ttf", True, True, Font.e_Indices)
    element = eb.CreateTextBegin(indexed_font, 10)
    writer.WriteElement(element)

    line_pos = 350.0
    line_space = 20.0

    # Transform unicode text into an abstract collection of glyph indices and positioning info 
    shaped_text = indexed_font.GetShapedText("Shaped Hindi Text:")

    # transform the shaped text info into a PDF element and write it to the page
    element = eb.CreateShapedTextRun(shaped_text)
    element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos)
    writer.WriteElement(element)

    # read in unicode text lines from a file 
    with io.open(input_path + "hindi_sample_utf16le.txt", "r", encoding='utf-16-le') as f:
        hindi_text = f.readlines()
        print("Read in " + str(len(hindi_text)) + " lines of Unicode text from file")
        for i, line in enumerate(hindi_text):
            # Strip only the line terminator. Slicing off the last character
            # unconditionally would eat a real character when the final line
            # has no trailing newline.
            shaped_text = indexed_font.GetShapedText(line.rstrip('\n'))
            element = eb.CreateShapedTextRun(shaped_text)
            # Each successive line is moved one line_space further down the page.
            element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos-line_space*(i+1))
            writer.WriteElement(element)
            print("Wrote shaped line to page")

    # Finish the block of text
    writer.WriteElement(eb.CreateTextEnd())

    writer.End()    # save changes to the current page
    doc.PagePushBack(page)

    doc.Save(output_path + "unicodewrite.pdf", SDFDoc.e_remove_unused | SDFDoc.e_hex_strings)
    print("Done. Result saved in unicodewrite.pdf...")

    doc.Close()
    PDFNet.Terminate()

if __name__ == '__main__':
    main()

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
6require '../../../PDFNetC/Lib/PDFNetRuby'
7include PDFNetRuby
8require '../../LicenseKey/RUBY/LicenseKey'
9
10$stdout.sync = true
11
12# Relative path to the folder containing the test files.
13$input_path = "../../TestFiles/"
14$output_path = "../../TestFiles/Output/"
15
16# This example illustrates how to create Unicode text and how to embed composite fonts.
17# 
18# Note: This demo assumes that 'arialuni.ttf' is present in '/Samples/TestFiles' 
19# directory. Arial Unicode MS is about 24MB in size and it comes together with Windows and 
20# MS Office.
21# 
22# For more information about Arial Unicode MS, please consult the following Microsoft Knowledge 
23# Base Article: WD2002: General Information About the Arial Unicode MS Font
24#    http://support.microsoft.com/support/kb/articles/q287/2/47.asp
25# 
26# For more information consult: 
27#    http://office.microsoft.com/search/results.aspx?Scope=DC&Query=font&CTT=6&Origin=EC010331121033
28#    http://www.microsoft.com/downloads/details.aspx?FamilyID=1F0303AE-F055-41DA-A086-A65F22CB5593
29# 
30# In case you don't have access to Arial Unicode MS you can use cyberbit.ttf 
31# (http://ftp.netscape.com/pub/communicator/extras/fonts/windows/) instead.
# Builds a one-page PDF that demonstrates Unicode text output.
#
# The page holds two text objects:
#   1. runs of raw Unicode code points written with an embedded CID TrueType
#      font (falling back to Helvetica when the font cannot be created), and
#   2. lines read from a UTF-16LE text file and passed through GetShapedText
#      with a font created in indexed (glyph index) encoding mode.
#
# Reads fonts and sample text from $input_path, writes "unicodewrite.pdf"
# to $output_path, and prints progress messages to stdout.
def main()
	PDFNet.Initialize(PDFTronLicense.Key)

	doc = PDFDoc.new
	eb = ElementBuilder.new
	writer = ElementWriter.new
	# Start a new page ------------------------------------
	page = doc.PageCreate(Rect.new(0, 0, 612, 794))
	writer.Begin(page)    # begin writing to this page

	# Embed and subset the font
	font_program = $input_path + "ARIALUNI.TTF"
	if not File.file?(font_program)
		if ENV['OS'] == "Windows_NT"
			font_program = "C:/Windows/Fonts/ARIALUNI.TTF"
			puts "Note: Using ARIALUNI.TTF from C:/Windows/Fonts directory."
		end
	end

	# Font creation is best-effort: any failure leaves fnt as nil and the
	# Helvetica fallback below is used instead.
	fnt = nil
	begin
		fnt = Font.CreateCIDTrueTypeFont(doc.GetSDFDoc(), font_program, true, true)
	rescue
	end

	if not fnt.nil?
		puts "Note: using " + font_program + " for unshaped unicode text"
	else
		puts "Note: using system font substitution for unshaped unicode text"
		fnt = Font.Create(doc.GetSDFDoc(), "Helvetica", "")
	end

	# Open the first text object. The font size is 1 and the text matrix
	# scales by 10.
	element = eb.CreateTextBegin(fnt, 1)
	element.SetTextMatrix(10, 0, 0, 10, 50, 600)
	element.GetGState.SetLeading(2)         # Set the spacing between lines
	writer.WriteElement(element)

	# Hello World!
	hello = ['H','e','l','l','o',' ','W','o','r','l','d','!']
	writer.WriteElement(eb.CreateUnicodeTextRun(hello, hello.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# Latin
	latin = ['a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0, 
		0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2' ]# etc.
	writer.WriteElement(eb.CreateUnicodeTextRun(latin, latin.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# Greek
	greek = [0x039E, 0x039F, 0x03A0, 0x03A1,0x03A3, 0x03A6, 0x03A8, 0x03A9]
	writer.WriteElement(eb.CreateUnicodeTextRun(greek, greek.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# Cyrillic
	cyrillic = [0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
		0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419]
	writer.WriteElement(eb.CreateUnicodeTextRun(cyrillic, cyrillic.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# Hebrew
	hebrew = [0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
		0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1]
	writer.WriteElement(eb.CreateUnicodeTextRun(hebrew, hebrew.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# Arabic
	arabic = [0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
		0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635]
	writer.WriteElement(eb.CreateUnicodeTextRun(arabic, arabic.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# Thai
	thai = [0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 
		0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12]
	writer.WriteElement(eb.CreateUnicodeTextRun(thai, thai.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# Hiragana - Japanese 
	hiragana = [0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
		0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052]
	writer.WriteElement(eb.CreateUnicodeTextRun(hiragana, hiragana.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# CJK Unified Ideographs 
	cjk_uni = [0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849, 
		0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852]
	writer.WriteElement(eb.CreateUnicodeTextRun(cjk_uni, cjk_uni.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# Simplified Chinese
	chinese_simplified = [0x4e16, 0x754c, 0x60a8, 0x597d]
	writer.WriteElement(eb.CreateUnicodeTextRun(chinese_simplified, chinese_simplified.length))
	writer.WriteElement(eb.CreateTextNewLine)

	# Finish the first block of text. It must be closed here, before the next
	# CreateTextBegin, so the two text objects are written one after the other
	# instead of nested.
	writer.WriteElement(eb.CreateTextEnd)

	puts "Now using text shaping logic to place text"

	# Create a font in indexed encoding mode 
	# normally this would mean that we are required to provide glyph indices
	# directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
	# method to take care of this detail for us.
	indexed_font = Font.CreateCIDTrueTypeFont(doc.GetSDFDoc(), $input_path + "NotoSans_with_hindi.ttf", true, true, Font::E_Indices)
	element = eb.CreateTextBegin(indexed_font, 10)
	writer.WriteElement(element)

	line_pos = 350.0
	line_space = 20.0

	# Transform unicode text into an abstract collection of glyph indices and positioning info 
	shaped_text = indexed_font.GetShapedText("Shaped Hindi Text:")

	# transform the shaped text info into a PDF element and write it to the page
	element = eb.CreateShapedTextRun(shaped_text)
	element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos)
	writer.WriteElement(element)

	# read in unicode text lines from a file 
	# The file is read once, inside a block, so the handle is closed as soon
	# as the lines are in memory.
	hindi_lines = File.open($input_path + "hindi_sample_utf16le.txt", "rb:UTF-16LE") do |f|
		f.readlines
	end
	puts "Read in %d lines of Unicode text from file" % hindi_lines.length

	# written counts only the lines that were placed successfully, so a line
	# that fails does not leave a vertical gap on the page.
	written = 0
	hindi_lines.each do |line|
		begin
			# chomp removes only a trailing line terminator (LF or CRLF), so a
			# final line without a newline keeps its last character.
			shaped_text = indexed_font.GetShapedText(line.encode('utf-8').chomp)
			element = eb.CreateShapedTextRun(shaped_text)
			element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos-line_space*(written+1))
			writer.WriteElement(element)
			puts "Wrote shaped line to page"
			written += 1
		rescue => e
			# Best-effort: skip a line that cannot be shaped, but report it on
			# stderr instead of dropping the error silently.
			warn "Skipped a line that could not be shaped: #{e.message}"
		end
	end

	# Finish the shaped block of text
	writer.WriteElement(eb.CreateTextEnd)

	writer.End    # save changes to the current page
	doc.PagePushBack(page)

	doc.Save($output_path + "unicodewrite.pdf", SDFDoc::E_remove_unused | SDFDoc::E_hex_strings)
	puts "Done. Result saved in unicodewrite.pdf..."

	doc.Close
	PDFNet.Terminate
end
182
183main()

1'
2' Copyright (c) 2001-2024 by Apryse Software Inc. All Rights Reserved.
3'
4
5Imports System
6Imports System.IO
7Imports System.Text
8Imports pdftron
9Imports pdftron.Common
10Imports pdftron.Filters
11Imports pdftron.SDF
12Imports pdftron.PDF
13
14' This example illustrates how to create Unicode text and how to embed composite fonts.
Module UnicodeWriteTestVB
	' Holds the PDFNet loader instance obtained in the module constructor below.
	Dim pdfNetLoader As PDFNetLoader
	Sub New()
		pdfNetLoader = pdftron.PDFNetLoader.Instance()
	End Sub

	' Note: This demo assumes that 'arialuni.ttf' is present in '/Samples/TestFiles' 
	' directory. Arial Unicode MS is about 24MB in size and it comes together with Windows and 
	' MS Office.
	' 
	' For more information about Arial Unicode MS, please consult the following Microsoft Knowledge 
	' Base Article: WD2002: General Information About the Arial Unicode MS Font
	'  http://support.microsoft.com/support/kb/articles/q287/2/47.asp
	'
	' For more information consult: 
	'    http://office.microsoft.com/search/results.aspx?Scope=DC&Query=font&CTT=6&Origin=EC010331121033
	'    http://www.microsoft.com/downloads/details.aspx?FamilyID=1F0303AE-F055-41DA-A086-A65F22CB5593
	' 
	' In case you don't have access to Arial Unicode MS you can use cyberbit.ttf 
	' (ftp://ftp.netscape.com/pub/communicator/extras/fonts/windows/) instead.
	'
	' Main builds a one-page PDF with two text objects: runs of Unicode characters
	' written with an embedded CID TrueType font (or Helvetica when no such font can
	' be created), followed by lines of a UTF-16LE text file placed with GetShapedText.
	' The result is saved as "unicodewrite.pdf" under output_path. Errors are reported
	' on the console.
	Sub Main()

		PDFNet.Initialize(PDFTronLicense.Key)

		' Relative path to the folder containing test files.
		Dim input_path As String = "../../../../TestFiles/"
		Dim output_path As String = "../../../../TestFiles/Output/"

		Try
			Using doc As PDFDoc = New PDFDoc
				Using eb As ElementBuilder = New ElementBuilder
					Using writer As ElementWriter = New ElementWriter

						' Start a new page ------------------------------------
						Dim page As Page = doc.PageCreate(New Rect(0, 0, 612, 794))

						writer.Begin(page)		  ' begin writing to this page

						' Explicitly Nothing so the fallback checks below never read an
						' unassigned variable when font creation throws.
						Dim fnt As Font = Nothing
						Try

							' Full font embedding
							Dim myfont As System.Drawing.Font = New System.Drawing.Font("Arial Unicode MS", 12)
							fnt = Font.CreateCIDTrueTypeFont(doc.GetSDFDoc(), myfont, True, True)

							' To embed the font file directly use:
							' fnt = Font.CreateCIDTrueTypeFont(doc, input_path + "arialuni.ttf", true, true)

							' Example of font substitution
							' fnt = Font.CreateCIDTrueTypeFont(doc, input_path + "arialuni.ttf", false)
						Catch e As PDFNetException
							' Best-effort: fall through to the next font source.
						End Try

						' Second attempt: the font file shipped with the test files.
						If fnt Is Nothing Then
							Try
								fnt = Font.CreateCIDTrueTypeFont(doc, input_path & "ARIALUNI.TTF", True, True)
							Catch e As PDFNetException
								' Best-effort: fall through to the next font source.
							End Try
						End If

						' Third attempt: the Windows fonts directory.
						If fnt Is Nothing Then
							Try
								fnt = Font.CreateCIDTrueTypeFont(doc, "C:/Windows/Fonts/ARIALUNI.TTF", True, True)
							Catch e As PDFNetException
								' Best-effort: the Helvetica fallback below is used instead.
							End Try
						End If

						If fnt Is Nothing Then
							Console.WriteLine("Note: using system font substitution for unshaped unicode text")
							fnt = Font.Create(doc, "Helvetica", "")
						Else
							Console.WriteLine("Note: using Arial Unicode for unshaped unicode text")
						End If

						' Open the first text object. The font size is 1 and the text matrix scales by 10.
						Dim element As Element = eb.CreateTextBegin(fnt, 1)
						element.SetTextMatrix(10, 0, 0, 10, 50, 600)
						element.GetGState().SetLeading(2)			' Set the spacing between lines
						writer.WriteElement(element)

						' Hello World!!!
						Dim hello As String = "Hello World!"
						writer.WriteElement(eb.CreateUnicodeTextRun(hello))
						writer.WriteElement(eb.CreateTextNewLine())

						' Latin
						Dim latin As Char() = { _
							"a"c, "A"c, "b"c, "B"c, "c"c, "C"c, "d"c, "D"c, ChrW(&H45), ChrW(&H46), ChrW(&HC0), _
							ChrW(&HC1), ChrW(&HC2), ChrW(&H143), ChrW(&H144), ChrW(&H145), ChrW(&H152), "1"c, "2"c _
							}			 ' etc.

						writer.WriteElement(eb.CreateUnicodeTextRun(New String(latin)))
						writer.WriteElement(eb.CreateTextNewLine())

						' Greek
						Dim greek As Char() = { _
							ChrW(&H39E), ChrW(&H39F), ChrW(&H3A0), ChrW(&H3A1), ChrW(&H3A3), ChrW(&H3A6), ChrW(&H3A8), ChrW(&H3A9) _
							}			 ' etc.

						writer.WriteElement(eb.CreateUnicodeTextRun(New String(greek)))
						writer.WriteElement(eb.CreateTextNewLine())

						' Cyrillic
						Dim cyrillic As Char() = { _
							ChrW(&H409), ChrW(&H40A), ChrW(&H40B), ChrW(&H40C), ChrW(&H40E), ChrW(&H40F), ChrW(&H410), ChrW(&H411), _
							ChrW(&H412), ChrW(&H413), ChrW(&H414), ChrW(&H415), ChrW(&H416), ChrW(&H417), ChrW(&H418), ChrW(&H419) _
							}			 ' etc.

						writer.WriteElement(eb.CreateUnicodeTextRun(New String(cyrillic)))
						writer.WriteElement(eb.CreateTextNewLine())

						' Hebrew
						Dim hebrew As Char() = { _
							ChrW(&H5D0), ChrW(&H5D1), ChrW(&H5D3), ChrW(&H5D3), ChrW(&H5D4), ChrW(&H5D5), ChrW(&H5D6), ChrW(&H5D7), ChrW(&H5D8), _
							ChrW(&H5D9), ChrW(&H5DA), ChrW(&H5DB), ChrW(&H5DC), ChrW(&H5DD), ChrW(&H5DE), ChrW(&H5DF), ChrW(&H5E0), ChrW(&H5E1) _
							}			 ' etc. 

						writer.WriteElement(eb.CreateUnicodeTextRun(New String(hebrew)))
						writer.WriteElement(eb.CreateTextNewLine())

						' Arabic
						Dim arabic As Char() = { _
							ChrW(&H624), ChrW(&H625), ChrW(&H626), ChrW(&H627), ChrW(&H628), ChrW(&H629), ChrW(&H62A), ChrW(&H62B), ChrW(&H62C), _
							ChrW(&H62D), ChrW(&H62E), ChrW(&H62F), ChrW(&H630), ChrW(&H631), ChrW(&H632), ChrW(&H633), ChrW(&H634), ChrW(&H635) _
							}			 ' etc. 

						writer.WriteElement(eb.CreateUnicodeTextRun(New String(arabic)))
						writer.WriteElement(eb.CreateTextNewLine())

						' Thai 
						Dim thai As Char() = { _
							ChrW(&HE01), ChrW(&HE02), ChrW(&HE03), ChrW(&HE04), ChrW(&HE05), ChrW(&HE06), ChrW(&HE07), ChrW(&HE08), ChrW(&HE09), _
							ChrW(&HE0A), ChrW(&HE0B), ChrW(&HE0C), ChrW(&HE0D), ChrW(&HE0E), ChrW(&HE0F), ChrW(&HE10), ChrW(&HE11), ChrW(&HE12) _
							}			 ' etc. 

						writer.WriteElement(eb.CreateUnicodeTextRun(New String(thai)))
						writer.WriteElement(eb.CreateTextNewLine())

						' Hiragana - Japanese 
						Dim hiragana As Char() = { _
							ChrW(&H3041), ChrW(&H3042), ChrW(&H3043), ChrW(&H3044), ChrW(&H3045), ChrW(&H3046), ChrW(&H3047), ChrW(&H3048), ChrW(&H3049), _
							ChrW(&H304A), ChrW(&H304B), ChrW(&H304C), ChrW(&H304D), ChrW(&H304E), ChrW(&H304F), ChrW(&H3051), ChrW(&H3051), ChrW(&H3052) _
							}			 ' etc. 

						writer.WriteElement(eb.CreateUnicodeTextRun(New String(hiragana)))
						writer.WriteElement(eb.CreateTextNewLine())

						' CJK Unified Ideographs
						Dim cjk_uni As Char() = { _
							ChrW(&H5841), ChrW(&H5842), ChrW(&H5843), ChrW(&H5844), ChrW(&H5845), ChrW(&H5846), ChrW(&H5847), ChrW(&H5848), ChrW(&H5849), _
							ChrW(&H584A), ChrW(&H584B), ChrW(&H584C), ChrW(&H584D), ChrW(&H584E), ChrW(&H584F), ChrW(&H5850), ChrW(&H5851), ChrW(&H5852) _
							}			 ' etc. 

						writer.WriteElement(eb.CreateUnicodeTextRun(New String(cjk_uni)))
						writer.WriteElement(eb.CreateTextNewLine())

						' Simplified Chinese
						Dim chinese_simplified As Char() = {ChrW(&H4e16), ChrW(&H754c), ChrW(&H60a8),ChrW(&H597D)}
						writer.WriteElement(eb.CreateUnicodeTextRun(New String(chinese_simplified)))
						writer.WriteElement(eb.CreateTextNewLine())

						' Finish the block of text
						writer.WriteElement(eb.CreateTextEnd())
						Console.WriteLine("Now using text shaping logic to place text")

						' Create a font in indexed encoding mode. Normally this would mean that
						' we are required to provide glyph indices directly to CreateUnicodeTextRun,
						' but instead we use GetShapedText to take care of this detail for us.
						Dim indexedFont As Font = Font.CreateCIDTrueTypeFont(doc, input_path & "NotoSans_with_hindi.ttf", True, True, Font.Encoding.e_Indices)
						element = eb.CreateTextBegin(indexedFont, 10.0)
						writer.WriteElement(element)

						' linePos is the baseline of the next line; it moves down by lineSpace
						' after every line written.
						Dim linePos As Double = 350.0
						Dim lineSpace As Double = 20.0

						' Transform unicode text into shaped text, then into a PDF element
						' that is written to the page.
						Dim shapedText As ShapedText = indexedFont.GetShapedText("Shaped Hindi Text:")
						element = eb.CreateShapedTextRun(shapedText)
						element.SetTextMatrix(1.5, 0, 0, 1.5, 50, linePos)
						linePos -= lineSpace
						writer.WriteElement(element)

						' Read in unicode text lines from a file. The sample file is UTF-16LE
						' (see its name), so decode it with Encoding.Unicode rather than UTF-8.
						Dim hindiTextLines As String() = File.ReadAllLines(input_path & "hindi_sample_utf16le.txt", Encoding.Unicode)

						Console.WriteLine("Read in " & hindiTextLines.Length & " lines of Unicode text from file")
						For Each textLine As String In hindiTextLines
							shapedText = indexedFont.GetShapedText(textLine)
							element = eb.CreateShapedTextRun(shapedText)
							element.SetTextMatrix(1.5, 0, 0, 1.5, 50, linePos)
							linePos -= lineSpace
							writer.WriteElement(element)
							Console.WriteLine("Wrote shaped line to page")
						Next

						' Finish the shaped block of text
						writer.WriteElement(eb.CreateTextEnd())
						writer.End()			  ' save changes to the current page
						doc.PagePushBack(page)

						doc.Save(output_path & "unicodewrite.pdf", SDF.SDFDoc.SaveOptions.e_remove_unused Or SDF.SDFDoc.SaveOptions.e_hex_strings)
						Console.WriteLine("Done. Result saved in unicodewrite.pdf...")
					End Using
				End Using
			End Using

		Catch ex As PDFNetException

			Console.WriteLine(ex.Message)
			Console.WriteLine()

		Catch ex As Exception

			' Report on the console like the PDFNetException branch; a modal
			' message box would block an unattended console run.
			Console.WriteLine(ex.Message)

		End Try
		PDFNet.Terminate()
	End Sub

End Module

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales

Product:

Create Unicode Text, Embed CID in PDFs - Python Sample Code