Create Unicode Text and Embed CID (Composite) Fonts in PDFs - Ruby Sample Code

Sample code for using Apryse SDK to create Unicode text and embed composite fonts in PDF files. Samples provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our Server SDK.

1#---------------------------------------------------------------------------------------
2# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
3# Consult LICENSE.txt regarding license information.
4#---------------------------------------------------------------------------------------
5
# Load the PDFNet Ruby bindings and mix the PDFNetRuby module into the top level,
# so the SDK names used in main can be referenced without a module prefix.
require '../../../PDFNetC/Lib/PDFNetRuby'
include PDFNetRuby
# Presumably defines PDFTronLicense, whose Key is passed to PDFNet.Initialize in main.
require '../../LicenseKey/RUBY/LicenseKey'

# Write progress messages to stdout immediately instead of buffering them.
$stdout.sync = true

# Relative path to the folder containing the test files.
$input_path = "../../TestFiles/"
# Relative path to the folder the generated PDF is saved to.
$output_path = "../../TestFiles/Output/"
15
# This example illustrates how to create Unicode text and how to embed composite fonts.
#
# Note: This demo assumes that 'ARIALUNI.TTF' (Arial Unicode MS) is present in the
# '/Samples/TestFiles' directory. Arial Unicode MS is about 24MB in size and comes
# together with Windows and MS Office. If the font cannot be loaded, the sample
# falls back to the Helvetica font.
#
# For more information about Arial Unicode MS, please consult the following Microsoft Knowledge
# Base Article: WD2002: General Information About the Arial Unicode MS Font
# http://support.microsoft.com/support/kb/articles/q287/2/47.asp
#
# For more information consult:
# http://office.microsoft.com/search/results.aspx?Scope=DC&Query=font&CTT=6&Origin=EC010331121033
# http://www.microsoft.com/downloads/details.aspx?FamilyID=1F0303AE-F055-41DA-A086-A65F22CB5593
#
# If you don't have access to Arial Unicode MS, you can use cyberbit.ttf
# (http://ftp.netscape.com/pub/communicator/extras/fonts/windows/) instead.
# Builds a one-page PDF that demonstrates Unicode text output and saves it as
# $output_path + "unicodewrite.pdf".
#
# The page receives two separate text blocks:
#   1. unshaped Unicode runs written with an embedded CID TrueType font
#      (Helvetica is substituted when that font cannot be loaded);
#   2. shaped text produced by GetShapedText from a font created in indexed
#      encoding mode, one run per line of hindi_sample_utf16le.txt.
#
# Reads the $input_path / $output_path globals and prints progress to stdout.
# Tolerated failures (missing Arial Unicode font, a line that cannot be shaped)
# are reported on stderr. Any other error propagates to the caller, but only
# after the document has been closed and PDFNet has been terminated.
def main()
  PDFNet.Initialize(PDFTronLicense.Key)
  begin
    doc = PDFDoc.new
    begin
      eb = ElementBuilder.new
      writer = ElementWriter.new

      # Start a new page ------------------------------------
      page = doc.PageCreate(Rect.new(0, 0, 612, 794))
      writer.Begin(page) # begin writing to this page

      write_unshaped_text(doc, writer, eb)

      puts "Now using text shaping logic to place text"
      write_shaped_text(doc, writer, eb)

      writer.End # save changes to the current page
      doc.PagePushBack(page)

      doc.Save($output_path + "unicodewrite.pdf", SDFDoc::E_remove_unused | SDFDoc::E_hex_strings)
      puts "Done. Result saved in unicodewrite.pdf..."
    ensure
      # Release the document even when building or saving the page failed.
      doc.Close
    end
  ensure
    PDFNet.Terminate
  end
end

# Returns the font used for the unshaped text: the embedded and subsetted
# ARIALUNI.TTF when it can be loaded, otherwise the font from Font.Create("Helvetica").
def load_unicode_font(doc)
  font_program = $input_path + "ARIALUNI.TTF"
  if !File.file?(font_program) && ENV['OS'] == "Windows_NT"
    font_program = "C:/Windows/Fonts/ARIALUNI.TTF"
    puts "Note: Using ARIALUNI.TTF from C:/Windows/Fonts directory."
  end

  fnt = nil
  begin
    # Embed and subset the font
    fnt = Font.CreateCIDTrueTypeFont(doc.GetSDFDoc, font_program, true, true)
  rescue StandardError => e
    # Best effort: the font is optional, so report the reason and fall through
    # to the substitute font instead of aborting the sample.
    warn "Note: could not load #{font_program}: #{e.message}"
  end

  if fnt.nil?
    puts "Note: using system font substitution for unshaped unicode text"
    Font.Create(doc.GetSDFDoc, "Helvetica", "")
  else
    puts "Note: using " + font_program + " for unshaped unicode text"
    fnt
  end
end

# Writes one Unicode text run followed by a line break into the open text block.
# code_units is handed unchanged to CreateUnicodeTextRun; the samples below mix
# one-character strings and integer code points.
def write_unicode_line(writer, eb, code_units)
  writer.WriteElement(eb.CreateUnicodeTextRun(code_units, code_units.length))
  writer.WriteElement(eb.CreateTextNewLine)
end

# Writes the first text block: one line of unshaped Unicode text per script sample.
def write_unshaped_text(doc, writer, eb)
  element = eb.CreateTextBegin(load_unicode_font(doc), 1)
  element.SetTextMatrix(10, 0, 0, 10, 50, 600)
  element.GetGState.SetLeading(2) # Set the spacing between lines
  writer.WriteElement(element)

  samples = [
    # Hello World!
    "Hello World!".chars,
    # Latin
    ['a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0,
     0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2'], # etc.
    # Greek
    [0x039E, 0x039F, 0x03A0, 0x03A1, 0x03A3, 0x03A6, 0x03A8, 0x03A9],
    # Cyrillic
    [0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
     0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419],
    # Hebrew
    # NOTE(review): 0x05D3 appears twice and 0x05D2 is skipped - confirm this is intended.
    [0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
     0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1],
    # Arabic
    [0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
     0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635],
    # Thai
    [0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09,
     0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12],
    # Hiragana - Japanese
    # NOTE(review): 0x3051 appears twice and 0x3050 is skipped - confirm this is intended.
    [0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
     0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052],
    # CJK Unified Ideographs
    [0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849,
     0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852],
    # Simplified Chinese
    [0x4e16, 0x754c, 0x60a8, 0x597d]
  ]
  samples.each { |code_units| write_unicode_line(writer, eb, code_units) }

  # Finish the block of text. It has to be closed here, before the shaped block
  # is opened: PDF text objects must not be nested.
  writer.WriteElement(eb.CreateTextEnd)
end

# Writes the second text block: a heading plus every line of
# hindi_sample_utf16le.txt, positioned through the font's text shaping.
def write_shaped_text(doc, writer, eb)
  # Create a font in indexed encoding mode
  # normally this would mean that we are required to provide glyph indices
  # directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
  # method to take care of this detail for us.
  indexed_font = Font.CreateCIDTrueTypeFont(doc.GetSDFDoc,
                                            $input_path + "NotoSans_with_hindi.ttf",
                                            true, true, Font::E_Indices)
  writer.WriteElement(eb.CreateTextBegin(indexed_font, 10))

  line_pos = 350.0
  line_space = 20.0

  # Transform unicode text into an abstract collection of glyph indices and positioning info
  shaped_text = indexed_font.GetShapedText("Shaped Hindi Text:")

  # transform the shaped text info into a PDF element and write it to the page
  element = eb.CreateShapedTextRun(shaped_text)
  element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos)
  writer.WriteElement(element)

  # Read the unicode text lines once; the block form closes the file handle.
  lines = File.open($input_path + "hindi_sample_utf16le.txt", "rb:UTF-16LE") { |f| f.readlines }
  puts "Read in %d lines of Unicode text from file" % lines.length

  written = 0
  lines.each do |line|
    begin
      # chomp removes only a trailing line break (LF or CRLF), so a final line
      # without one keeps its last character.
      shaped_text = indexed_font.GetShapedText(line.encode('utf-8').chomp)
      element = eb.CreateShapedTextRun(shaped_text)
      element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos - line_space * (written + 1))
      writer.WriteElement(element)
      puts "Wrote shaped line to page"
      written += 1
    rescue StandardError => e
      # Best effort: skip a line that cannot be shaped, but say so.
      warn "Note: skipped a line that could not be shaped: #{e.message}"
    end
  end

  # Finish the shaped block of text
  writer.WriteElement(eb.CreateTextEnd)
end
182
# Run the sample when this script is executed.
main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales