Create Unicode Text, Embed CID in PDFs - Python Sample Code

Sample code for using Apryse SDK to create Unicode text and embed composite fonts in PDF files. Samples provided in Python, C++, C#, Java, Node.js (JavaScript), PHP, Ruby, Go and VB. Learn more about our Server SDK.

#---------------------------------------------------------------------------------------
# Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
# Consult LICENSE.txt regarding license information.
#---------------------------------------------------------------------------------------

import io
import os
import site
import sys

# The PDFNet Python bindings live outside the default search path, so the
# library folder must be registered before PDFNetPython can be imported.
site.addsitedir("../../../PDFNetC/Lib")
from PDFNetPython import *

# LicenseKey.py sits in a sibling samples folder; extend sys.path to reach it.
sys.path.append("../../LicenseKey/PYTHON")
from LicenseKey import *

# Relative path to the folder containing the test files.
input_path = "../../TestFiles/"
output_path = "../../TestFiles/Output/"

# This example illustrates how to create Unicode text and how to embed composite fonts.
#
# Note: This demo assumes that 'arialuni.ttf' is present in '/Samples/TestFiles'
# directory. Arial Unicode MS is about 24MB in size and it comes together with Windows and
# MS Office.
#
# For more information about Arial Unicode MS, please consult the following Microsoft Knowledge
# Base Article: WD2002: General Information About the Arial Unicode MS Font
# http://support.microsoft.com/support/kb/articles/q287/2/47.asp
#
# For more information consult:
# http://office.microsoft.com/search/results.aspx?Scope=DC&Query=font&CTT=6&Origin=EC010331121033
# http://www.microsoft.com/downloads/details.aspx?FamilyID=1F0303AE-F055-41DA-A086-A65F22CB5593
#
# In case you don't have access to Arial Unicode MS you can use cyberbit.ttf
# (http://ftp.netscape.com/pub/communicator/extras/fonts/windows/) instead.
def _load_unicode_font(doc):
    """Return the font used for the unshaped Unicode text runs.

    Tries to embed and subset ARIALUNI.TTF from the test-files folder; on
    Windows, when that file is missing, the copy in C:/Windows/Fonts is tried
    instead. If the font cannot be created, a "Helvetica" font is created via
    Font.Create as a fallback. A note naming the chosen path is printed.
    """
    font_program = input_path + "ARIALUNI.TTF"
    if not os.path.isfile(font_program) and sys.platform == 'win32':
        font_program = "C:/Windows/Fonts/ARIALUNI.TTF"

    try:
        # Embed and subset the font
        fnt = Font.CreateCIDTrueTypeFont(doc.GetSDFDoc(), font_program, True, True)
    except Exception:
        # Best effort: a missing or unreadable font file is expected on many
        # machines, so fall through to the substitution below. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        fnt = None

    if fnt:
        print("Note: using " + font_program + " for unshaped unicode text")
        return fnt

    print("Note: using system font substitution for unshaped unicode text")
    return Font.Create(doc.GetSDFDoc(), "Helvetica", "")


def _write_unicode_samples(writer, eb, fnt):
    """Write one text block with a line of sample code points per script."""
    element = eb.CreateTextBegin(fnt, 1)
    element.SetTextMatrix(10, 0, 0, 10, 50, 600)
    element.GetGState().SetLeading(2)  # Set the spacing between lines
    writer.WriteElement(element)

    # Hello World!
    hello = ['H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', '!']

    # Latin
    latin = ['a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0,
             0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2']  # etc.

    # Greek
    greek = [0x039E, 0x039F, 0x03A0, 0x03A1, 0x03A3, 0x03A6, 0x03A8, 0x03A9]

    # Cyrillic
    cyrillic = [0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
                0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419]

    # Hebrew
    # NOTE(review): 0x05D3 appears twice where 0x05D2 would continue the
    # sequence; left unchanged to keep the rendered output identical - confirm.
    hebrew = [0x05D0, 0x05D1, 0x05D3, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
              0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1]

    # Arabic
    arabic = [0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
              0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635]

    # Thai
    thai = [0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09,
            0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12]

    # Hiragana - Japanese
    # NOTE(review): 0x3051 appears twice where 0x3050 would continue the
    # sequence; left unchanged to keep the rendered output identical - confirm.
    hiragana = [0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
                0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3051, 0x3051, 0x3052]

    # CJK Unified Ideographs
    cjk_uni = [0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849,
               0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852]

    # Simplified Chinese
    chinese_simplified = [0x4e16, 0x754c, 0x60a8, 0x597d]

    # Each sample becomes one Unicode text run followed by a new-line element,
    # in the order listed here.
    samples = [hello, latin, greek, cyrillic, hebrew, arabic, thai,
               hiragana, cjk_uni, chinese_simplified]
    for code_points in samples:
        writer.WriteElement(eb.CreateUnicodeTextRun(code_points, len(code_points)))
        writer.WriteElement(eb.CreateTextNewLine())

    # Finish the block of text
    writer.WriteElement(eb.CreateTextEnd())


def _write_shaped_text(writer, eb, doc):
    """Write a second text block whose lines are placed via GetShapedText."""
    print("Now using text shaping logic to place text")

    # Create a font in indexed encoding mode
    # normally this would mean that we are required to provide glyph indices
    # directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
    # method to take care of this detail for us.
    indexed_font = Font.CreateCIDTrueTypeFont(
        doc.GetSDFDoc(), input_path + "NotoSans_with_hindi.ttf", True, True, Font.e_Indices)
    writer.WriteElement(eb.CreateTextBegin(indexed_font, 10))

    line_pos = 350.0
    line_space = 20.0

    # Transform unicode text into an abstract collection of glyph indices and positioning info
    shaped_text = indexed_font.GetShapedText("Shaped Hindi Text:")

    # transform the shaped text info into a PDF element and write it to the page
    element = eb.CreateShapedTextRun(shaped_text)
    element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos)
    writer.WriteElement(element)

    # read in unicode text lines from a file
    with io.open(input_path + "hindi_sample_utf16le.txt", "r", encoding='utf-16-le') as f:
        hindi_text = f.readlines()
    print("Read in " + str(len(hindi_text)) + " lines of Unicode text from file")

    for line_number, line in enumerate(hindi_text, start=1):
        # Strip only a trailing newline; slicing off the last character
        # unconditionally would eat real text when the final line of the file
        # is not newline-terminated.
        shaped_text = indexed_font.GetShapedText(line.rstrip("\n"))
        element = eb.CreateShapedTextRun(shaped_text)
        # Each file line goes one line_space further down below the heading.
        element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos - line_space * line_number)
        writer.WriteElement(element)
        print("Wrote shaped line to page")

    # Finish the block of text
    writer.WriteElement(eb.CreateTextEnd())


def main():
    """Build a one-page PDF of Unicode text samples and save it.

    Initializes PDFNet with LicenseKey, creates a 612 x 794 page, writes the
    unshaped Unicode samples and then the shaped Hindi text, and saves the
    result as "unicodewrite.pdf" under output_path. Errors from the shaped-text
    font, the Hindi sample file, or saving propagate to the caller; the
    document is closed and PDFNet terminated whether or not an error occurs.
    """
    PDFNet.Initialize(LicenseKey)
    try:
        doc = PDFDoc()
        try:
            eb = ElementBuilder()
            writer = ElementWriter()

            # Start a new page ------------------------------------
            page = doc.PageCreate(Rect(0, 0, 612, 794))

            writer.Begin(page)  # begin writing to this page

            _write_unicode_samples(writer, eb, _load_unicode_font(doc))
            _write_shaped_text(writer, eb, doc)

            writer.End()  # save changes to the current page
            doc.PagePushBack(page)

            doc.Save(output_path + "unicodewrite.pdf",
                     SDFDoc.e_remove_unused | SDFDoc.e_hex_strings)
            print("Done. Result saved in unicodewrite.pdf...")
        finally:
            doc.Close()
    finally:
        PDFNet.Terminate()

# Run the sample only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Did you find this helpful?

Trial setup questions?

Ask experts on Discord

Need other help?

Contact Support

Pricing or product questions?

Contact Sales