Some test text!

Search
Hamburger Icon

Create Unicode text or embed composite fonts in PDF files using C++

More languages

More languages
Java (Android)
C++
C#
C# (.NET Core)
Go
Java
Kotlin
Obj-C
JS (Node.js)
PHP
Python
Ruby
Swift
C# (UWP)
VB
C# (Xamarin)

Sample C++ code for using PDFTron SDK to create Unicode text and embed composite fonts in PDF files. Learn more about our C++ PDF Library.

Get Started Samples Download

To run this sample, get started with a free trial of Apryse SDK.

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------
#include <PDF/PDFNet.h>
#include <PDF/PDFDoc.h>
#include <PDF/ElementBuilder.h>
#include <PDF/ElementWriter.h>
#include <PDF/ColorSpace.h>
#include <PDF/ShapedText.h>
#include <PDF/Rect.h>

#include <Filters/MappedFile.h>
#include <Filters/FilterReader.h>

#include <fstream>
#include <iostream>
#include "../../LicenseKey/CPP/LicenseKey.h"

using namespace std;

using namespace pdftron;
using namespace SDF;
using namespace PDF;

// Forward declaration; the definition follows main().
// Reads the text file at 'file_path' (treated as 2-byte little-endian code units)
// and returns its lines as a list of UString objects.
std::vector<UString> TextFileToStringList(const UString& file_path);

/**
 * This example illustrates how to create Unicode text and how to embed composite fonts.
 *
 * The program builds a one-page document in two passes:
 *   1. Unshaped text: short runs of raw Unicode code points from several scripts are
 *      written with a CID TrueType font created from a wide-coverage font file.
 *   2. Shaped text: lines read from 'hindi_sample_utf16le.txt' are converted with
 *      Font::GetShapedText() and written with a font created in indexed encoding mode.
 * The document is saved as 'unicodewrite.pdf' in '../../TestFiles/Output/'.
 *
 * Note: This demo attempts to make use of 'ARIALUNI.TTF' in the '../../TestFiles/'
 * directory (on Windows, 'C:/Windows/Fonts/ARIALUNI.TTF' is tried as a second choice).
 * Arial Unicode MS is about 24MB in size and used to come together with Windows and
 * MS Office.
 *
 * In case you don't have access to Arial Unicode MS you can use another wide coverage
 * font, like Google Noto, GNU UniFont, or cyberbit. Many of these are freely available,
 * and there is a list maintained at https://en.wikipedia.org/wiki/Unicode_font
 *
 * If no specific font file can be loaded, the demo will fall back to system specific font
 * substitution routines, and the result will depend on which fonts are available.
 *
 * @param argc unused.
 * @param argv unused.
 * @return 0 on success, 1 if an exception was caught while building or saving the document.
 */
int main(int argc, char *argv[])
{
	int ret = 0;
	PDFNet::Initialize(LicenseKey);

	// Relative path to the folder containing test files.
	string input_path =  "../../TestFiles/";
	string output_path = "../../TestFiles/Output/";

	try
	{
		PDFDoc doc;

		ElementBuilder eb;
		ElementWriter writer;

		// Start a new page ------------------------------------
		Page page = doc.PageCreate(Rect(0, 0, 612, 794));

		writer.Begin(page);	// begin writing to this page

		string font_program = input_path + "ARIALUNI.TTF";

		// The stream is only used to probe whether the font file can be opened;
		// it is closed again when this scope ends.
		{
			std::ifstream ifs(font_program.c_str(), ios_base::in);
#if defined(_WIN32)
			if (!ifs.is_open()) {
				font_program = string("C:/Windows/Fonts/ARIALUNI.TTF");
				ifs.open(font_program.c_str(), ios_base::in);
			}
#endif
			if (!ifs.is_open()) {
				// An empty path signals "no font file found" to the code below.
				font_program.clear();
			}
		}

		Font fnt;
		if(font_program.size())
		{
			cout << "Note: using " << font_program << " for unshaped unicode text" << endl;
			// if we can find a specific wide-coverage font file, then use that directly
			fnt = Font::CreateCIDTrueTypeFont(doc, font_program.c_str(), true, true);
		}
		else
		{
			cout << "Note: using system font substitution for unshaped unicode text" << endl;
			// if we can't find a specific file, then use system font substitution
			// as a fallback, using "Helvetica" as a hint
			UString empty_temp;
			fnt = Font::Create(doc, "Helvetica", empty_temp);
		}

		Element element = eb.CreateTextBegin(fnt, 1);
		element.SetTextMatrix(10, 0, 0, 10, 50, 600);
		element.GetGState().SetLeading(2);		 // Set the spacing between lines
		writer.WriteElement(element);

		// Each script below follows the same pattern: write one run of code points,
		// then move to the next line.

		// Hello World!
		Unicode hello[] = { 'H','e','l','l','o',' ','W','o','r','l','d','!'};
		writer.WriteElement(eb.CreateUnicodeTextRun(hello, sizeof(hello)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// Latin
		Unicode latin[] = {
			'a', 'A', 'b', 'B', 'c', 'C', 'd', 'D', 0x45, 0x0046, 0x00C0,
			0x00C1, 0x00C2, 0x0143, 0x0144, 0x0145, 0x0152, '1', '2' // etc.
		};
		writer.WriteElement(eb.CreateUnicodeTextRun(latin, sizeof(latin)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// Greek
		Unicode greek[] = {
			0x039E, 0x039F, 0x03A0, 0x03A1,0x03A3, 0x03A6, 0x03A8, 0x03A9  // etc.
		};
		writer.WriteElement(eb.CreateUnicodeTextRun(greek, sizeof(greek)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// Cyrillic
		Unicode cyrillic[] = {
			0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0410, 0x0411,
			0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419 // etc.
		};
		writer.WriteElement(eb.CreateUnicodeTextRun(cyrillic, sizeof(cyrillic)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// Hebrew (consecutive code points starting at U+05D0)
		Unicode hebrew[] = {
			0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8,
			0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 0x05E0, 0x05E1 // etc.
		};
		writer.WriteElement(eb.CreateUnicodeTextRun(hebrew, sizeof(hebrew)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// Arabic
		Unicode arabic[] = {
			0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C,
			0x062D, 0x062E, 0x062F, 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635 // etc.
		};
		writer.WriteElement(eb.CreateUnicodeTextRun(arabic, sizeof(arabic)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// Thai
		Unicode thai[] = {
			0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09,
			0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F, 0x0E10, 0x0E11, 0x0E12 // etc.
		};
		writer.WriteElement(eb.CreateUnicodeTextRun(thai, sizeof(thai)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// Hiragana - Japanese (consecutive code points starting at U+3041)
		Unicode hiragana[] = {
			0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049,
			0x304A, 0x304B, 0x304C, 0x304D, 0x304E, 0x304F, 0x3050, 0x3051, 0x3052 // etc.
		};
		writer.WriteElement(eb.CreateUnicodeTextRun(hiragana, sizeof(hiragana)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// CJK Unified Ideographs
		Unicode cjk_uni[] = {
			0x5841, 0x5842, 0x5843, 0x5844, 0x5845, 0x5846, 0x5847, 0x5848, 0x5849,
			0x584A, 0x584B, 0x584C, 0x584D, 0x584E, 0x584F, 0x5850, 0x5851, 0x5852 // etc.
		};
		writer.WriteElement(eb.CreateUnicodeTextRun(cjk_uni, sizeof(cjk_uni)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// Simplified Chinese
		Unicode chinese_simplified[] = {
			0x4e16, 0x754c, 0x60a8, 0x597d
		};
		writer.WriteElement(eb.CreateUnicodeTextRun(chinese_simplified, sizeof(chinese_simplified)/sizeof(Unicode)));
		writer.WriteElement(eb.CreateTextNewLine());

		// Finish the block of text
		writer.WriteElement(eb.CreateTextEnd());

		cout << "Now using text shaping logic to place text" << endl;

		// Create a font in indexed encoding mode
		// normally this would mean that we are required to provide glyph indices
		// directly to CreateUnicodeTextRun, but instead, we will use the GetShapedText
		// method to take care of this detail for us.
		Font indexed_font = Font::CreateCIDTrueTypeFont(doc, input_path + "NotoSans_with_hindi.ttf", true, true, Font::e_Indices);
		element = eb.CreateTextBegin(indexed_font, 10);
		writer.WriteElement(element);

		// Vertical position of the heading line, and the distance by which each
		// following line is placed below the previous one.
		double line_pos = 350.0;
		double line_space = 20.0;

		// Transform unicode text into an abstract collection of glyph indices and positioning info
		ShapedText shaped_text = indexed_font.GetShapedText(UString("Shaped Hindi Text:"));

		// transform the shaped text info into a PDF element and write it to the page
		element = eb.CreateShapedTextRun(shaped_text);
		element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos);
		writer.WriteElement(element);

		// read in unicode text lines from a file
		std::vector<UString> hindi_text = TextFileToStringList(input_path + "hindi_sample_utf16le.txt");

		cout << "Read in " << hindi_text.size() << " lines of Unicode text from file" << endl;
		for (size_t i = 0; i < hindi_text.size(); ++i)
		{
			shaped_text = indexed_font.GetShapedText(hindi_text[i]);
			element = eb.CreateShapedTextRun(shaped_text);
			// (i+1) because the heading already occupies the line at line_pos
			element.SetTextMatrix(1.5, 0, 0, 1.5, 50, line_pos-line_space*(i+1));
			writer.WriteElement(element);
			cout << "Wrote shaped line to page" << endl;
		}

		// Finish the shaped block of text
		writer.WriteElement(eb.CreateTextEnd());

		writer.End();  // save changes to the current page
		doc.PagePushBack(page);

		doc.Save((output_path + "unicodewrite.pdf").c_str(), SDFDoc::e_remove_unused | SDFDoc::e_hex_strings , NULL);
		cout << "Done. Result saved in unicodewrite.pdf..." << endl;
	}
	catch(Common::Exception& e)
	{
		cout << e << endl;
		ret = 1;
	}
	catch(...)
	{
		cout << "Unknown Exception" << endl;
		ret = 1;
	}

	// Reached on both the success and the failure path.
	PDFNet::Terminate();
	return ret;
}

std::vector<UString> TextFileToStringList(const UString& file_path)
{
	Filters::MappedFile utf_text_filter(file_path);
	size_t file_size = utf_text_filter.FileSize();
	Filters::FilterReader utf_reader(utf_text_filter);
	std::vector<unsigned char> data = utf_reader.Read(file_size);
	data.push_back(0);
	data.push_back(0);
	std::vector<UString> ret;
	size_t line_start = 0;
	for (size_t i = 0; i+1 < data.size(); i +=2)
	{
		bool has_newline = false;
		size_t end_index = i;
		while(i+1 < data.size() && data[i+1] == 0 && (data[i] == '\n' || data[i] == '\r'))
		{
			i+=2;
			has_newline = true;
		}
		if(has_newline || (i + 2 >=  data.size() && line_start < data.size()))
		{
			ret.push_back(UString(reinterpret_cast<Unicode*>(&data[line_start]), (end_index - line_start)/2));
			line_start = i;
		}
	}
	return ret;
}