OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2009 Red Hat, Inc. | |
3 * Copyright (C) 2009 Keith Stribley | |
4 * | |
5 * This is part of HarfBuzz, a text shaping library. | |
6 * | |
7 * Permission is hereby granted, without written agreement and without | |
8 * license or royalty fees, to use, copy, modify, and distribute this | |
9 * software and its documentation for any purpose, provided that the | |
10 * above copyright notice and the following two paragraphs appear in | |
11 * all copies of this software. | |
12 * | |
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | |
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | |
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | |
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | |
17 * DAMAGE. | |
18 * | |
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | |
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | |
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | |
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
24 * | |
25 * Red Hat Author(s): Behdad Esfahbod | |
26 */ | |
27 | |
28 #include "hb-private.h" | |
29 | |
30 #include "hb-icu.h" | |
31 | |
32 #include "hb-unicode-private.h" | |
33 | |
34 #include <unicode/uversion.h> | |
35 #include <unicode/uchar.h> | |
36 #include <unicode/uscript.h> | |
37 | |
38 HB_BEGIN_DECLS | |
39 | |
40 | |
41 static hb_codepoint_t hb_icu_get_mirroring (hb_codepoint_t unicode) { return u_c
harMirror(unicode); } | |
42 static unsigned int hb_icu_get_combining_class (hb_codepoint_t unicode) { return
u_getCombiningClass (unicode); } | |
43 | |
44 static unsigned int | |
45 hb_icu_get_eastasian_width (hb_codepoint_t unicode) | |
46 { | |
47 switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH)) | |
48 { | |
49 case U_EA_WIDE: | |
50 case U_EA_FULLWIDTH: | |
51 return 2; | |
52 case U_EA_NEUTRAL: | |
53 case U_EA_AMBIGUOUS: | |
54 case U_EA_HALFWIDTH: | |
55 case U_EA_NARROW: | |
56 return 1; | |
57 } | |
58 return 1; | |
59 } | |
60 | |
61 static hb_category_t | |
62 hb_icu_get_general_category (hb_codepoint_t unicode) | |
63 { | |
64 switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY)) | |
65 { | |
66 case U_UNASSIGNED: return HB_CATEGORY_UNASSIGNED; | |
67 | |
68 case U_UPPERCASE_LETTER: return HB_CATEGORY_UPPERCASE_LETTER;
/* Lu */ | |
69 case U_LOWERCASE_LETTER: return HB_CATEGORY_LOWERCASE_LETTER;
/* Ll */ | |
70 case U_TITLECASE_LETTER: return HB_CATEGORY_TITLECASE_LETTER;
/* Lt */ | |
71 case U_MODIFIER_LETTER: return HB_CATEGORY_MODIFIER_LETTER;
/* Lm */ | |
72 case U_OTHER_LETTER: return HB_CATEGORY_OTHER_LETTER;
/* Lo */ | |
73 | |
74 case U_NON_SPACING_MARK: return HB_CATEGORY_NON_SPACING_MARK;
/* Mn */ | |
75 case U_ENCLOSING_MARK: return HB_CATEGORY_ENCLOSING_MARK;
/* Me */ | |
76 case U_COMBINING_SPACING_MARK: return HB_CATEGORY_COMBINING_MARK;
/* Mc */ | |
77 | |
78 case U_DECIMAL_DIGIT_NUMBER: return HB_CATEGORY_DECIMAL_NUMBER;
/* Nd */ | |
79 case U_LETTER_NUMBER: return HB_CATEGORY_LETTER_NUMBER;
/* Nl */ | |
80 case U_OTHER_NUMBER: return HB_CATEGORY_OTHER_NUMBER;
/* No */ | |
81 | |
82 case U_SPACE_SEPARATOR: return HB_CATEGORY_SPACE_SEPARATOR;
/* Zs */ | |
83 case U_LINE_SEPARATOR: return HB_CATEGORY_LINE_SEPARATOR;
/* Zl */ | |
84 case U_PARAGRAPH_SEPARATOR: return HB_CATEGORY_PARAGRAPH_SEPARATOR;
/* Zp */ | |
85 | |
86 case U_CONTROL_CHAR: return HB_CATEGORY_CONTROL;
/* Cc */ | |
87 case U_FORMAT_CHAR: return HB_CATEGORY_FORMAT;
/* Cf */ | |
88 case U_PRIVATE_USE_CHAR: return HB_CATEGORY_PRIVATE_USE;
/* Co */ | |
89 case U_SURROGATE: return HB_CATEGORY_SURROGATE;
/* Cs */ | |
90 | |
91 | |
92 case U_DASH_PUNCTUATION: return HB_CATEGORY_DASH_PUNCTUATION;
/* Pd */ | |
93 case U_START_PUNCTUATION: return HB_CATEGORY_OPEN_PUNCTUATION;
/* Ps */ | |
94 case U_END_PUNCTUATION: return HB_CATEGORY_CLOSE_PUNCTUATION;
/* Pe */ | |
95 case U_CONNECTOR_PUNCTUATION: return HB_CATEGORY_CONNECT_PUNCTUATION;
/* Pc */ | |
96 case U_OTHER_PUNCTUATION: return HB_CATEGORY_OTHER_PUNCTUATION;
/* Po */ | |
97 | |
98 case U_MATH_SYMBOL: return HB_CATEGORY_MATH_SYMBOL;
/* Sm */ | |
99 case U_CURRENCY_SYMBOL: return HB_CATEGORY_CURRENCY_SYMBOL;
/* Sc */ | |
100 case U_MODIFIER_SYMBOL: return HB_CATEGORY_MODIFIER_SYMBOL;
/* Sk */ | |
101 case U_OTHER_SYMBOL: return HB_CATEGORY_OTHER_SYMBOL;
/* So */ | |
102 | |
103 case U_INITIAL_PUNCTUATION: return HB_CATEGORY_INITIAL_PUNCTUATION;
/* Pi */ | |
104 case U_FINAL_PUNCTUATION: return HB_CATEGORY_FINAL_PUNCTUATION;
/* Pf */ | |
105 } | |
106 | |
107 return HB_CATEGORY_UNASSIGNED; | |
108 } | |
109 | |
110 static hb_script_t | |
111 hb_icu_get_script (hb_codepoint_t unicode) | |
112 { | |
113 UErrorCode status = U_ZERO_ERROR; | |
114 UScriptCode scriptCode = uscript_getScript(unicode, &status); | |
115 switch ((int) scriptCode) | |
116 { | |
117 #define CHECK_ICU_VERSION(major, minor) \ | |
118 U_ICU_VERSION_MAJOR_NUM > (major) || (U_ICU_VERSION_MAJOR_NUM == (major)
&& U_ICU_VERSION_MINOR_NUM >= (minor)) | |
119 #define MATCH_SCRIPT(C) case USCRIPT_##C: return HB_SCRIPT_##C | |
120 #define MATCH_SCRIPT2(C1, C2) case USCRIPT_##C1: return HB_SCRIPT_##C2 | |
121 MATCH_SCRIPT (INVALID_CODE); | |
122 MATCH_SCRIPT (COMMON); /* Zyyy */ | |
123 MATCH_SCRIPT (INHERITED); /* Qaai */ | |
124 MATCH_SCRIPT (ARABIC); /* Arab */ | |
125 MATCH_SCRIPT (ARMENIAN); /* Armn */ | |
126 MATCH_SCRIPT (BENGALI); /* Beng */ | |
127 MATCH_SCRIPT (BOPOMOFO); /* Bopo */ | |
128 MATCH_SCRIPT (CHEROKEE); /* Cher */ | |
129 MATCH_SCRIPT (COPTIC); /* Qaac */ | |
130 MATCH_SCRIPT (CYRILLIC); /* Cyrl (Cyrs) */ | |
131 MATCH_SCRIPT (DESERET); /* Dsrt */ | |
132 MATCH_SCRIPT (DEVANAGARI); /* Deva */ | |
133 MATCH_SCRIPT (ETHIOPIC); /* Ethi */ | |
134 MATCH_SCRIPT (GEORGIAN); /* Geor (Geon); Geoa) */ | |
135 MATCH_SCRIPT (GOTHIC); /* Goth */ | |
136 MATCH_SCRIPT (GREEK); /* Grek */ | |
137 MATCH_SCRIPT (GUJARATI); /* Gujr */ | |
138 MATCH_SCRIPT (GURMUKHI); /* Guru */ | |
139 MATCH_SCRIPT (HAN); /* Hani */ | |
140 MATCH_SCRIPT (HANGUL); /* Hang */ | |
141 MATCH_SCRIPT (HEBREW); /* Hebr */ | |
142 MATCH_SCRIPT (HIRAGANA); /* Hira */ | |
143 MATCH_SCRIPT (KANNADA); /* Knda */ | |
144 MATCH_SCRIPT (KATAKANA); /* Kana */ | |
145 MATCH_SCRIPT (KHMER); /* Khmr */ | |
146 MATCH_SCRIPT (LAO); /* Laoo */ | |
147 MATCH_SCRIPT (LATIN); /* Latn (Latf); Latg) */ | |
148 MATCH_SCRIPT (MALAYALAM); /* Mlym */ | |
149 MATCH_SCRIPT (MONGOLIAN); /* Mong */ | |
150 MATCH_SCRIPT (MYANMAR); /* Mymr */ | |
151 MATCH_SCRIPT (OGHAM); /* Ogam */ | |
152 MATCH_SCRIPT (OLD_ITALIC); /* Ital */ | |
153 MATCH_SCRIPT (ORIYA); /* Orya */ | |
154 MATCH_SCRIPT (RUNIC); /* Runr */ | |
155 MATCH_SCRIPT (SINHALA); /* Sinh */ | |
156 MATCH_SCRIPT (SYRIAC); /* Syrc (Syrj, Syrn); Syre) */ | |
157 MATCH_SCRIPT (TAMIL); /* Taml */ | |
158 MATCH_SCRIPT (TELUGU); /* Telu */ | |
159 MATCH_SCRIPT (THAANA); /* Thaa */ | |
160 MATCH_SCRIPT (THAI); /* Thai */ | |
161 MATCH_SCRIPT (TIBETAN); /* Tibt */ | |
162 MATCH_SCRIPT (CANADIAN_ABORIGINAL);/* Cans */ | |
163 MATCH_SCRIPT (YI); /* Yiii */ | |
164 MATCH_SCRIPT (TAGALOG); /* Tglg */ | |
165 MATCH_SCRIPT (HANUNOO); /* Hano */ | |
166 MATCH_SCRIPT (BUHID); /* Buhd */ | |
167 MATCH_SCRIPT (TAGBANWA); /* Tagb */ | |
168 | |
169 /* Unicode-4.0 additions */ | |
170 MATCH_SCRIPT (BRAILLE); /* Brai */ | |
171 MATCH_SCRIPT (CYPRIOT); /* Cprt */ | |
172 MATCH_SCRIPT (LIMBU); /* Limb */ | |
173 MATCH_SCRIPT (OSMANYA); /* Osma */ | |
174 MATCH_SCRIPT (SHAVIAN); /* Shaw */ | |
175 MATCH_SCRIPT (LINEAR_B); /* Linb */ | |
176 MATCH_SCRIPT (TAI_LE); /* Tale */ | |
177 MATCH_SCRIPT (UGARITIC); /* Ugar */ | |
178 | |
179 /* Unicode-4.1 additions */ | |
180 MATCH_SCRIPT (NEW_TAI_LUE); /* Talu */ | |
181 MATCH_SCRIPT (BUGINESE); /* Bugi */ | |
182 MATCH_SCRIPT (GLAGOLITIC); /* Glag */ | |
183 MATCH_SCRIPT (TIFINAGH); /* Tfng */ | |
184 MATCH_SCRIPT (SYLOTI_NAGRI); /* Sylo */ | |
185 MATCH_SCRIPT (OLD_PERSIAN); /* Xpeo */ | |
186 MATCH_SCRIPT (KHAROSHTHI); /* Khar */ | |
187 | |
188 /* Unicode-5.0 additions */ | |
189 MATCH_SCRIPT (UNKNOWN); /* Zzzz */ | |
190 MATCH_SCRIPT (BALINESE); /* Bali */ | |
191 MATCH_SCRIPT (CUNEIFORM); /* Xsux */ | |
192 MATCH_SCRIPT (PHOENICIAN); /* Phnx */ | |
193 MATCH_SCRIPT (PHAGS_PA); /* Phag */ | |
194 MATCH_SCRIPT (NKO); /* Nkoo */ | |
195 | |
196 /* Unicode-5.1 additions */ | |
197 MATCH_SCRIPT (KAYAH_LI); /* Kali */ | |
198 MATCH_SCRIPT (LEPCHA); /* Lepc */ | |
199 MATCH_SCRIPT (REJANG); /* Rjng */ | |
200 MATCH_SCRIPT (SUNDANESE); /* Sund */ | |
201 MATCH_SCRIPT (SAURASHTRA); /* Saur */ | |
202 MATCH_SCRIPT (CHAM); /* Cham */ | |
203 MATCH_SCRIPT (OL_CHIKI); /* Olck */ | |
204 MATCH_SCRIPT (VAI); /* Vaii */ | |
205 MATCH_SCRIPT (CARIAN); /* Cari */ | |
206 MATCH_SCRIPT (LYCIAN); /* Lyci */ | |
207 MATCH_SCRIPT (LYDIAN); /* Lydi */ | |
208 | |
209 /* Unicode-5.2 additions */ | |
210 MATCH_SCRIPT (AVESTAN); /* Avst */ | |
211 #if CHECK_ICU_VERSION (4, 4) | |
212 MATCH_SCRIPT (BAMUM); /* Bamu */ | |
213 #endif | |
214 MATCH_SCRIPT (EGYPTIAN_HIEROGLYPHS); /* Egyp */ | |
215 MATCH_SCRIPT (IMPERIAL_ARAMAIC); /* Armi */ | |
216 MATCH_SCRIPT (INSCRIPTIONAL_PAHLAVI); /* Phli */ | |
217 MATCH_SCRIPT (INSCRIPTIONAL_PARTHIAN); /* Prti */ | |
218 MATCH_SCRIPT (JAVANESE); /* Java */ | |
219 MATCH_SCRIPT (KAITHI); /* Kthi */ | |
220 MATCH_SCRIPT2(LANNA, TAI_THAM); /* Lana */ | |
221 #if CHECK_ICU_VERSION (4, 4) | |
222 MATCH_SCRIPT (LISU); /* Lisu */ | |
223 #endif | |
224 MATCH_SCRIPT2(MEITEI_MAYEK, MEETEI_MAYEK);/* Mtei */ | |
225 #if CHECK_ICU_VERSION (4, 4) | |
226 MATCH_SCRIPT (OLD_SOUTH_ARABIAN); /* Sarb */ | |
227 #endif | |
228 MATCH_SCRIPT2(ORKHON, OLD_TURKIC); /* Orkh */ | |
229 MATCH_SCRIPT (SAMARITAN); /* Samr */ | |
230 MATCH_SCRIPT (TAI_VIET); /* Tavt */ | |
231 | |
232 /* Unicode-6.0 additions */ | |
233 MATCH_SCRIPT (BATAK); /* Batk */ | |
234 MATCH_SCRIPT (BRAHMI); /* Brah */ | |
235 MATCH_SCRIPT2(MANDAEAN, MANDAIC); /* Mand */ | |
236 | |
237 } | |
238 return HB_SCRIPT_UNKNOWN; | |
239 } | |
240 | |
/* Statically allocated Unicode function table whose callbacks are the
 * ICU-backed hb_icu_get_*() functions defined in this file.  The initializer
 * is positional, so the entries must stay in the order the fields are
 * declared in hb_unicode_funcs_t (see hb-unicode-private.h). */
static hb_unicode_funcs_t icu_ufuncs = {
  HB_REFERENCE_COUNT_INVALID, /* ref_count */
  /* NOTE(review): presumably the invalid ref count marks this object as
   * static so that reference counting leaves it alone -- confirm against
   * hb-private.h. */
  TRUE, /* immutable */
  {
    /* Callback slots, one per Unicode property lookup. */
    hb_icu_get_general_category,
    hb_icu_get_combining_class,
    hb_icu_get_mirroring,
    hb_icu_get_script,
    hb_icu_get_eastasian_width
  }
};
252 | |
253 hb_unicode_funcs_t * | |
254 hb_icu_get_unicode_funcs (void) | |
255 { | |
256 return &icu_ufuncs; | |
257 } | |
258 | |
259 | |
260 HB_END_DECLS | |
OLD | NEW |