OLD | NEW |
1 /* | 1 /* |
2 * Copyright © 2009 Red Hat, Inc. | 2 * Copyright © 2009 Red Hat, Inc. |
3 * Copyright © 2009 Keith Stribley | 3 * Copyright © 2009 Keith Stribley |
4 * Copyright © 2011 Google, Inc. | 4 * Copyright © 2011 Google, Inc. |
5 * | 5 * |
6 * This is part of HarfBuzz, a text shaping library. | 6 * This is part of HarfBuzz, a text shaping library. |
7 * | 7 * |
8 * Permission is hereby granted, without written agreement and without | 8 * Permission is hereby granted, without written agreement and without |
9 * license or royalty fees, to use, copy, modify, and distribute this | 9 * license or royalty fees, to use, copy, modify, and distribute this |
10 * software and its documentation for any purpose, provided that the | 10 * software and its documentation for any purpose, provided that the |
(...skipping 15 matching lines...) Expand all Loading... |
26 * Red Hat Author(s): Behdad Esfahbod | 26 * Red Hat Author(s): Behdad Esfahbod |
27 * Google Author(s): Behdad Esfahbod | 27 * Google Author(s): Behdad Esfahbod |
28 */ | 28 */ |
29 | 29 |
30 #include "hb-private.hh" | 30 #include "hb-private.hh" |
31 | 31 |
32 #include "hb-icu.h" | 32 #include "hb-icu.h" |
33 | 33 |
34 #include "hb-unicode-private.hh" | 34 #include "hb-unicode-private.hh" |
35 | 35 |
36 #include <unicode/uversion.h> | |
37 #include <unicode/uchar.h> | 36 #include <unicode/uchar.h> |
38 #include <unicode/unorm.h> | 37 #include <unicode/unorm.h> |
39 #include <unicode/ustring.h> | 38 #include <unicode/ustring.h> |
40 | 39 #include <unicode/uversion.h> |
41 | 40 |
42 | 41 |
43 hb_script_t | 42 hb_script_t |
44 hb_icu_script_to_script (UScriptCode script) | 43 hb_icu_script_to_script (UScriptCode script) |
45 { | 44 { |
46 if (unlikely (script == USCRIPT_INVALID_CODE)) | 45 if (unlikely (script == USCRIPT_INVALID_CODE)) |
47 return HB_SCRIPT_INVALID; | 46 return HB_SCRIPT_INVALID; |
48 | 47 |
49 return hb_script_from_string (uscript_getShortName (script), -1); | 48 return hb_script_from_string (uscript_getShortName (script), -1); |
50 } | 49 } |
51 | 50 |
52 UScriptCode | 51 UScriptCode |
53 hb_icu_script_from_script (hb_script_t script) | 52 hb_icu_script_from_script (hb_script_t script) |
54 { | 53 { |
55 if (unlikely (script == HB_SCRIPT_INVALID)) | 54 if (unlikely (script == HB_SCRIPT_INVALID)) |
56 return USCRIPT_INVALID_CODE; | 55 return USCRIPT_INVALID_CODE; |
57 | 56 |
58 for (unsigned int i = 0; i < USCRIPT_CODE_LIMIT; i++) | 57 for (unsigned int i = 0; i < USCRIPT_CODE_LIMIT; i++) |
59 if (unlikely (hb_icu_script_to_script ((UScriptCode) i) == script)) | 58 if (unlikely (hb_icu_script_to_script ((UScriptCode) i) == script)) |
60 return (UScriptCode) i; | 59 return (UScriptCode) i; |
61 | 60 |
62 return USCRIPT_UNKNOWN; | 61 return USCRIPT_UNKNOWN; |
63 } | 62 } |
64 | 63 |
65 | 64 |
66 static unsigned int | 65 static hb_unicode_combining_class_t |
67 hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 66 hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
68 hb_codepoint_t unicode, | 67 hb_codepoint_t unicode, |
69 void *user_data HB_UNUSED) | 68 void *user_data HB_UNUSED) |
70 | 69 |
71 { | 70 { |
72 return u_getCombiningClass (unicode); | 71 return (hb_unicode_combining_class_t) u_getCombiningClass (unicode); |
73 } | 72 } |
74 | 73 |
75 static unsigned int | 74 static unsigned int |
76 hb_icu_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 75 hb_icu_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
77 hb_codepoint_t unicode, | 76 hb_codepoint_t unicode, |
78 void *user_data HB_UNUSED) | 77 void *user_data HB_UNUSED) |
79 { | 78 { |
80 switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH)) | 79 switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH)) |
81 { | 80 { |
82 case U_EA_WIDE: | 81 case U_EA_WIDE: |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
157 { | 156 { |
158 UErrorCode status = U_ZERO_ERROR; | 157 UErrorCode status = U_ZERO_ERROR; |
159 UScriptCode scriptCode = uscript_getScript(unicode, &status); | 158 UScriptCode scriptCode = uscript_getScript(unicode, &status); |
160 | 159 |
161 if (unlikely (U_FAILURE (status))) | 160 if (unlikely (U_FAILURE (status))) |
162 return HB_SCRIPT_UNKNOWN; | 161 return HB_SCRIPT_UNKNOWN; |
163 | 162 |
164 return hb_icu_script_to_script (scriptCode); | 163 return hb_icu_script_to_script (scriptCode); |
165 } | 164 } |
166 | 165 |
/* File-scope normalizer instance, only declared when
 * U_ICU_VERSION_MAJOR_NUM >= 49.  hb_icu_get_unicode_funcs() stores the
 * result of unorm2_getNFCInstance() here, and the compose/decompose
 * callbacks pass it to unorm2_composePair() and
 * unorm2_getRawDecomposition(). */
| 166 #if U_ICU_VERSION_MAJOR_NUM >= 49 |
| 167 static const UNormalizer2 *normalizer; |
| 168 #endif |
| 169 |
167 static hb_bool_t | 170 static hb_bool_t |
168 hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 171 hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
169 hb_codepoint_t a, | 172 hb_codepoint_t a, |
170 hb_codepoint_t b, | 173 hb_codepoint_t b, |
171 hb_codepoint_t *ab, | 174 hb_codepoint_t *ab, |
172 void *user_data HB_UNUSED) | 175 void *user_data HB_UNUSED) |
173 { | 176 { |
174 if (!a || !b) | 177 #if U_ICU_VERSION_MAJOR_NUM >= 49 |
175 return false; | 178 { |
| 179 UChar32 ret = unorm2_composePair (normalizer, a, b); |
| 180 if (ret < 0) return false; |
| 181 *ab = ret; |
| 182 return true; |
| 183 } |
| 184 #endif |
| 185 |
| 186 /* We don't ifdef-out the fallback code such that compiler always |
| 187 * sees it and makes sure it's compilable. */ |
176 | 188 |
177 UChar utf16[4], normalized[5]; | 189 UChar utf16[4], normalized[5]; |
178 int len; | 190 unsigned int len; |
179 hb_bool_t ret, err; | 191 hb_bool_t ret, err; |
180 UErrorCode icu_err; | 192 UErrorCode icu_err; |
181 | 193 |
182 len = 0; | 194 len = 0; |
183 err = false; | 195 err = false; |
184 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), a, err); | 196 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), a, err); |
185 if (err) return false; | 197 if (err) return false; |
186 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), b, err); | 198 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), b, err); |
187 if (err) return false; | 199 if (err) return false; |
188 | 200 |
(...skipping 11 matching lines...) Expand all Loading... |
200 return ret; | 212 return ret; |
201 } | 213 } |
202 | 214 |
203 static hb_bool_t | 215 static hb_bool_t |
204 hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 216 hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
205 hb_codepoint_t ab, | 217 hb_codepoint_t ab, |
206 hb_codepoint_t *a, | 218 hb_codepoint_t *a, |
207 hb_codepoint_t *b, | 219 hb_codepoint_t *b, |
208 void *user_data HB_UNUSED) | 220 void *user_data HB_UNUSED) |
209 { | 221 { |
210 UChar utf16[2], normalized[20]; | 222 #if U_ICU_VERSION_MAJOR_NUM >= 49 |
211 int len; | 223 { |
| 224 UChar decomposed[4]; |
| 225 int len; |
| 226 UErrorCode icu_err = U_ZERO_ERROR; |
| 227 len = unorm2_getRawDecomposition (normalizer, ab, decomposed, |
| 228 » » » » ARRAY_LENGTH (decomposed), &icu_err); |
| 229 if (U_FAILURE (icu_err) || len < 0) return false; |
| 230 |
| 231 len = u_countChar32 (decomposed, len); |
| 232 if (len == 1) { |
| 233 U16_GET_UNSAFE (decomposed, 0, *a); |
| 234 *b = 0; |
| 235 return *a != ab; |
| 236 } else if (len == 2) { |
| 237 len =0; |
| 238 U16_NEXT_UNSAFE (decomposed, len, *a); |
| 239 U16_NEXT_UNSAFE (decomposed, len, *b); |
| 240 } |
| 241 return true; |
| 242 } |
| 243 #endif |
| 244 |
| 245 /* We don't ifdef-out the fallback code such that compiler always |
| 246 * sees it and makes sure it's compilable. */ |
| 247 |
| 248 UChar utf16[2], normalized[2 * HB_UNICODE_MAX_DECOMPOSITION_LEN + 1]; |
| 249 unsigned int len; |
212 hb_bool_t ret, err; | 250 hb_bool_t ret, err; |
213 UErrorCode icu_err; | 251 UErrorCode icu_err; |
214 | 252 |
215 /* This function is a monster! Maybe it wasn't a good idea adding a | 253 /* This function is a monster! Maybe it wasn't a good idea adding a |
216 * pairwise decompose API... */ | 254 * pairwise decompose API... */ |
217 /* Watchout for the dragons. Err, watchout for macros changing len. */ | 255 /* Watchout for the dragons. Err, watchout for macros changing len. */ |
218 | 256 |
219 len = 0; | 257 len = 0; |
220 err = false; | 258 err = false; |
221 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err); | 259 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err); |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
264 /* We expect that recomposed has exactly one character now. */ | 302 /* We expect that recomposed has exactly one character now. */ |
265 if (unlikely (u_countChar32 (recomposed, len) != 1)) | 303 if (unlikely (u_countChar32 (recomposed, len) != 1)) |
266 return false; | 304 return false; |
267 U16_GET_UNSAFE (recomposed, 0, *a); | 305 U16_GET_UNSAFE (recomposed, 0, *a); |
268 ret = true; | 306 ret = true; |
269 } | 307 } |
270 | 308 |
271 return ret; | 309 return ret; |
272 } | 310 } |
273 | 311 |
| 312 static unsigned int |
| 313 hb_icu_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 314 hb_codepoint_t u, |
| 315 hb_codepoint_t *decomposed, |
| 316 void *user_data HB_UNUSED) |
| 317 { |
| 318 UChar utf16[2], normalized[2 * HB_UNICODE_MAX_DECOMPOSITION_LEN + 1]; |
| 319 unsigned int len; |
| 320 int32_t utf32_len; |
| 321 hb_bool_t err; |
| 322 UErrorCode icu_err; |
274 | 323 |
275 extern HB_INTERNAL const hb_unicode_funcs_t _hb_icu_unicode_funcs; | 324 /* Copy @u into a UTF-16 array to be passed to ICU. */ |
276 const hb_unicode_funcs_t _hb_icu_unicode_funcs = { | 325 len = 0; |
277 HB_OBJECT_HEADER_STATIC, | 326 err = FALSE; |
| 327 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), u, err); |
| 328 if (err) |
| 329 return 0; |
278 | 330 |
279 NULL, /* parent */ | 331 /* Normalise the codepoint using NFKD mode. */ |
280 true, /* immutable */ | 332 icu_err = U_ZERO_ERROR; |
281 { | 333 len = unorm_normalize (utf16, len, UNORM_NFKD, 0, normalized, ARRAY_LENGTH (no
rmalized), &icu_err); |
282 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name, | 334 if (icu_err) |
283 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS | 335 return 0; |
284 #undef HB_UNICODE_FUNC_IMPLEMENT | 336 |
285 } | 337 /* Convert the decomposed form from UTF-16 to UTF-32. */ |
286 }; | 338 icu_err = U_ZERO_ERROR; |
| 339 u_strToUTF32 ((UChar32*) decomposed, HB_UNICODE_MAX_DECOMPOSITION_LEN, &utf32_
len, normalized, len, &icu_err); |
| 340 if (icu_err) |
| 341 return 0; |
| 342 |
| 343 return utf32_len; |
| 344 } |
| 345 |
287 | 346 |
288 hb_unicode_funcs_t * | 347 hb_unicode_funcs_t * |
289 hb_icu_get_unicode_funcs (void) | 348 hb_icu_get_unicode_funcs (void) |
290 { | 349 { |
| 350 static const hb_unicode_funcs_t _hb_icu_unicode_funcs = { |
| 351 HB_OBJECT_HEADER_STATIC, |
| 352 |
| 353 NULL, /* parent */ |
| 354 true, /* immutable */ |
| 355 { |
| 356 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name, |
| 357 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS |
| 358 #undef HB_UNICODE_FUNC_IMPLEMENT |
| 359 } |
| 360 }; |
| 361 |
| 362 #if U_ICU_VERSION_MAJOR_NUM >= 49 |
| 363 if (!hb_atomic_ptr_get (&normalizer)) { |
| 364 UErrorCode icu_err = U_ZERO_ERROR; |
| 365 /* We ignore failure in getNFCInstace(). */ |
| 366 hb_atomic_ptr_cmpexch (&normalizer, NULL, unorm2_getNFCInstance (&icu_err)); |
| 367 } |
| 368 #endif |
291 return const_cast<hb_unicode_funcs_t *> (&_hb_icu_unicode_funcs); | 369 return const_cast<hb_unicode_funcs_t *> (&_hb_icu_unicode_funcs); |
292 } | 370 } |
293 | 371 |
294 | 372 |
OLD | NEW |