OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright © 2009 Red Hat, Inc. |
| 3 * Copyright © 2009 Keith Stribley |
| 4 * Copyright © 2011 Google, Inc. |
| 5 * |
| 6 * This is part of HarfBuzz, a text shaping library. |
| 7 * |
| 8 * Permission is hereby granted, without written agreement and without |
| 9 * license or royalty fees, to use, copy, modify, and distribute this |
| 10 * software and its documentation for any purpose, provided that the |
| 11 * above copyright notice and the following two paragraphs appear in |
| 12 * all copies of this software. |
| 13 * |
| 14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| 15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| 16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| 17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| 18 * DAMAGE. |
| 19 * |
| 20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| 21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| 23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| 24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| 25 * |
| 26 * Red Hat Author(s): Behdad Esfahbod |
| 27 * Google Author(s): Behdad Esfahbod |
| 28 */ |
| 29 |
| 30 #include "hb-private.hh" |
| 31 |
| 32 #include "hb-icu.h" |
| 33 |
| 34 #include "hb-unicode-private.hh" |
| 35 |
| 36 #include <unicode/uversion.h> |
| 37 #include <unicode/uchar.h> |
| 38 #include <unicode/unorm.h> |
| 39 #include <unicode/ustring.h> |
| 40 |
| 41 |
| 42 |
| 43 hb_script_t |
| 44 hb_icu_script_to_script (UScriptCode script) |
| 45 { |
| 46 if (unlikely (script == USCRIPT_INVALID_CODE)) |
| 47 return HB_SCRIPT_INVALID; |
| 48 |
| 49 return hb_script_from_string (uscript_getShortName (script), -1); |
| 50 } |
| 51 |
| 52 UScriptCode |
| 53 hb_icu_script_from_script (hb_script_t script) |
| 54 { |
| 55 if (unlikely (script == HB_SCRIPT_INVALID)) |
| 56 return USCRIPT_INVALID_CODE; |
| 57 |
| 58 for (unsigned int i = 0; i < USCRIPT_CODE_LIMIT; i++) |
| 59 if (unlikely (hb_icu_script_to_script ((UScriptCode) i) == script)) |
| 60 return (UScriptCode) i; |
| 61 |
| 62 return USCRIPT_UNKNOWN; |
| 63 } |
| 64 |
| 65 |
| 66 static unsigned int |
| 67 hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 68 hb_codepoint_t unicode, |
| 69 void *user_data HB_UNUSED) |
| 70 |
| 71 { |
| 72 return u_getCombiningClass (unicode); |
| 73 } |
| 74 |
| 75 static unsigned int |
| 76 hb_icu_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 77 hb_codepoint_t unicode, |
| 78 void *user_data HB_UNUSED) |
| 79 { |
| 80 switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH)) |
| 81 { |
| 82 case U_EA_WIDE: |
| 83 case U_EA_FULLWIDTH: |
| 84 return 2; |
| 85 case U_EA_NEUTRAL: |
| 86 case U_EA_AMBIGUOUS: |
| 87 case U_EA_HALFWIDTH: |
| 88 case U_EA_NARROW: |
| 89 return 1; |
| 90 } |
| 91 return 1; |
| 92 } |
| 93 |
| 94 static hb_unicode_general_category_t |
| 95 hb_icu_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 96 hb_codepoint_t unicode, |
| 97 void *user_data HB_UNUSED) |
| 98 { |
| 99 switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY)) |
| 100 { |
| 101 case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASS
IGNED; |
| 102 |
| 103 case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPER
CASE_LETTER; |
| 104 case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWER
CASE_LETTER; |
| 105 case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLE
CASE_LETTER; |
| 106 case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIF
IER_LETTER; |
| 107 case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER
_LETTER; |
| 108 |
| 109 case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_S
PACING_MARK; |
| 110 case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLO
SING_MARK; |
| 111 case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_SPACI
NG_MARK; |
| 112 |
| 113 case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIM
AL_NUMBER; |
| 114 case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTE
R_NUMBER; |
| 115 case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER
_NUMBER; |
| 116 |
| 117 case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE
_SEPARATOR; |
| 118 case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_
SEPARATOR; |
| 119 case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAG
RAPH_SEPARATOR; |
| 120 |
| 121 case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTR
OL; |
| 122 case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMA
T; |
| 123 case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVA
TE_USE; |
| 124 case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURRO
GATE; |
| 125 |
| 126 |
| 127 case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_
PUNCTUATION; |
| 128 case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_
PUNCTUATION; |
| 129 case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE
_PUNCTUATION; |
| 130 case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNE
CT_PUNCTUATION; |
| 131 case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER
_PUNCTUATION; |
| 132 |
| 133 case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_
SYMBOL; |
| 134 case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRE
NCY_SYMBOL; |
| 135 case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIF
IER_SYMBOL; |
| 136 case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER
_SYMBOL; |
| 137 |
| 138 case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITI
AL_PUNCTUATION; |
| 139 case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL
_PUNCTUATION; |
| 140 } |
| 141 |
| 142 return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; |
| 143 } |
| 144 |
| 145 static hb_codepoint_t |
| 146 hb_icu_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 147 hb_codepoint_t unicode, |
| 148 void *user_data HB_UNUSED) |
| 149 { |
| 150 return u_charMirror(unicode); |
| 151 } |
| 152 |
| 153 static hb_script_t |
| 154 hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 155 hb_codepoint_t unicode, |
| 156 void *user_data HB_UNUSED) |
| 157 { |
| 158 UErrorCode status = U_ZERO_ERROR; |
| 159 UScriptCode scriptCode = uscript_getScript(unicode, &status); |
| 160 |
| 161 if (unlikely (U_FAILURE (status))) |
| 162 return HB_SCRIPT_UNKNOWN; |
| 163 |
| 164 return hb_icu_script_to_script (scriptCode); |
| 165 } |
| 166 |
| 167 static hb_bool_t |
| 168 hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 169 hb_codepoint_t a, |
| 170 hb_codepoint_t b, |
| 171 hb_codepoint_t *ab, |
| 172 void *user_data HB_UNUSED) |
| 173 { |
| 174 if (!a || !b) |
| 175 return FALSE; |
| 176 |
| 177 UChar utf16[4], normalized[5]; |
| 178 int len; |
| 179 hb_bool_t ret, err; |
| 180 UErrorCode icu_err; |
| 181 |
| 182 len = 0; |
| 183 err = FALSE; |
| 184 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), a, err); |
| 185 if (err) return FALSE; |
| 186 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), b, err); |
| 187 if (err) return FALSE; |
| 188 |
| 189 icu_err = U_ZERO_ERROR; |
| 190 len = unorm_normalize (utf16, len, UNORM_NFC, 0, normalized, ARRAY_LENGTH (nor
malized), &icu_err); |
| 191 if (U_FAILURE (icu_err)) |
| 192 return FALSE; |
| 193 if (u_countChar32 (normalized, len) == 1) { |
| 194 U16_GET_UNSAFE (normalized, 0, *ab); |
| 195 ret = TRUE; |
| 196 } else { |
| 197 ret = FALSE; |
| 198 } |
| 199 |
| 200 return ret; |
| 201 } |
| 202 |
| 203 static hb_bool_t |
| 204 hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 205 hb_codepoint_t ab, |
| 206 hb_codepoint_t *a, |
| 207 hb_codepoint_t *b, |
| 208 void *user_data HB_UNUSED) |
| 209 { |
| 210 UChar utf16[2], normalized[20]; |
| 211 int len; |
| 212 hb_bool_t ret, err; |
| 213 UErrorCode icu_err; |
| 214 |
| 215 /* This function is a monster! Maybe it wasn't a good idea adding a |
| 216 * pairwise decompose API... */ |
| 217 /* Watchout for the dragons. Err, watchout for macros changing len. */ |
| 218 |
| 219 len = 0; |
| 220 err = FALSE; |
| 221 U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err); |
| 222 if (err) return FALSE; |
| 223 |
| 224 icu_err = U_ZERO_ERROR; |
| 225 len = unorm_normalize (utf16, len, UNORM_NFD, 0, normalized, ARRAY_LENGTH (nor
malized), &icu_err); |
| 226 if (U_FAILURE (icu_err)) |
| 227 return FALSE; |
| 228 |
| 229 len = u_countChar32 (normalized, len); |
| 230 |
| 231 if (len == 1) { |
| 232 U16_GET_UNSAFE (normalized, 0, *a); |
| 233 *b = 0; |
| 234 ret = *a != ab; |
| 235 } else if (len == 2) { |
| 236 len =0; |
| 237 U16_NEXT_UNSAFE (normalized, len, *a); |
| 238 U16_NEXT_UNSAFE (normalized, len, *b); |
| 239 |
| 240 /* Here's the ugly part: if ab decomposes to a single character and |
| 241 * that character decomposes again, we have to detect that and undo |
| 242 * the second part :-(. */ |
| 243 UChar recomposed[20]; |
| 244 icu_err = U_ZERO_ERROR; |
| 245 unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (re
composed), &icu_err); |
| 246 if (U_FAILURE (icu_err)) |
| 247 return FALSE; |
| 248 hb_codepoint_t c; |
| 249 U16_GET_UNSAFE (recomposed, 0, c); |
| 250 if (c != *a && c != ab) { |
| 251 *a = c; |
| 252 *b = 0; |
| 253 } |
| 254 ret = TRUE; |
| 255 } else { |
| 256 /* If decomposed to more than two characters, take the last one, |
| 257 * and recompose the rest to get the first component. */ |
| 258 U16_PREV_UNSAFE (normalized, len, *b); |
| 259 UChar recomposed[20]; |
| 260 icu_err = U_ZERO_ERROR; |
| 261 len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENG
TH (recomposed), &icu_err); |
| 262 if (U_FAILURE (icu_err)) |
| 263 return FALSE; |
| 264 /* We expect that recomposed has exactly one character now. */ |
| 265 U16_GET_UNSAFE (recomposed, 0, *a); |
| 266 ret = TRUE; |
| 267 } |
| 268 |
| 269 return ret; |
| 270 } |
| 271 |
| 272 extern HB_INTERNAL hb_unicode_funcs_t _hb_unicode_funcs_icu; |
| 273 hb_unicode_funcs_t _hb_icu_unicode_funcs = { |
| 274 HB_OBJECT_HEADER_STATIC, |
| 275 |
| 276 NULL, /* parent */ |
| 277 TRUE, /* immutable */ |
| 278 { |
| 279 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name, |
| 280 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS |
| 281 #undef HB_UNICODE_FUNC_IMPLEMENT |
| 282 } |
| 283 }; |
| 284 |
| 285 hb_unicode_funcs_t * |
| 286 hb_icu_get_unicode_funcs (void) |
| 287 { |
| 288 return &_hb_icu_unicode_funcs; |
| 289 } |
| 290 |
| 291 |
OLD | NEW |