OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright © 2009 Red Hat, Inc. |
| 3 * Copyright © 2011 Google, Inc. |
| 4 * |
| 5 * This is part of HarfBuzz, a text shaping library. |
| 6 * |
| 7 * Permission is hereby granted, without written agreement and without |
| 8 * license or royalty fees, to use, copy, modify, and distribute this |
| 9 * software and its documentation for any purpose, provided that the |
| 10 * above copyright notice and the following two paragraphs appear in |
| 11 * all copies of this software. |
| 12 * |
| 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| 17 * DAMAGE. |
| 18 * |
| 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| 24 * |
| 25 * Red Hat Author(s): Behdad Esfahbod |
| 26 * Google Author(s): Behdad Esfahbod |
| 27 */ |
| 28 |
| 29 #include "hb-private.hh" |
| 30 |
| 31 #include "hb-glib.h" |
| 32 |
| 33 #include "hb-unicode-private.hh" |
| 34 |
| 35 |
| 36 #if !GLIB_CHECK_VERSION(2,29,14) |
| 37 static const hb_script_t |
| 38 glib_script_to_script[] = |
| 39 { |
| 40 HB_SCRIPT_COMMON, |
| 41 HB_SCRIPT_INHERITED, |
| 42 HB_SCRIPT_ARABIC, |
| 43 HB_SCRIPT_ARMENIAN, |
| 44 HB_SCRIPT_BENGALI, |
| 45 HB_SCRIPT_BOPOMOFO, |
| 46 HB_SCRIPT_CHEROKEE, |
| 47 HB_SCRIPT_COPTIC, |
| 48 HB_SCRIPT_CYRILLIC, |
| 49 HB_SCRIPT_DESERET, |
| 50 HB_SCRIPT_DEVANAGARI, |
| 51 HB_SCRIPT_ETHIOPIC, |
| 52 HB_SCRIPT_GEORGIAN, |
| 53 HB_SCRIPT_GOTHIC, |
| 54 HB_SCRIPT_GREEK, |
| 55 HB_SCRIPT_GUJARATI, |
| 56 HB_SCRIPT_GURMUKHI, |
| 57 HB_SCRIPT_HAN, |
| 58 HB_SCRIPT_HANGUL, |
| 59 HB_SCRIPT_HEBREW, |
| 60 HB_SCRIPT_HIRAGANA, |
| 61 HB_SCRIPT_KANNADA, |
| 62 HB_SCRIPT_KATAKANA, |
| 63 HB_SCRIPT_KHMER, |
| 64 HB_SCRIPT_LAO, |
| 65 HB_SCRIPT_LATIN, |
| 66 HB_SCRIPT_MALAYALAM, |
| 67 HB_SCRIPT_MONGOLIAN, |
| 68 HB_SCRIPT_MYANMAR, |
| 69 HB_SCRIPT_OGHAM, |
| 70 HB_SCRIPT_OLD_ITALIC, |
| 71 HB_SCRIPT_ORIYA, |
| 72 HB_SCRIPT_RUNIC, |
| 73 HB_SCRIPT_SINHALA, |
| 74 HB_SCRIPT_SYRIAC, |
| 75 HB_SCRIPT_TAMIL, |
| 76 HB_SCRIPT_TELUGU, |
| 77 HB_SCRIPT_THAANA, |
| 78 HB_SCRIPT_THAI, |
| 79 HB_SCRIPT_TIBETAN, |
| 80 HB_SCRIPT_CANADIAN_ABORIGINAL, |
| 81 HB_SCRIPT_YI, |
| 82 HB_SCRIPT_TAGALOG, |
| 83 HB_SCRIPT_HANUNOO, |
| 84 HB_SCRIPT_BUHID, |
| 85 HB_SCRIPT_TAGBANWA, |
| 86 |
| 87 /* Unicode-4.0 additions */ |
| 88 HB_SCRIPT_BRAILLE, |
| 89 HB_SCRIPT_CYPRIOT, |
| 90 HB_SCRIPT_LIMBU, |
| 91 HB_SCRIPT_OSMANYA, |
| 92 HB_SCRIPT_SHAVIAN, |
| 93 HB_SCRIPT_LINEAR_B, |
| 94 HB_SCRIPT_TAI_LE, |
| 95 HB_SCRIPT_UGARITIC, |
| 96 |
| 97 /* Unicode-4.1 additions */ |
| 98 HB_SCRIPT_NEW_TAI_LUE, |
| 99 HB_SCRIPT_BUGINESE, |
| 100 HB_SCRIPT_GLAGOLITIC, |
| 101 HB_SCRIPT_TIFINAGH, |
| 102 HB_SCRIPT_SYLOTI_NAGRI, |
| 103 HB_SCRIPT_OLD_PERSIAN, |
| 104 HB_SCRIPT_KHAROSHTHI, |
| 105 |
| 106 /* Unicode-5.0 additions */ |
| 107 HB_SCRIPT_UNKNOWN, |
| 108 HB_SCRIPT_BALINESE, |
| 109 HB_SCRIPT_CUNEIFORM, |
| 110 HB_SCRIPT_PHOENICIAN, |
| 111 HB_SCRIPT_PHAGS_PA, |
| 112 HB_SCRIPT_NKO, |
| 113 |
| 114 /* Unicode-5.1 additions */ |
| 115 HB_SCRIPT_KAYAH_LI, |
| 116 HB_SCRIPT_LEPCHA, |
| 117 HB_SCRIPT_REJANG, |
| 118 HB_SCRIPT_SUNDANESE, |
| 119 HB_SCRIPT_SAURASHTRA, |
| 120 HB_SCRIPT_CHAM, |
| 121 HB_SCRIPT_OL_CHIKI, |
| 122 HB_SCRIPT_VAI, |
| 123 HB_SCRIPT_CARIAN, |
| 124 HB_SCRIPT_LYCIAN, |
| 125 HB_SCRIPT_LYDIAN, |
| 126 |
| 127 /* Unicode-5.2 additions */ |
| 128 HB_SCRIPT_AVESTAN, |
| 129 HB_SCRIPT_BAMUM, |
| 130 HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, |
| 131 HB_SCRIPT_IMPERIAL_ARAMAIC, |
| 132 HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, |
| 133 HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, |
| 134 HB_SCRIPT_JAVANESE, |
| 135 HB_SCRIPT_KAITHI, |
| 136 HB_SCRIPT_TAI_THAM, |
| 137 HB_SCRIPT_LISU, |
| 138 HB_SCRIPT_MEETEI_MAYEK, |
| 139 HB_SCRIPT_OLD_SOUTH_ARABIAN, |
| 140 HB_SCRIPT_OLD_TURKIC, |
| 141 HB_SCRIPT_SAMARITAN, |
| 142 HB_SCRIPT_TAI_VIET, |
| 143 |
| 144 /* Unicode-6.0 additions */ |
| 145 HB_SCRIPT_BATAK, |
| 146 HB_SCRIPT_BRAHMI, |
| 147 HB_SCRIPT_MANDAIC |
| 148 }; |
| 149 #endif |
| 150 |
| 151 hb_script_t |
| 152 hb_glib_script_to_script (GUnicodeScript script) |
| 153 { |
| 154 #if GLIB_CHECK_VERSION(2,29,14) |
| 155 return (hb_script_t) g_unicode_script_to_iso15924 (script); |
| 156 #else |
| 157 if (likely ((unsigned int) script < ARRAY_LENGTH (glib_script_to_script))) |
| 158 return glib_script_to_script[script]; |
| 159 |
| 160 if (unlikely (script == G_UNICODE_SCRIPT_INVALID_CODE)) |
| 161 return HB_SCRIPT_INVALID; |
| 162 |
| 163 return HB_SCRIPT_UNKNOWN; |
| 164 #endif |
| 165 } |
| 166 |
| 167 GUnicodeScript |
| 168 hb_glib_script_from_script (hb_script_t script) |
| 169 { |
| 170 #if GLIB_CHECK_VERSION(2,29,14) |
| 171 return g_unicode_script_from_iso15924 (script); |
| 172 #else |
| 173 unsigned int count = ARRAY_LENGTH (glib_script_to_script); |
| 174 for (unsigned int i = 0; i < count; i++) |
| 175 if (glib_script_to_script[i] == script) |
| 176 return (GUnicodeScript) i; |
| 177 |
| 178 if (unlikely (script == HB_SCRIPT_INVALID)) |
| 179 return G_UNICODE_SCRIPT_INVALID_CODE; |
| 180 |
| 181 return G_UNICODE_SCRIPT_UNKNOWN; |
| 182 #endif |
| 183 } |
| 184 |
| 185 |
| 186 static unsigned int |
| 187 hb_glib_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 188 hb_codepoint_t unicode, |
| 189 void *user_data HB_UNUSED) |
| 190 |
| 191 { |
| 192 return g_unichar_combining_class (unicode); |
| 193 } |
| 194 |
| 195 static unsigned int |
| 196 hb_glib_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 197 hb_codepoint_t unicode, |
| 198 void *user_data HB_UNUSED) |
| 199 { |
| 200 return g_unichar_iswide (unicode) ? 2 : 1; |
| 201 } |
| 202 |
| 203 static hb_unicode_general_category_t |
| 204 hb_glib_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 205 hb_codepoint_t unicode, |
| 206 void *user_data HB_UNUSED) |
| 207 |
| 208 { |
| 209 /* hb_unicode_general_category_t and GUnicodeType are identical */ |
| 210 return (hb_unicode_general_category_t) g_unichar_type (unicode); |
| 211 } |
| 212 |
| 213 static hb_codepoint_t |
| 214 hb_glib_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 215 hb_codepoint_t unicode, |
| 216 void *user_data HB_UNUSED) |
| 217 { |
| 218 g_unichar_get_mirror_char (unicode, &unicode); |
| 219 return unicode; |
| 220 } |
| 221 |
| 222 static hb_script_t |
| 223 hb_glib_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 224 hb_codepoint_t unicode, |
| 225 void *user_data HB_UNUSED) |
| 226 { |
| 227 return hb_glib_script_to_script (g_unichar_get_script (unicode)); |
| 228 } |
| 229 |
| 230 static hb_bool_t |
| 231 hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 232 hb_codepoint_t a, |
| 233 hb_codepoint_t b, |
| 234 hb_codepoint_t *ab, |
| 235 void *user_data HB_UNUSED) |
| 236 { |
| 237 #if GLIB_CHECK_VERSION(2,29,12) |
| 238 return g_unichar_compose (a, b, ab); |
| 239 #endif |
| 240 |
| 241 /* We don't ifdef-out the fallback code such that compiler always |
| 242 * sees it and makes sure it's compilable. */ |
| 243 |
| 244 if (!a || !b) |
| 245 return FALSE; |
| 246 |
| 247 gchar utf8[12]; |
| 248 gchar *normalized; |
| 249 gint len; |
| 250 hb_bool_t ret; |
| 251 |
| 252 len = g_unichar_to_utf8 (a, utf8); |
| 253 len += g_unichar_to_utf8 (b, utf8 + len); |
| 254 normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFC); |
| 255 len = g_utf8_strlen (normalized, -1); |
| 256 if (unlikely (!len)) |
| 257 return FALSE; |
| 258 |
| 259 if (len == 1) { |
| 260 *ab = g_utf8_get_char (normalized); |
| 261 ret = TRUE; |
| 262 } else { |
| 263 ret = FALSE; |
| 264 } |
| 265 |
| 266 g_free (normalized); |
| 267 return ret; |
| 268 } |
| 269 |
| 270 static hb_bool_t |
| 271 hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 272 hb_codepoint_t ab, |
| 273 hb_codepoint_t *a, |
| 274 hb_codepoint_t *b, |
| 275 void *user_data HB_UNUSED) |
| 276 { |
| 277 #if GLIB_CHECK_VERSION(2,29,12) |
| 278 return g_unichar_decompose (ab, a, b); |
| 279 #endif |
| 280 |
| 281 /* We don't ifdef-out the fallback code such that compiler always |
| 282 * sees it and makes sure it's compilable. */ |
| 283 |
| 284 gchar utf8[6]; |
| 285 gchar *normalized; |
| 286 gint len; |
| 287 hb_bool_t ret; |
| 288 |
| 289 len = g_unichar_to_utf8 (ab, utf8); |
| 290 normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFD); |
| 291 len = g_utf8_strlen (normalized, -1); |
| 292 if (unlikely (!len)) |
| 293 return FALSE; |
| 294 |
| 295 if (len == 1) { |
| 296 *a = g_utf8_get_char (normalized); |
| 297 *b = 0; |
| 298 ret = *a != ab; |
| 299 } else if (len == 2) { |
| 300 *a = g_utf8_get_char (normalized); |
| 301 *b = g_utf8_get_char (g_utf8_next_char (normalized)); |
| 302 /* Here's the ugly part: if ab decomposes to a single character and |
| 303 * that character decomposes again, we have to detect that and undo |
| 304 * the second part :-(. */ |
| 305 gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC); |
| 306 hb_codepoint_t c = g_utf8_get_char (recomposed); |
| 307 if (c != ab && c != *a) { |
| 308 *a = c; |
| 309 *b = 0; |
| 310 } |
| 311 g_free (recomposed); |
| 312 ret = TRUE; |
| 313 } else { |
| 314 /* If decomposed to more than two characters, take the last one, |
| 315 * and recompose the rest to get the first component. */ |
| 316 gchar *end = g_utf8_offset_to_pointer (normalized, len - 1); |
| 317 gchar *recomposed; |
| 318 *b = g_utf8_get_char (end); |
| 319 recomposed = g_utf8_normalize (normalized, end - normalized, G_NORMALIZE_NFC
); |
| 320 /* We expect that recomposed has exactly one character now. */ |
| 321 *a = g_utf8_get_char (recomposed); |
| 322 g_free (recomposed); |
| 323 ret = TRUE; |
| 324 } |
| 325 |
| 326 g_free (normalized); |
| 327 return ret; |
| 328 } |
| 329 |
| 330 |
| 331 extern HB_INTERNAL hb_unicode_funcs_t _hb_unicode_funcs_glib; |
| 332 hb_unicode_funcs_t _hb_glib_unicode_funcs = { |
| 333 HB_OBJECT_HEADER_STATIC, |
| 334 |
| 335 NULL, /* parent */ |
| 336 TRUE, /* immutable */ |
| 337 { |
| 338 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_glib_unicode_##name, |
| 339 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS |
| 340 #undef HB_UNICODE_FUNC_IMPLEMENT |
| 341 } |
| 342 }; |
| 343 |
| 344 hb_unicode_funcs_t * |
| 345 hb_glib_get_unicode_funcs (void) |
| 346 { |
| 347 return &_hb_glib_unicode_funcs; |
| 348 } |
| 349 |
OLD | NEW |