| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2009 Red Hat, Inc. | 2 * Copyright © 2009 Red Hat, Inc. |
| 3 * Copyright © 2011 Google, Inc. |
| 3 * | 4 * |
| 4 * This is part of HarfBuzz, a text shaping library. | 5 * This is part of HarfBuzz, a text shaping library. |
| 5 * | 6 * |
| 6 * Permission is hereby granted, without written agreement and without | 7 * Permission is hereby granted, without written agreement and without |
| 7 * license or royalty fees, to use, copy, modify, and distribute this | 8 * license or royalty fees, to use, copy, modify, and distribute this |
| 8 * software and its documentation for any purpose, provided that the | 9 * software and its documentation for any purpose, provided that the |
| 9 * above copyright notice and the following two paragraphs appear in | 10 * above copyright notice and the following two paragraphs appear in |
| 10 * all copies of this software. | 11 * all copies of this software. |
| 11 * | 12 * |
| 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| 16 * DAMAGE. | 17 * DAMAGE. |
| 17 * | 18 * |
| 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| 23 * | 24 * |
| 24 * Red Hat Author(s): Behdad Esfahbod | 25 * Red Hat Author(s): Behdad Esfahbod |
| 26 * Google Author(s): Behdad Esfahbod |
| 25 */ | 27 */ |
| 26 | 28 |
| 27 #include "hb-private.h" | 29 #include "hb-private.hh" |
| 28 #include "hb-ot.h" | 30 #include "hb-ot.h" |
| 29 | 31 |
| 30 #include <string.h> | 32 #include <string.h> |
| 31 | 33 |
| 32 HB_BEGIN_DECLS | |
| 33 | 34 |
| 34 | 35 |
| 36 /* hb_script_t */ |
| 37 |
| 38 static hb_tag_t |
| 39 hb_ot_old_tag_from_script (hb_script_t script) |
| 40 { |
| 41 switch ((hb_tag_t) script) { |
| 42 case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT; |
| 43 |
| 44 /* KATAKANA and HIRAGANA both map to 'kana' */ |
| 45 case HB_SCRIPT_HIRAGANA: return HB_TAG('k','a','n','a'); |
| 46 |
| 47 /* Spaces at the end are preserved, unlike ISO 15924 */ |
| 48 case HB_SCRIPT_LAO: return HB_TAG('l','a','o',' '); |
| 49 case HB_SCRIPT_YI: return HB_TAG('y','i',' ',' '); |
| 50 /* Unicode-5.0 additions */ |
| 51 case HB_SCRIPT_NKO: return HB_TAG('n','k','o',' '); |
| 52 /* Unicode-5.1 additions */ |
| 53 case HB_SCRIPT_VAI: return HB_TAG('v','a','i',' '); |
| 54 /* Unicode-5.2 additions */ |
| 55 /* Unicode-6.0 additions */ |
| 56 } |
| 57 |
| 58 /* Else, just change first char to lowercase and return */ |
| 59 return ((hb_tag_t) script) | 0x20000000; |
| 60 } |
| 61 |
| 62 static hb_script_t |
| 63 hb_ot_old_tag_to_script (hb_tag_t tag) |
| 64 { |
| 65 if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT)) |
| 66 return HB_SCRIPT_INVALID; |
| 67 |
| 68 /* This side of the conversion is fully algorithmic. */ |
| 69 |
| 70 /* Any spaces at the end of the tag are replaced by repeating the last |
| 71 * letter. Eg 'nko ' -> 'Nkoo' */ |
| 72 if (unlikely ((tag & 0x0000FF00) == 0x00002000)) |
| 73 tag |= (tag >> 8) & 0x0000FF00; /* Copy second letter to third */ |
| 74 if (unlikely ((tag & 0x000000FF) == 0x00000020)) |
| 75 tag |= (tag >> 8) & 0x000000FF; /* Copy third letter to fourth */ |
| 76 |
| 77 /* Change first char to uppercase and return */ |
| 78 return (hb_script_t) (tag & ~0x20000000); |
| 79 } |
| 80 |
| 81 static hb_tag_t |
| 82 hb_ot_new_tag_from_script (hb_script_t script) |
| 83 { |
| 84 switch ((hb_tag_t) script) { |
| 85 case HB_SCRIPT_BENGALI: return HB_TAG('b','n','g','2'); |
| 86 case HB_SCRIPT_DEVANAGARI: return HB_TAG('d','e','v','2'); |
| 87 case HB_SCRIPT_GUJARATI: return HB_TAG('g','j','r','2'); |
| 88 case HB_SCRIPT_GURMUKHI: return HB_TAG('g','u','r','2'); |
| 89 case HB_SCRIPT_KANNADA: return HB_TAG('k','n','d','2'); |
| 90 case HB_SCRIPT_MALAYALAM: return HB_TAG('m','l','m','2'); |
| 91 case HB_SCRIPT_ORIYA: return HB_TAG('o','r','y','2'); |
| 92 case HB_SCRIPT_TAMIL: return HB_TAG('t','m','l','2'); |
| 93 case HB_SCRIPT_TELUGU: return HB_TAG('t','e','l','2'); |
| 94 } |
| 95 |
| 96 return HB_OT_TAG_DEFAULT_SCRIPT; |
| 97 } |
| 98 |
| 99 static hb_script_t |
| 100 hb_ot_new_tag_to_script (hb_tag_t tag) |
| 101 { |
| 102 switch (tag) { |
| 103 case HB_TAG('b','n','g','2'): return HB_SCRIPT_BENGALI; |
| 104 case HB_TAG('d','e','v','2'): return HB_SCRIPT_DEVANAGARI; |
| 105 case HB_TAG('g','j','r','2'): return HB_SCRIPT_GUJARATI; |
| 106 case HB_TAG('g','u','r','2'): return HB_SCRIPT_GURMUKHI; |
| 107 case HB_TAG('k','n','d','2'): return HB_SCRIPT_KANNADA; |
| 108 case HB_TAG('m','l','m','2'): return HB_SCRIPT_MALAYALAM; |
| 109 case HB_TAG('o','r','y','2'): return HB_SCRIPT_ORIYA; |
| 110 case HB_TAG('t','m','l','2'): return HB_SCRIPT_TAMIL; |
| 111 case HB_TAG('t','e','l','2'): return HB_SCRIPT_TELUGU; |
| 112 } |
| 113 |
| 114 return HB_SCRIPT_UNKNOWN; |
| 115 } |
| 116 |
| 35 /* | 117 /* |
| 36 * Complete list at: | 118 * Complete list at: |
| 37 * http://www.microsoft.com/typography/otspec/scripttags.htm | 119 * https://www.microsoft.com/typography/otspec/scripttags.htm |
| 120 * https://www.microsoft.com/typography/otspec160/scripttagsProposed.htm |
| 121 * |
| 122 * Most of the script tags are the same as the ISO 15924 tag but lowercased. |
| 123 * So we just do that, and handle the exceptional cases in a switch. |
| 38 */ | 124 */ |
| 39 static const hb_tag_t ot_scripts[][3] = { | |
| 40 {HB_TAG('D','F','L','T')}, /* HB_SCRIPT_COMMON */ | |
| 41 {HB_TAG('D','F','L','T')}, /* HB_SCRIPT_INHERITED */ | |
| 42 {HB_TAG('a','r','a','b')}, /* HB_SCRIPT_ARABIC */ | |
| 43 {HB_TAG('a','r','m','n')}, /* HB_SCRIPT_ARMENIAN */ | |
| 44 {HB_TAG('b','n','g','2'), HB_TAG('b','e','n','g')}, /* HB_SCRIPT_BENGALI */ | |
| 45 {HB_TAG('b','o','p','o')}, /* HB_SCRIPT_BOPOMOFO */ | |
| 46 {HB_TAG('c','h','e','r')}, /* HB_SCRIPT_CHEROKEE */ | |
| 47 {HB_TAG('c','o','p','t')}, /* HB_SCRIPT_COPTIC */ | |
| 48 {HB_TAG('c','y','r','l')}, /* HB_SCRIPT_CYRILLIC */ | |
| 49 {HB_TAG('d','s','r','t')}, /* HB_SCRIPT_DESERET */ | |
| 50 {HB_TAG('d','e','v','2'), HB_TAG('d','e','v','a')}, /* HB_SCRIPT_DEVANAGARI */ | |
| 51 {HB_TAG('e','t','h','i')}, /* HB_SCRIPT_ETHIOPIC */ | |
| 52 {HB_TAG('g','e','o','r')}, /* HB_SCRIPT_GEORGIAN */ | |
| 53 {HB_TAG('g','o','t','h')}, /* HB_SCRIPT_GOTHIC */ | |
| 54 {HB_TAG('g','r','e','k')}, /* HB_SCRIPT_GREEK */ | |
| 55 {HB_TAG('g','j','r','2'), HB_TAG('g','u','j','r')}, /* HB_SCRIPT_GUJARATI */ | |
| 56 {HB_TAG('g','u','r','2'), HB_TAG('g','u','r','u')}, /* HB_SCRIPT_GURMUKHI */ | |
| 57 {HB_TAG('h','a','n','i')}, /* HB_SCRIPT_HAN */ | |
| 58 {HB_TAG('h','a','n','g')}, /* HB_SCRIPT_HANGUL */ | |
| 59 {HB_TAG('h','e','b','r')}, /* HB_SCRIPT_HEBREW */ | |
| 60 {HB_TAG('k','a','n','a')}, /* HB_SCRIPT_HIRAGANA */ | |
| 61 {HB_TAG('k','n','d','2'), HB_TAG('k','n','d','a')}, /* HB_SCRIPT_KANNADA */ | |
| 62 {HB_TAG('k','a','n','a')}, /* HB_SCRIPT_KATAKANA */ | |
| 63 {HB_TAG('k','h','m','r')}, /* HB_SCRIPT_KHMER */ | |
| 64 {HB_TAG('l','a','o',' ')}, /* HB_SCRIPT_LAO */ | |
| 65 {HB_TAG('l','a','t','n')}, /* HB_SCRIPT_LATIN */ | |
| 66 {HB_TAG('m','l','m','2'), HB_TAG('m','l','y','m')}, /* HB_SCRIPT_MALAYALAM */ | |
| 67 {HB_TAG('m','o','n','g')}, /* HB_SCRIPT_MONGOLIAN */ | |
| 68 {HB_TAG('m','y','m','r')}, /* HB_SCRIPT_MYANMAR */ | |
| 69 {HB_TAG('o','g','a','m')}, /* HB_SCRIPT_OGHAM */ | |
| 70 {HB_TAG('i','t','a','l')}, /* HB_SCRIPT_OLD_ITALIC */ | |
| 71 {HB_TAG('o','r','y','2'), HB_TAG('o','r','y','a')}, /* HB_SCRIPT_ORIYA */ | |
| 72 {HB_TAG('r','u','n','r')}, /* HB_SCRIPT_RUNIC */ | |
| 73 {HB_TAG('s','i','n','h')}, /* HB_SCRIPT_SINHALA */ | |
| 74 {HB_TAG('s','y','r','c')}, /* HB_SCRIPT_SYRIAC */ | |
| 75 {HB_TAG('t','m','l','2'), HB_TAG('t','a','m','l')}, /* HB_SCRIPT_TAMIL */ | |
| 76 {HB_TAG('t','e','l','2'), HB_TAG('t','e','l','u')}, /* HB_SCRIPT_TELUGU */ | |
| 77 {HB_TAG('t','h','a','a')}, /* HB_SCRIPT_THAANA */ | |
| 78 {HB_TAG('t','h','a','i')}, /* HB_SCRIPT_THAI */ | |
| 79 {HB_TAG('t','i','b','t')}, /* HB_SCRIPT_TIBETAN */ | |
| 80 {HB_TAG('c','a','n','s')}, /* HB_SCRIPT_CANADIAN_ABORIGINAL */ | |
| 81 {HB_TAG('y','i',' ',' ')}, /* HB_SCRIPT_YI */ | |
| 82 {HB_TAG('t','g','l','g')}, /* HB_SCRIPT_TAGALOG */ | |
| 83 {HB_TAG('h','a','n','o')}, /* HB_SCRIPT_HANUNOO */ | |
| 84 {HB_TAG('b','u','h','d')}, /* HB_SCRIPT_BUHID */ | |
| 85 {HB_TAG('t','a','g','b')}, /* HB_SCRIPT_TAGBANWA */ | |
| 86 | 125 |
| 87 /* Unicode-4.0 additions */ | 126 void |
| 88 {HB_TAG('b','r','a','i')},» /* HB_SCRIPT_BRAILLE */ | 127 hb_ot_tags_from_script (hb_script_t script, |
| 89 {HB_TAG('c','p','r','t')},» /* HB_SCRIPT_CYPRIOT */ | 128 » » » hb_tag_t *script_tag_1, |
| 90 {HB_TAG('l','i','m','b')},» /* HB_SCRIPT_LIMBU */ | 129 » » » hb_tag_t *script_tag_2) |
| 91 {HB_TAG('o','s','m','a')},» /* HB_SCRIPT_OSMANYA */ | 130 { |
| 92 {HB_TAG('s','h','a','w')},» /* HB_SCRIPT_SHAVIAN */ | 131 hb_tag_t new_tag; |
| 93 {HB_TAG('l','i','n','b')},» /* HB_SCRIPT_LINEAR_B */ | |
| 94 {HB_TAG('t','a','l','e')},» /* HB_SCRIPT_TAI_LE */ | |
| 95 {HB_TAG('u','g','a','r')},» /* HB_SCRIPT_UGARITIC */ | |
| 96 | 132 |
| 97 /* Unicode-4.1 additions */ | 133 *script_tag_2 = HB_OT_TAG_DEFAULT_SCRIPT; |
| 98 {HB_TAG('t','a','l','u')},» /* HB_SCRIPT_NEW_TAI_LUE */ | 134 *script_tag_1 = hb_ot_old_tag_from_script (script); |
| 99 {HB_TAG('b','u','g','i')},» /* HB_SCRIPT_BUGINESE */ | |
| 100 {HB_TAG('g','l','a','g')},» /* HB_SCRIPT_GLAGOLITIC */ | |
| 101 {HB_TAG('t','f','n','g')},» /* HB_SCRIPT_TIFINAGH */ | |
| 102 {HB_TAG('s','y','l','o')},» /* HB_SCRIPT_SYLOTI_NAGRI */ | |
| 103 {HB_TAG('x','p','e','o')},» /* HB_SCRIPT_OLD_PERSIAN */ | |
| 104 {HB_TAG('k','h','a','r')},» /* HB_SCRIPT_KHAROSHTHI */ | |
| 105 | 135 |
| 106 /* Unicode-5.0 additions */ | 136 new_tag = hb_ot_new_tag_from_script (script); |
| 107 {HB_TAG('D','F','L','T')},» /* HB_SCRIPT_UNKNOWN */ | 137 if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT)) { |
| 108 {HB_TAG('b','a','l','i')},» /* HB_SCRIPT_BALINESE */ | 138 *script_tag_2 = *script_tag_1; |
| 109 {HB_TAG('x','s','u','x')},» /* HB_SCRIPT_CUNEIFORM */ | 139 *script_tag_1 = new_tag; |
| 110 {HB_TAG('p','h','n','x')},» /* HB_SCRIPT_PHOENICIAN */ | 140 } |
| 111 {HB_TAG('p','h','a','g')},» /* HB_SCRIPT_PHAGS_PA */ | |
| 112 {HB_TAG('n','k','o',' ')},» /* HB_SCRIPT_NKO */ | |
| 113 | |
| 114 /* Unicode-5.1 additions */ | |
| 115 {HB_TAG('k','a','l','i')},» /* HB_SCRIPT_KAYAH_LI */ | |
| 116 {HB_TAG('l','e','p','c')},» /* HB_SCRIPT_LEPCHA */ | |
| 117 {HB_TAG('r','j','n','g')},» /* HB_SCRIPT_REJANG */ | |
| 118 {HB_TAG('s','u','n','d')},» /* HB_SCRIPT_SUNDANESE */ | |
| 119 {HB_TAG('s','a','u','r')},» /* HB_SCRIPT_SAURASHTRA */ | |
| 120 {HB_TAG('c','h','a','m')},» /* HB_SCRIPT_CHAM */ | |
| 121 {HB_TAG('o','l','c','k')},» /* HB_SCRIPT_OL_CHIKI */ | |
| 122 {HB_TAG('v','a','i',' ')},» /* HB_SCRIPT_VAI */ | |
| 123 {HB_TAG('c','a','r','i')},» /* HB_SCRIPT_CARIAN */ | |
| 124 {HB_TAG('l','y','c','i')},» /* HB_SCRIPT_LYCIAN */ | |
| 125 {HB_TAG('l','y','d','i')},» /* HB_SCRIPT_LYDIAN */ | |
| 126 | |
| 127 /* Unicode-5.2 additions */ | |
| 128 {HB_TAG('a','v','s','t')},» /* HB_SCRIPT_AVESTAN */ | |
| 129 {HB_TAG('b','a','m','u')},» /* HB_SCRIPT_BAMUM */ | |
| 130 {HB_TAG('e','g','y','p')},» /* HB_SCRIPT_EGYPTIAN_HIEROGLYPHS */ | |
| 131 {HB_TAG('a','r','m','i')},» /* HB_SCRIPT_IMPERIAL_ARAMAIC */ | |
| 132 {HB_TAG('p','h','l','i')},» /* HB_SCRIPT_INSCRIPTIONAL_PAHLAVI */ | |
| 133 {HB_TAG('p','r','t','i')},» /* HB_SCRIPT_INSCRIPTIONAL_PARTHIAN */ | |
| 134 {HB_TAG('j','a','v','a')},» /* HB_SCRIPT_JAVANESE */ | |
| 135 {HB_TAG('k','t','h','i')},» /* HB_SCRIPT_KAITHI */ | |
| 136 {HB_TAG('l','i','s','u')},» /* HB_SCRIPT_LISU */ | |
| 137 {HB_TAG('m','y','e','i')},» /* HB_SCRIPT_MEETEI_MAYEK */ | |
| 138 {HB_TAG('s','a','r','b')},» /* HB_SCRIPT_OLD_SOUTH_ARABIAN */ | |
| 139 {HB_TAG('o','r','k','h')},» /* HB_SCRIPT_OLD_TURKIC */ | |
| 140 {HB_TAG('s','a','m','r')},» /* HB_SCRIPT_SAMARITAN */ | |
| 141 {HB_TAG('l','a','n','a')},» /* HB_SCRIPT_TAI_THAM */ | |
| 142 {HB_TAG('t','a','v','t')},» /* HB_SCRIPT_TAI_VIET */ | |
| 143 | |
| 144 /* Unicode-6.0 additions */ | |
| 145 {HB_TAG('b','a','t','k')},» /* HB_SCRIPT_BATAK */ | |
| 146 {HB_TAG('b','r','a','h')},» /* HB_SCRIPT_BRAHMI */ | |
| 147 {HB_TAG('m','a','n','d')} » /* HB_SCRIPT_MANDAIC */ | |
| 148 }; | |
| 149 | |
| 150 const hb_tag_t * | |
| 151 hb_ot_tags_from_script (hb_script_t script) | |
| 152 { | |
| 153 static const hb_tag_t def_tag[] = {HB_OT_TAG_DEFAULT_SCRIPT, HB_TAG_NONE}; | |
| 154 | |
| 155 if (unlikely ((unsigned int) script >= ARRAY_LENGTH (ot_scripts))) | |
| 156 return def_tag; | |
| 157 | |
| 158 return ot_scripts[script]; | |
| 159 } | 141 } |
| 160 | 142 |
| 161 hb_script_t | 143 hb_script_t |
| 162 hb_ot_tag_to_script (hb_tag_t tag) | 144 hb_ot_tag_to_script (hb_tag_t tag) |
| 163 { | 145 { |
| 164 int i; | 146 if (unlikely ((tag & 0x000000FF) == '2')) |
| 147 return hb_ot_new_tag_to_script (tag); |
| 165 | 148 |
| 166 for (i = 0; i < ARRAY_LENGTH (ot_scripts); i++) { | 149 return hb_ot_old_tag_to_script (tag); |
| 167 const hb_tag_t *p; | 150 } |
| 168 for (p = ot_scripts[i]; *p; p++) | |
| 169 if (tag == *p) | |
| 170 return i; | |
| 171 } | |
| 172 | 151 |
| 173 return HB_SCRIPT_UNKNOWN; | 152 |
| 174 } | 153 /* hb_language_t */ |
| 175 | 154 |
| 176 typedef struct { | 155 typedef struct { |
| 177 char language[6]; | 156 char language[6]; |
| 178 hb_tag_t tag; | 157 hb_tag_t tag; |
| 179 } LangTag; | 158 } LangTag; |
| 180 | 159 |
| 181 /* | 160 /* |
| 182 * Complete list at: | 161 * Complete list at: |
| 183 * http://www.microsoft.com/typography/otspec/languagetags.htm | 162 * http://www.microsoft.com/typography/otspec/languagetags.htm |
| 184 * | 163 * |
| 185 * Generated by intersecting the OpenType language tag list from | 164 * Generated by intersecting the OpenType language tag list from |
| 186 * Draft OpenType 1.5 spec, with the ISO 639-3 codes from | 165 * Draft OpenType 1.5 spec, with the ISO 639-3 codes from |
| 187 * 2008/08/04, matching on name, and finally adjusted manually. | 166 * 2008/08/04, matching on name, and finally adjusted manually. |
| 188 * | 167 * |
| 189 * Many items still missing. Those are commented out at the end. | 168 * Many items still missing. Those are commented out at the end. |
| 190 * Keep sorted for bsearch. | 169 * Keep sorted for bsearch. |
| 191 */ | 170 */ |
| 171 |
| 192 static const LangTag ot_languages[] = { | 172 static const LangTag ot_languages[] = { |
| 193 {"aa", HB_TAG('A','F','R',' ')}, /* Afar */ | 173 {"aa", HB_TAG('A','F','R',' ')}, /* Afar */ |
| 194 {"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */ | 174 {"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */ |
| 195 {"abq", HB_TAG('A','B','A',' ')}, /* Abaza */ | 175 {"abq", HB_TAG('A','B','A',' ')}, /* Abaza */ |
| 196 {"ady", HB_TAG('A','D','Y',' ')}, /* Adyghe */ | 176 {"ady", HB_TAG('A','D','Y',' ')}, /* Adyghe */ |
| 197 {"af", HB_TAG('A','F','K',' ')}, /* Afrikaans */ | 177 {"af", HB_TAG('A','F','K',' ')}, /* Afrikaans */ |
| 198 {"aiw", HB_TAG('A','R','I',' ')}, /* Aari */ | 178 {"aiw", HB_TAG('A','R','I',' ')}, /* Aari */ |
| 199 {"am", HB_TAG('A','M','H',' ')}, /* Amharic */ | 179 {"am", HB_TAG('A','M','H',' ')}, /* Amharic */ |
| 200 {"ar", HB_TAG('A','R','A',' ')}, /* Arabic */ | 180 {"ar", HB_TAG('A','R','A',' ')}, /* Arabic */ |
| 201 {"arn", HB_TAG('M','A','P',' ')}, /* Mapudungun */ | 181 {"arn", HB_TAG('M','A','P',' ')}, /* Mapudungun */ |
| (...skipping 266 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 468 {"wbm", HB_TAG('W','A',' ',' ')}, /* Wa */ | 448 {"wbm", HB_TAG('W','A',' ',' ')}, /* Wa */ |
| 469 {"wbr", HB_TAG('W','A','G',' ')}, /* Wagdi */ | 449 {"wbr", HB_TAG('W','A','G',' ')}, /* Wagdi */ |
| 470 {"wo", HB_TAG('W','L','F',' ')}, /* Wolof */ | 450 {"wo", HB_TAG('W','L','F',' ')}, /* Wolof */ |
| 471 {"xal", HB_TAG('K','L','M',' ')}, /* Kalmyk */ | 451 {"xal", HB_TAG('K','L','M',' ')}, /* Kalmyk */ |
| 472 {"xh", HB_TAG('X','H','S',' ')}, /* Xhosa */ | 452 {"xh", HB_TAG('X','H','S',' ')}, /* Xhosa */ |
| 473 {"xom", HB_TAG('K','M','O',' ')}, /* Komo (Sudan) */ | 453 {"xom", HB_TAG('K','M','O',' ')}, /* Komo (Sudan) */ |
| 474 {"xsl", HB_TAG('S','S','L',' ')}, /* South Slavey */ | 454 {"xsl", HB_TAG('S','S','L',' ')}, /* South Slavey */ |
| 475 {"yi", HB_TAG('J','I','I',' ')}, /* Yiddish */ | 455 {"yi", HB_TAG('J','I','I',' ')}, /* Yiddish */ |
| 476 {"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */ | 456 {"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */ |
| 477 {"yso", HB_TAG('N','I','S',' ')}, /* Nisi (China) */ | 457 {"yso", HB_TAG('N','I','S',' ')}, /* Nisi (China) */ |
| 478 {"zh-cn", HB_TAG('Z','H','S',' ')}, /* Chinese (China) */ | |
| 479 {"zh-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */ | |
| 480 {"zh-mo", HB_TAG('Z','H','T',' ')}, /* Chinese (Macao) */ | |
| 481 {"zh-sg", HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */ | |
| 482 {"zh-tw", HB_TAG('Z','H','T',' ')}, /* Chinese (Taiwan) */ | |
| 483 {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */ | 458 {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */ |
| 484 {"zu", HB_TAG('Z','U','L',' ')} /* Zulu */ | 459 {"zu", HB_TAG('Z','U','L',' ')} /* Zulu */ |
| 485 | 460 |
| 486 /* I couldn't find the language id for these */ | 461 /* I couldn't find the language id for these */ |
| 487 | 462 |
| 488 /*{"??", HB_TAG('A','G','W',' ')},*/ /* Agaw */ | 463 /*{"??", HB_TAG('A','G','W',' ')},*/ /* Agaw */ |
| 489 /*{"??", HB_TAG('A','L','S',' ')},*/ /* Alsatian */ | 464 /*{"??", HB_TAG('A','L','S',' ')},*/ /* Alsatian */ |
| 490 /*{"??", HB_TAG('A','L','T',' ')},*/ /* Altai */ | 465 /*{"??", HB_TAG('A','L','T',' ')},*/ /* Altai */ |
| 491 /*{"??", HB_TAG('A','R','K',' ')},*/ /* Arakanese */ | 466 /*{"??", HB_TAG('A','R','K',' ')},*/ /* Arakanese */ |
| 492 /*{"??", HB_TAG('A','T','H',' ')},*/ /* Athapaskan */ | 467 /*{"??", HB_TAG('A','T','H',' ')},*/ /* Athapaskan */ |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 588 /*{"??", HB_TAG('T','U','A',' ')},*/ /* Turoyo Aramaic */ | 563 /*{"??", HB_TAG('T','U','A',' ')},*/ /* Turoyo Aramaic */ |
| 589 /*{"??", HB_TAG('T','U','V',' ')},*/ /* Tuvin */ | 564 /*{"??", HB_TAG('T','U','V',' ')},*/ /* Tuvin */ |
| 590 /*{"??", HB_TAG('W','C','R',' ')},*/ /* West-Cree */ | 565 /*{"??", HB_TAG('W','C','R',' ')},*/ /* West-Cree */ |
| 591 /*{"??", HB_TAG('X','B','D',' ')},*/ /* Tai Lue */ | 566 /*{"??", HB_TAG('X','B','D',' ')},*/ /* Tai Lue */ |
| 592 /*{"??", HB_TAG('Y','C','R',' ')},*/ /* Y-Cree */ | 567 /*{"??", HB_TAG('Y','C','R',' ')},*/ /* Y-Cree */ |
| 593 /*{"??", HB_TAG('Y','I','C',' ')},*/ /* Yi Classic */ | 568 /*{"??", HB_TAG('Y','I','C',' ')},*/ /* Yi Classic */ |
| 594 /*{"??", HB_TAG('Y','I','M',' ')},*/ /* Yi Modern */ | 569 /*{"??", HB_TAG('Y','I','M',' ')},*/ /* Yi Modern */ |
| 595 /*{"??", HB_TAG('Z','H','P',' ')},*/ /* Chinese Phonetic */ | 570 /*{"??", HB_TAG('Z','H','P',' ')},*/ /* Chinese Phonetic */ |
| 596 }; | 571 }; |
| 597 | 572 |
| 573 static const LangTag ot_languages_zh[] = { |
| 574 {"zh-cn", HB_TAG('Z','H','S',' ')}, /* Chinese (China) */ |
| 575 {"zh-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */ |
| 576 {"zh-mo", HB_TAG('Z','H','T',' ')}, /* Chinese (Macao) */ |
| 577 {"zh-sg", HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */ |
| 578 {"zh-tw", HB_TAG('Z','H','T',' ')} /* Chinese (Taiwan) */ |
| 579 }; |
| 580 |
| 598 static int | 581 static int |
| 599 lang_compare_first_component (const char *a, | 582 lang_compare_first_component (const char *a, |
| 600 const char *b) | 583 const char *b) |
| 601 { | 584 { |
| 602 unsigned int da, db; | 585 unsigned int da, db; |
| 603 const char *p; | 586 const char *p; |
| 604 | 587 |
| 605 p = strstr (a, "-"); | 588 p = strchr (a, '-'); |
| 606 da = p ? (unsigned int) (p - a) : strlen (a); | 589 da = p ? (unsigned int) (p - a) : strlen (a); |
| 607 | 590 |
| 608 p = strstr (b, "-"); | 591 p = strchr (b, '-'); |
| 609 db = p ? (unsigned int) (p - b) : strlen (b); | 592 db = p ? (unsigned int) (p - b) : strlen (b); |
| 610 | 593 |
| 611 return strncmp (a, b, MAX (da, db)); | 594 return strncmp (a, b, MAX (da, db)); |
| 612 } | 595 } |
| 613 | 596 |
| 614 static hb_bool_t | 597 static hb_bool_t |
| 615 lang_matches (const char *lang_str, const char *spec) | 598 lang_matches (const char *lang_str, const char *spec) |
| 616 { | 599 { |
| 617 unsigned int len = strlen (spec); | 600 unsigned int len = strlen (spec); |
| 618 | 601 |
| 619 return lang_str && strncmp (lang_str, spec, len) == 0 && | 602 return strncmp (lang_str, spec, len) == 0 && |
| 620 (lang_str[len] == '\0' || lang_str[len] == '-'); | 603 (lang_str[len] == '\0' || lang_str[len] == '-'); |
| 621 } | 604 } |
| 622 | 605 |
| 623 hb_tag_t | 606 hb_tag_t |
| 624 hb_ot_tag_from_language (hb_language_t language) | 607 hb_ot_tag_from_language (hb_language_t language) |
| 625 { | 608 { |
| 626 const char *lang_str; | 609 const char *lang_str, *s; |
| 627 LangTag *lang_tag; | 610 const LangTag *lang_tag; |
| 628 | 611 |
| 629 if (language == NULL) | 612 if (language == HB_LANGUAGE_INVALID) |
| 630 return HB_OT_TAG_DEFAULT_LANGUAGE; | 613 return HB_OT_TAG_DEFAULT_LANGUAGE; |
| 631 | 614 |
| 632 lang_str = hb_language_to_string (language); | 615 lang_str = hb_language_to_string (language); |
| 633 | 616 |
| 634 if (0 == strcmp (lang_str, "x-hbot")) { | 617 s = strstr (lang_str, "x-hbot"); |
| 618 if (s) { |
| 635 char tag[4]; | 619 char tag[4]; |
| 636 int i; | 620 int i; |
| 637 lang_str += 6; | 621 s += 6; |
| 638 #define IS_LETTER(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) | 622 for (i = 0; i < 4 && ISALPHA (s[i]); i++) |
| 639 #define TO_UPPER(c) (((c) >= 'a' && (c) <= 'z') ? (c) + 'A' - 'a' : (c)) | 623 tag[i] = TOUPPER (s[i]); |
| 640 for (i = 0; i < 4 && IS_LETTER (lang_str[i]); i++) | 624 if (i) { |
| 641 tag[i] = TO_UPPER (lang_str[i]); | 625 for (; i < 4; i++) |
| 642 for (; i < 4; i++) | 626 » tag[i] = ' '; |
| 643 tag[i] = ' '; | 627 return HB_TAG_CHAR4 (tag); |
| 644 return HB_TAG_STR (tag); | 628 } |
| 645 } | 629 } |
| 646 | 630 |
| 647 /* find a language matching in the first component */ | 631 /* Find a language matching in the first component */ |
| 648 lang_tag = bsearch (lang_str, ot_languages, | 632 lang_tag = (LangTag *) bsearch (lang_str, ot_languages, |
| 649 » » ARRAY_LENGTH (ot_languages), sizeof (LangTag), | 633 » » » » ARRAY_LENGTH (ot_languages), sizeof (LangTag), |
| 650 » » (hb_compare_func_t) lang_compare_first_component); | 634 » » » » (hb_compare_func_t) lang_compare_first_component); |
| 651 | |
| 652 /* we now need to find the best language matching */ | |
| 653 if (lang_tag) | |
| 654 { | |
| 655 hb_bool_t found = FALSE; | |
| 656 | |
| 657 /* go to the final one matching in the first component */ | |
| 658 while (lang_tag + 1 < ot_languages + ARRAY_LENGTH (ot_languages) && | |
| 659 » lang_compare_first_component (lang_str, (lang_tag + 1)->language) == 0) | |
| 660 lang_tag++; | |
| 661 | |
| 662 /* go back, find which one matches completely */ | |
| 663 while (lang_tag >= ot_languages && | |
| 664 » lang_compare_first_component (lang_str, lang_tag->language) == 0) | |
| 665 { | |
| 666 if (lang_matches (lang_str, lang_tag->language)) { | |
| 667 » found = TRUE; | |
| 668 » break; | |
| 669 } | |
| 670 | |
| 671 lang_tag--; | |
| 672 } | |
| 673 | |
| 674 if (!found) | |
| 675 lang_tag = NULL; | |
| 676 } | |
| 677 | |
| 678 if (lang_tag) | 635 if (lang_tag) |
| 679 return lang_tag->tag; | 636 return lang_tag->tag; |
| 680 | 637 |
| 638 /* Otherwise, check the Chinese ones */ |
| 639 if (0 == lang_compare_first_component (lang_str, "zh")) |
| 640 { |
| 641 unsigned int i; |
| 642 |
| 643 for (i = 0; i < ARRAY_LENGTH (ot_languages_zh); i++) |
| 644 { |
| 645 lang_tag = &ot_languages_zh[i]; |
| 646 if (lang_matches (lang_tag->language, lang_str)) |
| 647 return lang_tag->tag; |
| 648 } |
| 649 |
| 650 /* Otherwise just return 'ZHS ' */ |
| 651 return HB_TAG('Z','H','S',' '); |
| 652 } |
| 653 |
| 654 s = strchr (lang_str, '-'); |
| 655 if (!s) |
| 656 s = lang_str + strlen (lang_str); |
| 657 if (s - lang_str == 3) { |
| 658 /* Assume it's ISO-639-3 and upper-case and use it. */ |
| 659 return hb_tag_from_string (lang_str, s - lang_str) & ~0x20202000; |
| 660 } |
| 661 |
| 681 return HB_OT_TAG_DEFAULT_LANGUAGE; | 662 return HB_OT_TAG_DEFAULT_LANGUAGE; |
| 682 } | 663 } |
| 683 | 664 |
| 684 hb_language_t | 665 hb_language_t |
| 685 hb_ot_tag_to_language (hb_tag_t tag) | 666 hb_ot_tag_to_language (hb_tag_t tag) |
| 686 { | 667 { |
| 687 unsigned int i; | 668 unsigned int i; |
| 688 unsigned char buf[11] = "x-hbot"; | 669 |
| 670 if (tag == HB_OT_TAG_DEFAULT_LANGUAGE) |
| 671 return NULL; |
| 689 | 672 |
| 690 for (i = 0; i < ARRAY_LENGTH (ot_languages); i++) | 673 for (i = 0; i < ARRAY_LENGTH (ot_languages); i++) |
| 691 if (ot_languages[i].tag == tag) | 674 if (ot_languages[i].tag == tag) |
| 692 return hb_language_from_string (ot_languages[i].language); | 675 return hb_language_from_string (ot_languages[i].language, -1); |
| 693 | 676 |
| 694 buf[6] = tag >> 24; | 677 /* If tag starts with ZH, it's Chinese */ |
| 695 buf[7] = (tag >> 16) & 0xFF; | 678 if ((tag & 0xFFFF0000) == 0x5A480000) { |
| 696 buf[8] = (tag >> 8) & 0xFF; | 679 switch (tag) { |
| 697 buf[9] = tag & 0xFF; | 680 case HB_TAG('Z','H','H',' '): return hb_language_from_string ("zh-hk", -1); /* Hong Kong */ |
| 698 buf[10] = '\0'; | 681 default: { |
| 699 return hb_language_from_string ((char *) buf); | 682 /* Encode the tag... */ |
| 683 » unsigned char buf[14] = "zh-x-hbot"; |
| 684 » buf[9] = tag >> 24; |
| 685 » buf[10] = (tag >> 16) & 0xFF; |
| 686 » buf[11] = (tag >> 8) & 0xFF; |
| 687 » buf[12] = tag & 0xFF; |
| 688 » if (buf[12] == 0x20) |
| 689 » buf[12] = '\0'; |
| 690 » buf[13] = '\0'; |
| 691 » return hb_language_from_string ((char *) buf, -1); |
| 692 } |
| 693 } |
| 694 } |
| 695 |
| 696 /* Else return a custom language in the form of "x-hbotABCD" */ |
| 697 { |
| 698 unsigned char buf[11] = "x-hbot"; |
| 699 buf[6] = tag >> 24; |
| 700 buf[7] = (tag >> 16) & 0xFF; |
| 701 buf[8] = (tag >> 8) & 0xFF; |
| 702 buf[9] = tag & 0xFF; |
| 703 if (buf[9] == 0x20) |
| 704 buf[9] = '\0'; |
| 705 buf[10] = '\0'; |
| 706 return hb_language_from_string ((char *) buf, -1); |
| 707 } |
| 700 } | 708 } |
| 701 | 709 |
| 702 | 710 |
| 703 HB_END_DECLS | |
| OLD | NEW |