OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2009 Red Hat, Inc. | 2 * Copyright © 2009 Red Hat, Inc. |
| 3 * Copyright © 2011 Google, Inc. |
3 * | 4 * |
4 * This is part of HarfBuzz, a text shaping library. | 5 * This is part of HarfBuzz, a text shaping library. |
5 * | 6 * |
6 * Permission is hereby granted, without written agreement and without | 7 * Permission is hereby granted, without written agreement and without |
7 * license or royalty fees, to use, copy, modify, and distribute this | 8 * license or royalty fees, to use, copy, modify, and distribute this |
8 * software and its documentation for any purpose, provided that the | 9 * software and its documentation for any purpose, provided that the |
9 * above copyright notice and the following two paragraphs appear in | 10 * above copyright notice and the following two paragraphs appear in |
10 * all copies of this software. | 11 * all copies of this software. |
11 * | 12 * |
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 * DAMAGE. | 17 * DAMAGE. |
17 * | 18 * |
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 * | 24 * |
24 * Red Hat Author(s): Behdad Esfahbod | 25 * Red Hat Author(s): Behdad Esfahbod |
| 26 * Google Author(s): Behdad Esfahbod |
25 */ | 27 */ |
26 | 28 |
27 #include "hb-private.h" | 29 #include "hb-private.hh" |
28 #include "hb-ot.h" | 30 #include "hb-ot.h" |
29 | 31 |
30 #include <string.h> | 32 #include <string.h> |
31 | 33 |
32 HB_BEGIN_DECLS | |
33 | 34 |
34 | 35 |
| 36 /* hb_script_t */ |
| 37 |
| 38 static hb_tag_t |
| 39 hb_ot_old_tag_from_script (hb_script_t script) |
| 40 { |
| 41 switch ((hb_tag_t) script) { |
| 42 case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT; |
| 43 |
| 44 /* KATAKANA and HIRAGANA both map to 'kana' */ |
| 45 case HB_SCRIPT_HIRAGANA: return HB_TAG('k','a','n','a'); |
| 46 |
| 47 /* Spaces at the end are preserved, unlike ISO 15924 */ |
| 48 case HB_SCRIPT_LAO: return HB_TAG('l','a','o',' '); |
| 49 case HB_SCRIPT_YI: return HB_TAG('y','i',' ',' '); |
| 50 /* Unicode-5.0 additions */ |
| 51 case HB_SCRIPT_NKO: return HB_TAG('n','k','o',' '); |
| 52 /* Unicode-5.1 additions */ |
| 53 case HB_SCRIPT_VAI: return HB_TAG('v','a','i',' '); |
| 54 /* Unicode-5.2 additions */ |
| 55 /* Unicode-6.0 additions */ |
| 56 } |
| 57 |
| 58 /* Else, just change first char to lowercase and return */ |
| 59 return ((hb_tag_t) script) | 0x20000000; |
| 60 } |
| 61 |
| 62 static hb_script_t |
| 63 hb_ot_old_tag_to_script (hb_tag_t tag) |
| 64 { |
| 65 if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT)) |
| 66 return HB_SCRIPT_INVALID; |
| 67 |
| 68 /* This side of the conversion is fully algorithmic. */ |
| 69 |
| 70 /* Any spaces at the end of the tag are replaced by repeating the last |
| 71 * letter. Eg 'nko ' -> 'Nkoo' */ |
| 72 if (unlikely ((tag & 0x0000FF00) == 0x00002000)) |
| 73 tag |= (tag >> 8) & 0x0000FF00; /* Copy second letter to third */ |
| 74 if (unlikely ((tag & 0x000000FF) == 0x00000020)) |
| 75 tag |= (tag >> 8) & 0x000000FF; /* Copy third letter to fourth */ |
| 76 |
| 77 /* Change first char to uppercase and return */ |
| 78 return (hb_script_t) (tag & ~0x20000000); |
| 79 } |
| 80 |
| 81 static hb_tag_t |
| 82 hb_ot_new_tag_from_script (hb_script_t script) |
| 83 { |
| 84 switch ((hb_tag_t) script) { |
| 85 case HB_SCRIPT_BENGALI: return HB_TAG('b','n','g','2'); |
| 86 case HB_SCRIPT_DEVANAGARI: return HB_TAG('d','e','v','2'); |
| 87 case HB_SCRIPT_GUJARATI: return HB_TAG('g','j','r','2'); |
| 88 case HB_SCRIPT_GURMUKHI: return HB_TAG('g','u','r','2'); |
| 89 case HB_SCRIPT_KANNADA: return HB_TAG('k','n','d','2'); |
| 90 case HB_SCRIPT_MALAYALAM: return HB_TAG('m','l','m','2'); |
| 91 case HB_SCRIPT_ORIYA: return HB_TAG('o','r','y','2'); |
| 92 case HB_SCRIPT_TAMIL: return HB_TAG('t','m','l','2'); |
| 93 case HB_SCRIPT_TELUGU: return HB_TAG('t','e','l','2'); |
| 94 } |
| 95 |
| 96 return HB_OT_TAG_DEFAULT_SCRIPT; |
| 97 } |
| 98 |
| 99 static hb_script_t |
| 100 hb_ot_new_tag_to_script (hb_tag_t tag) |
| 101 { |
| 102 switch (tag) { |
| 103 case HB_TAG('b','n','g','2'): return HB_SCRIPT_BENGALI; |
| 104 case HB_TAG('d','e','v','2'): return HB_SCRIPT_DEVANAGARI; |
| 105 case HB_TAG('g','j','r','2'): return HB_SCRIPT_GUJARATI; |
| 106 case HB_TAG('g','u','r','2'): return HB_SCRIPT_GURMUKHI; |
| 107 case HB_TAG('k','n','d','2'): return HB_SCRIPT_KANNADA; |
| 108 case HB_TAG('m','l','m','2'): return HB_SCRIPT_MALAYALAM; |
| 109 case HB_TAG('o','r','y','2'): return HB_SCRIPT_ORIYA; |
| 110 case HB_TAG('t','m','l','2'): return HB_SCRIPT_TAMIL; |
| 111 case HB_TAG('t','e','l','2'): return HB_SCRIPT_TELUGU; |
| 112 } |
| 113 |
| 114 return HB_SCRIPT_UNKNOWN; |
| 115 } |
| 116 |
35 /* | 117 /* |
36 * Complete list at: | 118 * Complete list at: |
37 * http://www.microsoft.com/typography/otspec/scripttags.htm | 119 * https://www.microsoft.com/typography/otspec/scripttags.htm |
| 120 * https://www.microsoft.com/typography/otspec160/scripttagsProposed.htm |
| 121 * |
| 122 * Most of the script tags are the same as the ISO 15924 tag but lowercased. |
| 123 * So we just do that, and handle the exceptional cases in a switch. |
38 */ | 124 */ |
39 static const hb_tag_t ot_scripts[][3] = { | |
40 {HB_TAG('D','F','L','T')}, /* HB_SCRIPT_COMMON */ | |
41 {HB_TAG('D','F','L','T')}, /* HB_SCRIPT_INHERITED */ | |
42 {HB_TAG('a','r','a','b')}, /* HB_SCRIPT_ARABIC */ | |
43 {HB_TAG('a','r','m','n')}, /* HB_SCRIPT_ARMENIAN */ | |
44 {HB_TAG('b','n','g','2'), HB_TAG('b','e','n','g')}, /* HB_SCRIPT_BENGALI */ | |
45 {HB_TAG('b','o','p','o')}, /* HB_SCRIPT_BOPOMOFO */ | |
46 {HB_TAG('c','h','e','r')}, /* HB_SCRIPT_CHEROKEE */ | |
47 {HB_TAG('c','o','p','t')}, /* HB_SCRIPT_COPTIC */ | |
48 {HB_TAG('c','y','r','l')}, /* HB_SCRIPT_CYRILLIC */ | |
49 {HB_TAG('d','s','r','t')}, /* HB_SCRIPT_DESERET */ | |
50 {HB_TAG('d','e','v','2'), HB_TAG('d','e','v','a')}, /* HB_SCRIPT_DEVANAGARI
*/ | |
51 {HB_TAG('e','t','h','i')}, /* HB_SCRIPT_ETHIOPIC */ | |
52 {HB_TAG('g','e','o','r')}, /* HB_SCRIPT_GEORGIAN */ | |
53 {HB_TAG('g','o','t','h')}, /* HB_SCRIPT_GOTHIC */ | |
54 {HB_TAG('g','r','e','k')}, /* HB_SCRIPT_GREEK */ | |
55 {HB_TAG('g','j','r','2'), HB_TAG('g','u','j','r')}, /* HB_SCRIPT_GUJARATI */ | |
56 {HB_TAG('g','u','r','2'), HB_TAG('g','u','r','u')}, /* HB_SCRIPT_GURMUKHI */ | |
57 {HB_TAG('h','a','n','i')}, /* HB_SCRIPT_HAN */ | |
58 {HB_TAG('h','a','n','g')}, /* HB_SCRIPT_HANGUL */ | |
59 {HB_TAG('h','e','b','r')}, /* HB_SCRIPT_HEBREW */ | |
60 {HB_TAG('k','a','n','a')}, /* HB_SCRIPT_HIRAGANA */ | |
61 {HB_TAG('k','n','d','2'), HB_TAG('k','n','d','a')}, /* HB_SCRIPT_KANNADA */ | |
62 {HB_TAG('k','a','n','a')}, /* HB_SCRIPT_KATAKANA */ | |
63 {HB_TAG('k','h','m','r')}, /* HB_SCRIPT_KHMER */ | |
64 {HB_TAG('l','a','o',' ')}, /* HB_SCRIPT_LAO */ | |
65 {HB_TAG('l','a','t','n')}, /* HB_SCRIPT_LATIN */ | |
66 {HB_TAG('m','l','m','2'), HB_TAG('m','l','y','m')}, /* HB_SCRIPT_MALAYALAM *
/ | |
67 {HB_TAG('m','o','n','g')}, /* HB_SCRIPT_MONGOLIAN */ | |
68 {HB_TAG('m','y','m','r')}, /* HB_SCRIPT_MYANMAR */ | |
69 {HB_TAG('o','g','a','m')}, /* HB_SCRIPT_OGHAM */ | |
70 {HB_TAG('i','t','a','l')}, /* HB_SCRIPT_OLD_ITALIC */ | |
71 {HB_TAG('o','r','y','2'), HB_TAG('o','r','y','a')}, /* HB_SCRIPT_ORIYA */ | |
72 {HB_TAG('r','u','n','r')}, /* HB_SCRIPT_RUNIC */ | |
73 {HB_TAG('s','i','n','h')}, /* HB_SCRIPT_SINHALA */ | |
74 {HB_TAG('s','y','r','c')}, /* HB_SCRIPT_SYRIAC */ | |
75 {HB_TAG('t','m','l','2'), HB_TAG('t','a','m','l')}, /* HB_SCRIPT_TAMIL */ | |
76 {HB_TAG('t','e','l','2'), HB_TAG('t','e','l','u')}, /* HB_SCRIPT_TELUGU */ | |
77 {HB_TAG('t','h','a','a')}, /* HB_SCRIPT_THAANA */ | |
78 {HB_TAG('t','h','a','i')}, /* HB_SCRIPT_THAI */ | |
79 {HB_TAG('t','i','b','t')}, /* HB_SCRIPT_TIBETAN */ | |
80 {HB_TAG('c','a','n','s')}, /* HB_SCRIPT_CANADIAN_ABORIGINAL */ | |
81 {HB_TAG('y','i',' ',' ')}, /* HB_SCRIPT_YI */ | |
82 {HB_TAG('t','g','l','g')}, /* HB_SCRIPT_TAGALOG */ | |
83 {HB_TAG('h','a','n','o')}, /* HB_SCRIPT_HANUNOO */ | |
84 {HB_TAG('b','u','h','d')}, /* HB_SCRIPT_BUHID */ | |
85 {HB_TAG('t','a','g','b')}, /* HB_SCRIPT_TAGBANWA */ | |
86 | 125 |
87 /* Unicode-4.0 additions */ | 126 void |
88 {HB_TAG('b','r','a','i')},» /* HB_SCRIPT_BRAILLE */ | 127 hb_ot_tags_from_script (hb_script_t script, |
89 {HB_TAG('c','p','r','t')},» /* HB_SCRIPT_CYPRIOT */ | 128 » » » hb_tag_t *script_tag_1, |
90 {HB_TAG('l','i','m','b')},» /* HB_SCRIPT_LIMBU */ | 129 » » » hb_tag_t *script_tag_2) |
91 {HB_TAG('o','s','m','a')},» /* HB_SCRIPT_OSMANYA */ | 130 { |
92 {HB_TAG('s','h','a','w')},» /* HB_SCRIPT_SHAVIAN */ | 131 hb_tag_t new_tag; |
93 {HB_TAG('l','i','n','b')},» /* HB_SCRIPT_LINEAR_B */ | |
94 {HB_TAG('t','a','l','e')},» /* HB_SCRIPT_TAI_LE */ | |
95 {HB_TAG('u','g','a','r')},» /* HB_SCRIPT_UGARITIC */ | |
96 | 132 |
97 /* Unicode-4.1 additions */ | 133 *script_tag_2 = HB_OT_TAG_DEFAULT_SCRIPT; |
98 {HB_TAG('t','a','l','u')},» /* HB_SCRIPT_NEW_TAI_LUE */ | 134 *script_tag_1 = hb_ot_old_tag_from_script (script); |
99 {HB_TAG('b','u','g','i')},» /* HB_SCRIPT_BUGINESE */ | |
100 {HB_TAG('g','l','a','g')},» /* HB_SCRIPT_GLAGOLITIC */ | |
101 {HB_TAG('t','f','n','g')},» /* HB_SCRIPT_TIFINAGH */ | |
102 {HB_TAG('s','y','l','o')},» /* HB_SCRIPT_SYLOTI_NAGRI */ | |
103 {HB_TAG('x','p','e','o')},» /* HB_SCRIPT_OLD_PERSIAN */ | |
104 {HB_TAG('k','h','a','r')},» /* HB_SCRIPT_KHAROSHTHI */ | |
105 | 135 |
106 /* Unicode-5.0 additions */ | 136 new_tag = hb_ot_new_tag_from_script (script); |
107 {HB_TAG('D','F','L','T')},» /* HB_SCRIPT_UNKNOWN */ | 137 if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT)) { |
108 {HB_TAG('b','a','l','i')},» /* HB_SCRIPT_BALINESE */ | 138 *script_tag_2 = *script_tag_1; |
109 {HB_TAG('x','s','u','x')},» /* HB_SCRIPT_CUNEIFORM */ | 139 *script_tag_1 = new_tag; |
110 {HB_TAG('p','h','n','x')},» /* HB_SCRIPT_PHOENICIAN */ | 140 } |
111 {HB_TAG('p','h','a','g')},» /* HB_SCRIPT_PHAGS_PA */ | |
112 {HB_TAG('n','k','o',' ')},» /* HB_SCRIPT_NKO */ | |
113 | |
114 /* Unicode-5.1 additions */ | |
115 {HB_TAG('k','a','l','i')},» /* HB_SCRIPT_KAYAH_LI */ | |
116 {HB_TAG('l','e','p','c')},» /* HB_SCRIPT_LEPCHA */ | |
117 {HB_TAG('r','j','n','g')},» /* HB_SCRIPT_REJANG */ | |
118 {HB_TAG('s','u','n','d')},» /* HB_SCRIPT_SUNDANESE */ | |
119 {HB_TAG('s','a','u','r')},» /* HB_SCRIPT_SAURASHTRA */ | |
120 {HB_TAG('c','h','a','m')},» /* HB_SCRIPT_CHAM */ | |
121 {HB_TAG('o','l','c','k')},» /* HB_SCRIPT_OL_CHIKI */ | |
122 {HB_TAG('v','a','i',' ')},» /* HB_SCRIPT_VAI */ | |
123 {HB_TAG('c','a','r','i')},» /* HB_SCRIPT_CARIAN */ | |
124 {HB_TAG('l','y','c','i')},» /* HB_SCRIPT_LYCIAN */ | |
125 {HB_TAG('l','y','d','i')},» /* HB_SCRIPT_LYDIAN */ | |
126 | |
127 /* Unicode-5.2 additions */ | |
128 {HB_TAG('a','v','s','t')},» /* HB_SCRIPT_AVESTAN */ | |
129 {HB_TAG('b','a','m','u')},» /* HB_SCRIPT_BAMUM */ | |
130 {HB_TAG('e','g','y','p')},» /* HB_SCRIPT_EGYPTIAN_HIEROGLYPHS */ | |
131 {HB_TAG('a','r','m','i')},» /* HB_SCRIPT_IMPERIAL_ARAMAIC */ | |
132 {HB_TAG('p','h','l','i')},» /* HB_SCRIPT_INSCRIPTIONAL_PAHLAVI */ | |
133 {HB_TAG('p','r','t','i')},» /* HB_SCRIPT_INSCRIPTIONAL_PARTHIAN */ | |
134 {HB_TAG('j','a','v','a')},» /* HB_SCRIPT_JAVANESE */ | |
135 {HB_TAG('k','t','h','i')},» /* HB_SCRIPT_KAITHI */ | |
136 {HB_TAG('l','i','s','u')},» /* HB_SCRIPT_LISU */ | |
137 {HB_TAG('m','y','e','i')},» /* HB_SCRIPT_MEETEI_MAYEK */ | |
138 {HB_TAG('s','a','r','b')},» /* HB_SCRIPT_OLD_SOUTH_ARABIAN */ | |
139 {HB_TAG('o','r','k','h')},» /* HB_SCRIPT_OLD_TURKIC */ | |
140 {HB_TAG('s','a','m','r')},» /* HB_SCRIPT_SAMARITAN */ | |
141 {HB_TAG('l','a','n','a')},» /* HB_SCRIPT_TAI_THAM */ | |
142 {HB_TAG('t','a','v','t')},» /* HB_SCRIPT_TAI_VIET */ | |
143 | |
144 /* Unicode-6.0 additions */ | |
145 {HB_TAG('b','a','t','k')},» /* HB_SCRIPT_BATAK */ | |
146 {HB_TAG('b','r','a','h')},» /* HB_SCRIPT_BRAHMI */ | |
147 {HB_TAG('m','a','n','d')} » /* HB_SCRIPT_MANDAIC */ | |
148 }; | |
149 | |
150 const hb_tag_t * | |
151 hb_ot_tags_from_script (hb_script_t script) | |
152 { | |
153 static const hb_tag_t def_tag[] = {HB_OT_TAG_DEFAULT_SCRIPT, HB_TAG_NONE}; | |
154 | |
155 if (unlikely ((unsigned int) script >= ARRAY_LENGTH (ot_scripts))) | |
156 return def_tag; | |
157 | |
158 return ot_scripts[script]; | |
159 } | 141 } |
160 | 142 |
161 hb_script_t | 143 hb_script_t |
162 hb_ot_tag_to_script (hb_tag_t tag) | 144 hb_ot_tag_to_script (hb_tag_t tag) |
163 { | 145 { |
164 int i; | 146 if (unlikely ((tag & 0x000000FF) == '2')) |
| 147 return hb_ot_new_tag_to_script (tag); |
165 | 148 |
166 for (i = 0; i < ARRAY_LENGTH (ot_scripts); i++) { | 149 return hb_ot_old_tag_to_script (tag); |
167 const hb_tag_t *p; | 150 } |
168 for (p = ot_scripts[i]; *p; p++) | |
169 if (tag == *p) | |
170 return i; | |
171 } | |
172 | 151 |
173 return HB_SCRIPT_UNKNOWN; | 152 |
174 } | 153 /* hb_language_t */ |
175 | 154 |
176 typedef struct { | 155 typedef struct { |
177 char language[6]; | 156 char language[6]; |
178 hb_tag_t tag; | 157 hb_tag_t tag; |
179 } LangTag; | 158 } LangTag; |
180 | 159 |
181 /* | 160 /* |
182 * Complete list at: | 161 * Complete list at: |
183 * http://www.microsoft.com/typography/otspec/languagetags.htm | 162 * http://www.microsoft.com/typography/otspec/languagetags.htm |
184 * | 163 * |
185 * Generated by intersecting the OpenType language tag list from | 164 * Generated by intersecting the OpenType language tag list from |
186 * Draft OpenType 1.5 spec, with with the ISO 639-3 codes from | 165 * Draft OpenType 1.5 spec, with the ISO 639-3 codes from |
187 * 2008/08/04, matching on name, and finally adjusted manually. | 166 * 2008/08/04, matching on name, and finally adjusted manually. |
188 * | 167 * |
189 * Many items still missing. Those are commented out at the end. | 168 * Many items still missing. Those are commented out at the end. |
190 * Keep sorted for bsearch. | 169 * Keep sorted for bsearch. |
191 */ | 170 */ |
| 171 |
192 static const LangTag ot_languages[] = { | 172 static const LangTag ot_languages[] = { |
193 {"aa", HB_TAG('A','F','R',' ')}, /* Afar */ | 173 {"aa", HB_TAG('A','F','R',' ')}, /* Afar */ |
194 {"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */ | 174 {"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */ |
195 {"abq", HB_TAG('A','B','A',' ')}, /* Abaza */ | 175 {"abq", HB_TAG('A','B','A',' ')}, /* Abaza */ |
196 {"ady", HB_TAG('A','D','Y',' ')}, /* Adyghe */ | 176 {"ady", HB_TAG('A','D','Y',' ')}, /* Adyghe */ |
197 {"af", HB_TAG('A','F','K',' ')}, /* Afrikaans */ | 177 {"af", HB_TAG('A','F','K',' ')}, /* Afrikaans */ |
198 {"aiw", HB_TAG('A','R','I',' ')}, /* Aari */ | 178 {"aiw", HB_TAG('A','R','I',' ')}, /* Aari */ |
199 {"am", HB_TAG('A','M','H',' ')}, /* Amharic */ | 179 {"am", HB_TAG('A','M','H',' ')}, /* Amharic */ |
200 {"ar", HB_TAG('A','R','A',' ')}, /* Arabic */ | 180 {"ar", HB_TAG('A','R','A',' ')}, /* Arabic */ |
201 {"arn", HB_TAG('M','A','P',' ')}, /* Mapudungun */ | 181 {"arn", HB_TAG('M','A','P',' ')}, /* Mapudungun */ |
(...skipping 266 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
468 {"wbm", HB_TAG('W','A',' ',' ')}, /* Wa */ | 448 {"wbm", HB_TAG('W','A',' ',' ')}, /* Wa */ |
469 {"wbr", HB_TAG('W','A','G',' ')}, /* Wagdi */ | 449 {"wbr", HB_TAG('W','A','G',' ')}, /* Wagdi */ |
470 {"wo", HB_TAG('W','L','F',' ')}, /* Wolof */ | 450 {"wo", HB_TAG('W','L','F',' ')}, /* Wolof */ |
471 {"xal", HB_TAG('K','L','M',' ')}, /* Kalmyk */ | 451 {"xal", HB_TAG('K','L','M',' ')}, /* Kalmyk */ |
472 {"xh", HB_TAG('X','H','S',' ')}, /* Xhosa */ | 452 {"xh", HB_TAG('X','H','S',' ')}, /* Xhosa */ |
473 {"xom", HB_TAG('K','M','O',' ')}, /* Komo (Sudan) */ | 453 {"xom", HB_TAG('K','M','O',' ')}, /* Komo (Sudan) */ |
474 {"xsl", HB_TAG('S','S','L',' ')}, /* South Slavey */ | 454 {"xsl", HB_TAG('S','S','L',' ')}, /* South Slavey */ |
475 {"yi", HB_TAG('J','I','I',' ')}, /* Yiddish */ | 455 {"yi", HB_TAG('J','I','I',' ')}, /* Yiddish */ |
476 {"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */ | 456 {"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */ |
477 {"yso", HB_TAG('N','I','S',' ')}, /* Nisi (China) */ | 457 {"yso", HB_TAG('N','I','S',' ')}, /* Nisi (China) */ |
478 {"zh-cn", HB_TAG('Z','H','S',' ')}, /* Chinese (China) */ | |
479 {"zh-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */ | |
480 {"zh-mo", HB_TAG('Z','H','T',' ')}, /* Chinese (Macao) */ | |
481 {"zh-sg", HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */ | |
482 {"zh-tw", HB_TAG('Z','H','T',' ')}, /* Chinese (Taiwan) */ | |
483 {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */ | 458 {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */ |
484 {"zu", HB_TAG('Z','U','L',' ')} /* Zulu */ | 459 {"zu", HB_TAG('Z','U','L',' ')} /* Zulu */ |
485 | 460 |
486 /* I couldn't find the language id for these */ | 461 /* I couldn't find the language id for these */ |
487 | 462 |
488 /*{"??", HB_TAG('A','G','W',' ')},*/ /* Agaw */ | 463 /*{"??", HB_TAG('A','G','W',' ')},*/ /* Agaw */ |
489 /*{"??", HB_TAG('A','L','S',' ')},*/ /* Alsatian */ | 464 /*{"??", HB_TAG('A','L','S',' ')},*/ /* Alsatian */ |
490 /*{"??", HB_TAG('A','L','T',' ')},*/ /* Altai */ | 465 /*{"??", HB_TAG('A','L','T',' ')},*/ /* Altai */ |
491 /*{"??", HB_TAG('A','R','K',' ')},*/ /* Arakanese */ | 466 /*{"??", HB_TAG('A','R','K',' ')},*/ /* Arakanese */ |
492 /*{"??", HB_TAG('A','T','H',' ')},*/ /* Athapaskan */ | 467 /*{"??", HB_TAG('A','T','H',' ')},*/ /* Athapaskan */ |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
588 /*{"??", HB_TAG('T','U','A',' ')},*/ /* Turoyo Aramaic */ | 563 /*{"??", HB_TAG('T','U','A',' ')},*/ /* Turoyo Aramaic */ |
589 /*{"??", HB_TAG('T','U','V',' ')},*/ /* Tuvin */ | 564 /*{"??", HB_TAG('T','U','V',' ')},*/ /* Tuvin */ |
590 /*{"??", HB_TAG('W','C','R',' ')},*/ /* West-Cree */ | 565 /*{"??", HB_TAG('W','C','R',' ')},*/ /* West-Cree */ |
591 /*{"??", HB_TAG('X','B','D',' ')},*/ /* Tai Lue */ | 566 /*{"??", HB_TAG('X','B','D',' ')},*/ /* Tai Lue */ |
592 /*{"??", HB_TAG('Y','C','R',' ')},*/ /* Y-Cree */ | 567 /*{"??", HB_TAG('Y','C','R',' ')},*/ /* Y-Cree */ |
593 /*{"??", HB_TAG('Y','I','C',' ')},*/ /* Yi Classic */ | 568 /*{"??", HB_TAG('Y','I','C',' ')},*/ /* Yi Classic */ |
594 /*{"??", HB_TAG('Y','I','M',' ')},*/ /* Yi Modern */ | 569 /*{"??", HB_TAG('Y','I','M',' ')},*/ /* Yi Modern */ |
595 /*{"??", HB_TAG('Z','H','P',' ')},*/ /* Chinese Phonetic */ | 570 /*{"??", HB_TAG('Z','H','P',' ')},*/ /* Chinese Phonetic */ |
596 }; | 571 }; |
597 | 572 |
| 573 static const LangTag ot_languages_zh[] = { |
| 574 {"zh-cn", HB_TAG('Z','H','S',' ')}, /* Chinese (China) */ |
| 575 {"zh-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */ |
| 576 {"zh-mo", HB_TAG('Z','H','T',' ')}, /* Chinese (Macao) */ |
| 577 {"zh-sg", HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */ |
| 578 {"zh-tw", HB_TAG('Z','H','T',' ')} /* Chinese (Taiwan) */ |
| 579 }; |
| 580 |
/*
 * Compare two language strings on their first component only, i.e. the
 * text before the first '-' (or the whole string when there is no '-').
 *
 * The comparison covers the longer of the two first components, so a
 * shorter component never matches as a mere prefix of a longer one
 * ("zh" vs "zne" differ), while trailing subtags are ignored
 * ("zh" vs "zh-tw" compare equal).  Returns the strncmp() result.
 */
static int
lang_compare_first_component (const char *a,
                              const char *b)
{
  const char *dash_a = strchr (a, '-');
  const char *dash_b = strchr (b, '-');
  unsigned int len_a, len_b, span;

  if (dash_a)
    len_a = (unsigned int) (dash_a - a);
  else
    len_a = (unsigned int) strlen (a);

  if (dash_b)
    len_b = (unsigned int) (dash_b - b);
  else
    len_b = (unsigned int) strlen (b);

  span = len_a > len_b ? len_a : len_b;

  return strncmp (a, b, span);
}
613 | 596 |
614 static hb_bool_t | 597 static hb_bool_t |
615 lang_matches (const char *lang_str, const char *spec) | 598 lang_matches (const char *lang_str, const char *spec) |
616 { | 599 { |
617 unsigned int len = strlen (spec); | 600 unsigned int len = strlen (spec); |
618 | 601 |
619 return lang_str && strncmp (lang_str, spec, len) == 0 && | 602 return strncmp (lang_str, spec, len) == 0 && |
620 (lang_str[len] == '\0' || lang_str[len] == '-'); | 603 (lang_str[len] == '\0' || lang_str[len] == '-'); |
621 } | 604 } |
622 | 605 |
623 hb_tag_t | 606 hb_tag_t |
624 hb_ot_tag_from_language (hb_language_t language) | 607 hb_ot_tag_from_language (hb_language_t language) |
625 { | 608 { |
626 const char *lang_str; | 609 const char *lang_str, *s; |
627 LangTag *lang_tag; | 610 const LangTag *lang_tag; |
628 | 611 |
629 if (language == NULL) | 612 if (language == HB_LANGUAGE_INVALID) |
630 return HB_OT_TAG_DEFAULT_LANGUAGE; | 613 return HB_OT_TAG_DEFAULT_LANGUAGE; |
631 | 614 |
632 lang_str = hb_language_to_string (language); | 615 lang_str = hb_language_to_string (language); |
633 | 616 |
634 if (0 == strcmp (lang_str, "x-hbot")) { | 617 s = strstr (lang_str, "x-hbot"); |
| 618 if (s) { |
635 char tag[4]; | 619 char tag[4]; |
636 int i; | 620 int i; |
637 lang_str += 6; | 621 s += 6; |
638 #define IS_LETTER(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) | 622 for (i = 0; i < 4 && ISALPHA (s[i]); i++) |
639 #define TO_UPPER(c) (((c) >= 'a' && (c) <= 'z') ? (c) + 'A' - 'a' : (c)) | 623 tag[i] = TOUPPER (s[i]); |
640 for (i = 0; i < 4 && IS_LETTER (lang_str[i]); i++) | 624 if (i) { |
641 tag[i] = TO_UPPER (lang_str[i]); | 625 for (; i < 4; i++) |
642 for (; i < 4; i++) | 626 » tag[i] = ' '; |
643 tag[i] = ' '; | 627 return HB_TAG_CHAR4 (tag); |
644 return HB_TAG_STR (tag); | 628 } |
645 } | 629 } |
646 | 630 |
647 /* find a language matching in the first component */ | 631 /* Find a language matching in the first component */ |
648 lang_tag = bsearch (lang_str, ot_languages, | 632 lang_tag = (LangTag *) bsearch (lang_str, ot_languages, |
649 » » ARRAY_LENGTH (ot_languages), sizeof (LangTag), | 633 » » » » ARRAY_LENGTH (ot_languages), sizeof (LangTag), |
650 » » (hb_compare_func_t) lang_compare_first_component); | 634 » » » » (hb_compare_func_t) lang_compare_first_compone
nt); |
651 | |
652 /* we now need to find the best language matching */ | |
653 if (lang_tag) | |
654 { | |
655 hb_bool_t found = FALSE; | |
656 | |
657 /* go to the final one matching in the first component */ | |
658 while (lang_tag + 1 < ot_languages + ARRAY_LENGTH (ot_languages) && | |
659 » lang_compare_first_component (lang_str, (lang_tag + 1)->language) ==
0) | |
660 lang_tag++; | |
661 | |
662 /* go back, find which one matches completely */ | |
663 while (lang_tag >= ot_languages && | |
664 » lang_compare_first_component (lang_str, lang_tag->language) == 0) | |
665 { | |
666 if (lang_matches (lang_str, lang_tag->language)) { | |
667 » found = TRUE; | |
668 » break; | |
669 } | |
670 | |
671 lang_tag--; | |
672 } | |
673 | |
674 if (!found) | |
675 lang_tag = NULL; | |
676 } | |
677 | |
678 if (lang_tag) | 635 if (lang_tag) |
679 return lang_tag->tag; | 636 return lang_tag->tag; |
680 | 637 |
| 638 /* Otherwise, check the Chinese ones */ |
| 639 if (0 == lang_compare_first_component (lang_str, "zh")) |
| 640 { |
| 641 unsigned int i; |
| 642 |
| 643 for (i = 0; i < ARRAY_LENGTH (ot_languages_zh); i++) |
| 644 { |
| 645 lang_tag = &ot_languages_zh[i]; |
| 646 if (lang_matches (lang_tag->language, lang_str)) |
| 647 return lang_tag->tag; |
| 648 } |
| 649 |
| 650 /* Otherwise just return 'ZHS ' */ |
| 651 return HB_TAG('Z','H','S',' '); |
| 652 } |
| 653 |
| 654 s = strchr (lang_str, '-'); |
| 655 if (!s) |
| 656 s = lang_str + strlen (lang_str); |
| 657 if (s - lang_str == 3) { |
| 658 /* Assume it's ISO-639-3 and upper-case and use it. */ |
| 659 return hb_tag_from_string (lang_str, s - lang_str) & ~0x20202000; |
| 660 } |
| 661 |
681 return HB_OT_TAG_DEFAULT_LANGUAGE; | 662 return HB_OT_TAG_DEFAULT_LANGUAGE; |
682 } | 663 } |
683 | 664 |
684 hb_language_t | 665 hb_language_t |
685 hb_ot_tag_to_language (hb_tag_t tag) | 666 hb_ot_tag_to_language (hb_tag_t tag) |
686 { | 667 { |
687 unsigned int i; | 668 unsigned int i; |
688 unsigned char buf[11] = "x-hbot"; | 669 |
| 670 if (tag == HB_OT_TAG_DEFAULT_LANGUAGE) |
| 671 return NULL; |
689 | 672 |
690 for (i = 0; i < ARRAY_LENGTH (ot_languages); i++) | 673 for (i = 0; i < ARRAY_LENGTH (ot_languages); i++) |
691 if (ot_languages[i].tag == tag) | 674 if (ot_languages[i].tag == tag) |
692 return hb_language_from_string (ot_languages[i].language); | 675 return hb_language_from_string (ot_languages[i].language, -1); |
693 | 676 |
694 buf[6] = tag >> 24; | 677 /* If tag starts with ZH, it's Chinese */ |
695 buf[7] = (tag >> 16) & 0xFF; | 678 if ((tag & 0xFFFF0000) == 0x5A480000) { |
696 buf[8] = (tag >> 8) & 0xFF; | 679 switch (tag) { |
697 buf[9] = tag & 0xFF; | 680 case HB_TAG('Z','H','H',' '): return hb_language_from_string ("zh-hk", -1)
; /* Hong Kong */ |
698 buf[10] = '\0'; | 681 default: { |
699 return hb_language_from_string ((char *) buf); | 682 /* Encode the tag... */ |
| 683 » unsigned char buf[14] = "zh-x-hbot"; |
| 684 » buf[9] = tag >> 24; |
| 685 » buf[10] = (tag >> 16) & 0xFF; |
| 686 » buf[11] = (tag >> 8) & 0xFF; |
| 687 » buf[12] = tag & 0xFF; |
| 688 » if (buf[12] == 0x20) |
| 689 » buf[12] = '\0'; |
| 690 » buf[13] = '\0'; |
| 691 » return hb_language_from_string ((char *) buf, -1); |
| 692 } |
| 693 } |
| 694 } |
| 695 |
| 696 /* Else return a custom language in the form of "x-hbotABCD" */ |
| 697 { |
| 698 unsigned char buf[11] = "x-hbot"; |
| 699 buf[6] = tag >> 24; |
| 700 buf[7] = (tag >> 16) & 0xFF; |
| 701 buf[8] = (tag >> 8) & 0xFF; |
| 702 buf[9] = tag & 0xFF; |
| 703 if (buf[9] == 0x20) |
| 704 buf[9] = '\0'; |
| 705 buf[10] = '\0'; |
| 706 return hb_language_from_string ((char *) buf, -1); |
| 707 } |
700 } | 708 } |
701 | 709 |
702 | 710 |
703 HB_END_DECLS | |
OLD | NEW |