OLD | NEW |
1 /* | 1 /* |
2 * Copyright © 2011 Google, Inc. | 2 * Copyright © 2011,2012 Google, Inc. |
3 * | 3 * |
4 * This is part of HarfBuzz, a text shaping library. | 4 * This is part of HarfBuzz, a text shaping library. |
5 * | 5 * |
6 * Permission is hereby granted, without written agreement and without | 6 * Permission is hereby granted, without written agreement and without |
7 * license or royalty fees, to use, copy, modify, and distribute this | 7 * license or royalty fees, to use, copy, modify, and distribute this |
8 * software and its documentation for any purpose, provided that the | 8 * software and its documentation for any purpose, provided that the |
9 * above copyright notice and the following two paragraphs appear in | 9 * above copyright notice and the following two paragraphs appear in |
10 * all copies of this software. | 10 * all copies of this software. |
11 * | 11 * |
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 * DAMAGE. | 16 * DAMAGE. |
17 * | 17 * |
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 * | 23 * |
24 * Google Author(s): Behdad Esfahbod | 24 * Google Author(s): Behdad Esfahbod |
25 */ | 25 */ |
26 | 26 |
27 #include "hb-ot-shape-complex-private.hh" | 27 #include "hb-ot-shape-complex-indic-private.hh" |
| 28 #include "hb-ot-shape-private.hh" |
28 | 29 |
| 30 static const struct indic_options_t |
| 31 { |
| 32 indic_options_t (void) |
| 33 { |
| 34 char *c = getenv ("HB_OT_INDIC_OPTIONS"); |
| 35 uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible"); |
| 36 } |
29 | 37 |
30 | 38 bool uniscribe_bug_compatible; |
31 /* buffer var allocations */ | 39 } options; |
32 #define indic_category() complex_var_persistent_u8_0() /* indic_category_t */ | |
33 #define indic_position() complex_var_persistent_u8_1() /* indic_matra_category_t
*/ | |
34 | |
35 #define INDIC_TABLE_ELEMENT_TYPE uint8_t | |
36 | |
37 /* Categories used in the OpenType spec: | |
38 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx | |
39 */ | |
40 /* Note: This enum is duplicated in the -machine.rl source file. | |
41 * Not sure how to avoid duplication. */ | |
42 enum indic_category_t { | |
43 OT_X = 0, | |
44 OT_C, | |
45 OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */ | |
46 OT_V, | |
47 OT_N, | |
48 OT_H, | |
49 OT_ZWNJ, | |
50 OT_ZWJ, | |
51 OT_M, | |
52 OT_SM, | |
53 OT_VD, | |
54 OT_A, | |
55 OT_NBSP | |
56 }; | |
57 | |
58 /* Visual positions in a syllable from left to right. */ | |
59 enum indic_position_t { | |
60 POS_PRE, | |
61 POS_BASE, | |
62 POS_ABOVE, | |
63 POS_BELOW, | |
64 POS_POST | |
65 }; | |
66 | |
67 /* Categories used in IndicSyllabicCategory.txt from UCD */ | |
68 /* The assignments are guesswork */ | |
69 enum indic_syllabic_category_t { | |
70 INDIC_SYLLABIC_CATEGORY_OTHER»» » = OT_X, | |
71 | |
72 INDIC_SYLLABIC_CATEGORY_AVAGRAHA» » = OT_X, | |
73 INDIC_SYLLABIC_CATEGORY_BINDU»» » = OT_SM, | |
74 INDIC_SYLLABIC_CATEGORY_CONSONANT» » = OT_C, | |
75 INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD» = OT_C, | |
76 INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL» = OT_C, | |
77 INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER»= OT_C, | |
78 INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL» = OT_C, | |
79 INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER»= OT_NBSP, | |
80 INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED» = OT_C, | |
81 INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA» = OT_C, | |
82 INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER» = OT_X, | |
83 INDIC_SYLLABIC_CATEGORY_NUKTA»» » = OT_N, | |
84 INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER» = OT_X, | |
85 INDIC_SYLLABIC_CATEGORY_TONE_LETTER» » = OT_X, | |
86 INDIC_SYLLABIC_CATEGORY_TONE_MARK» » = OT_X, | |
87 INDIC_SYLLABIC_CATEGORY_VIRAMA» » = OT_H, | |
88 INDIC_SYLLABIC_CATEGORY_VISARGA» » = OT_SM, | |
89 INDIC_SYLLABIC_CATEGORY_VOWEL»» » = OT_V, | |
90 INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT» = OT_M, | |
91 INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT» = OT_V | |
92 }; | |
93 | |
94 /* Categories used in IndicMatraCategory.txt from UCD */ | |
95 enum indic_matra_category_t { | |
96 INDIC_MATRA_CATEGORY_NOT_APPLICABLE» » = POS_BASE, | |
97 | |
98 INDIC_MATRA_CATEGORY_LEFT» » » = POS_PRE, | |
99 INDIC_MATRA_CATEGORY_TOP» » » = POS_ABOVE, | |
100 INDIC_MATRA_CATEGORY_BOTTOM» » » = POS_BELOW, | |
101 INDIC_MATRA_CATEGORY_RIGHT» » » = POS_POST, | |
102 | |
103 /* We don't really care much about these since we decompose them | |
104 * in the generic pre-shaping layer. They will only be used if | |
105 * the font does not cover the decomposition. In which case, we | |
106 * define these as aliases to the place we want the split-matra | |
107 * glyph to show up. Quite arbitrary. */ | |
108 INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT»» = INDIC_MATRA_CATEGORY_BOTTOM, | |
109 INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT» » = INDIC_MATRA_CATEGORY_LEFT, | |
110 INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM» » = INDIC_MATRA_CATEGORY_BOTTOM, | |
111 INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT»= INDIC_MATRA_CATEGORY_BOTTOM, | |
112 INDIC_MATRA_CATEGORY_TOP_AND_LEFT» » = INDIC_MATRA_CATEGORY_LEFT, | |
113 INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT» = INDIC_MATRA_CATEGORY_LEFT, | |
114 INDIC_MATRA_CATEGORY_TOP_AND_RIGHT» » = INDIC_MATRA_CATEGORY_RIGHT, | |
115 | |
116 INDIC_MATRA_CATEGORY_INVISIBLE» » = INDIC_MATRA_CATEGORY_NOT_APPLI
CABLE, | |
117 INDIC_MATRA_CATEGORY_OVERSTRUCK» » = INDIC_MATRA_CATEGORY_NOT_APPLI
CABLE, | |
118 INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT» = INDIC_MATRA_CATEGORY_NOT_APPLI
CABLE | |
119 }; | |
120 | |
121 /* Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and th
e comma operation | |
122 * because gcc fails to optimize the latter and fills the table in at runtime. *
/ | |
123 #define INDIC_COMBINE_CATEGORIES(S,M) \ | |
124 (ASSERT_STATIC_EXPR_ZERO (M == INDIC_MATRA_CATEGORY_NOT_APPLICABLE || (S == IN
DIC_SYLLABIC_CATEGORY_VIRAMA || S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT)) +
\ | |
125 ASSERT_STATIC_EXPR_ZERO (S < 16 && M < 16) + \ | |
126 ((M << 4) | S)) | |
127 | |
128 #include "hb-ot-shape-complex-indic-table.hh" | |
129 | |
130 /* XXX | |
131 * This is a hack for now. We should: | |
132 * 1. Move this data into the main Indic table, | |
133 * and/or | |
134 * 2. Probe font lookups to determine consonant positions. | |
135 */ | |
136 static const struct consonant_position_t { | |
137 hb_codepoint_t u; | |
138 indic_position_t position; | |
139 } consonant_positions[] = { | |
140 {0x0930, POS_BELOW}, | |
141 {0x09AC, POS_BELOW}, | |
142 {0x09AF, POS_POST}, | |
143 {0x09B0, POS_BELOW}, | |
144 {0x09F0, POS_BELOW}, | |
145 {0x0A2F, POS_POST}, | |
146 {0x0A30, POS_BELOW}, | |
147 {0x0A35, POS_BELOW}, | |
148 {0x0A39, POS_BELOW}, | |
149 {0x0AB0, POS_BELOW}, | |
150 {0x0B24, POS_BELOW}, | |
151 {0x0B28, POS_BELOW}, | |
152 {0x0B2C, POS_BELOW}, | |
153 {0x0B2D, POS_BELOW}, | |
154 {0x0B2E, POS_BELOW}, | |
155 {0x0B2F, POS_POST}, | |
156 {0x0B30, POS_BELOW}, | |
157 {0x0B32, POS_BELOW}, | |
158 {0x0B33, POS_BELOW}, | |
159 {0x0B5F, POS_POST}, | |
160 {0x0B71, POS_BELOW}, | |
161 {0x0C15, POS_BELOW}, | |
162 {0x0C16, POS_BELOW}, | |
163 {0x0C17, POS_BELOW}, | |
164 {0x0C18, POS_BELOW}, | |
165 {0x0C19, POS_BELOW}, | |
166 {0x0C1A, POS_BELOW}, | |
167 {0x0C1B, POS_BELOW}, | |
168 {0x0C1C, POS_BELOW}, | |
169 {0x0C1D, POS_BELOW}, | |
170 {0x0C1E, POS_BELOW}, | |
171 {0x0C1F, POS_BELOW}, | |
172 {0x0C20, POS_BELOW}, | |
173 {0x0C21, POS_BELOW}, | |
174 {0x0C22, POS_BELOW}, | |
175 {0x0C23, POS_BELOW}, | |
176 {0x0C24, POS_BELOW}, | |
177 {0x0C25, POS_BELOW}, | |
178 {0x0C26, POS_BELOW}, | |
179 {0x0C27, POS_BELOW}, | |
180 {0x0C28, POS_BELOW}, | |
181 {0x0C2A, POS_BELOW}, | |
182 {0x0C2B, POS_BELOW}, | |
183 {0x0C2C, POS_BELOW}, | |
184 {0x0C2D, POS_BELOW}, | |
185 {0x0C2E, POS_BELOW}, | |
186 {0x0C2F, POS_BELOW}, | |
187 {0x0C30, POS_BELOW}, | |
188 {0x0C32, POS_BELOW}, | |
189 {0x0C33, POS_BELOW}, | |
190 {0x0C35, POS_BELOW}, | |
191 {0x0C36, POS_BELOW}, | |
192 {0x0C37, POS_BELOW}, | |
193 {0x0C38, POS_BELOW}, | |
194 {0x0C39, POS_BELOW}, | |
195 {0x0C95, POS_BELOW}, | |
196 {0x0C96, POS_BELOW}, | |
197 {0x0C97, POS_BELOW}, | |
198 {0x0C98, POS_BELOW}, | |
199 {0x0C99, POS_BELOW}, | |
200 {0x0C9A, POS_BELOW}, | |
201 {0x0C9B, POS_BELOW}, | |
202 {0x0C9C, POS_BELOW}, | |
203 {0x0C9D, POS_BELOW}, | |
204 {0x0C9E, POS_BELOW}, | |
205 {0x0C9F, POS_BELOW}, | |
206 {0x0CA0, POS_BELOW}, | |
207 {0x0CA1, POS_BELOW}, | |
208 {0x0CA2, POS_BELOW}, | |
209 {0x0CA3, POS_BELOW}, | |
210 {0x0CA4, POS_BELOW}, | |
211 {0x0CA5, POS_BELOW}, | |
212 {0x0CA6, POS_BELOW}, | |
213 {0x0CA7, POS_BELOW}, | |
214 {0x0CA8, POS_BELOW}, | |
215 {0x0CAA, POS_BELOW}, | |
216 {0x0CAB, POS_BELOW}, | |
217 {0x0CAC, POS_BELOW}, | |
218 {0x0CAD, POS_BELOW}, | |
219 {0x0CAE, POS_BELOW}, | |
220 {0x0CAF, POS_BELOW}, | |
221 {0x0CB0, POS_BELOW}, | |
222 {0x0CB2, POS_BELOW}, | |
223 {0x0CB3, POS_BELOW}, | |
224 {0x0CB5, POS_BELOW}, | |
225 {0x0CB6, POS_BELOW}, | |
226 {0x0CB7, POS_BELOW}, | |
227 {0x0CB8, POS_BELOW}, | |
228 {0x0CB9, POS_BELOW}, | |
229 {0x0CDE, POS_BELOW}, | |
230 {0x0D2F, POS_POST}, | |
231 {0x0D30, POS_POST}, | |
232 {0x0D32, POS_BELOW}, | |
233 {0x0D35, POS_POST}, | |
234 }; | |
235 | |
236 /* XXX | |
237 * This is a hack for now. We should move this data into the main Indic table. | |
238 */ | |
239 static const hb_codepoint_t ra_chars[] = { | |
240 0x0930, /* Devanagari */ | |
241 0x09B0, /* Bengali */ | |
242 0x09F0, /* Bengali */ | |
243 //0x09F1, /* Bengali */ | |
244 //0x0A30, /* Gurmukhi */ | |
245 0x0AB0, /* Gujarati */ | |
246 0x0B30, /* Oriya */ | |
247 //0x0BB0, /* Tamil */ | |
248 //0x0C30, /* Telugu */ | |
249 0x0CB0, /* Kannada */ | |
250 //0x0D30, /* Malayalam */ | |
251 }; | |
252 | 40 |
253 static int | 41 static int |
254 compare_codepoint (const void *pa, const void *pb) | 42 compare_codepoint (const void *pa, const void *pb) |
255 { | 43 { |
256 hb_codepoint_t a = * (hb_codepoint_t *) pa; | 44 hb_codepoint_t a = * (hb_codepoint_t *) pa; |
257 hb_codepoint_t b = * (hb_codepoint_t *) pb; | 45 hb_codepoint_t b = * (hb_codepoint_t *) pb; |
258 | 46 |
259 return a < b ? -1 : a == b ? 0 : +1; | 47 return a < b ? -1 : a == b ? 0 : +1; |
260 } | 48 } |
261 | 49 |
262 static indic_position_t | 50 static indic_position_t |
263 consonant_position (hb_codepoint_t u) | 51 consonant_position (hb_codepoint_t u) |
264 { | 52 { |
265 consonant_position_t *record; | 53 consonant_position_t *record; |
266 | 54 |
267 record = (consonant_position_t *) bsearch (&u, consonant_positions, | 55 record = (consonant_position_t *) bsearch (&u, consonant_positions, |
268 ARRAY_LENGTH (consonant_positions), | 56 ARRAY_LENGTH (consonant_positions), |
269 sizeof (consonant_positions[0]), | 57 sizeof (consonant_positions[0]), |
270 compare_codepoint); | 58 compare_codepoint); |
271 | 59 |
272 return record ? record->position : POS_BASE; | 60 return record ? record->position : POS_BASE_C; |
273 } | 61 } |
274 | 62 |
275 static bool | 63 static bool |
276 is_ra (hb_codepoint_t u) | 64 is_ra (hb_codepoint_t u) |
277 { | 65 { |
278 return !!bsearch (&u, ra_chars, | 66 return !!bsearch (&u, ra_chars, |
279 ARRAY_LENGTH (ra_chars), | 67 ARRAY_LENGTH (ra_chars), |
280 sizeof (ra_chars[0]), | 68 sizeof (ra_chars[0]), |
281 compare_codepoint); | 69 compare_codepoint); |
282 } | 70 } |
283 | 71 |
284 static bool | 72 static bool |
285 is_joiner (const hb_glyph_info_t &info) | 73 is_joiner (const hb_glyph_info_t &info) |
286 { | 74 { |
287 return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))); | 75 return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))); |
288 } | 76 } |
289 | 77 |
290 static bool | 78 static bool |
291 is_consonant (const hb_glyph_info_t &info) | 79 is_consonant (const hb_glyph_info_t &info) |
292 { | 80 { |
293 return !!(FLAG (info.indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra))); | 81 /* Note: |
| 82 * |
| 83 * We treat Vowels and placeholders as if they were consonants. This is safe
because Vowels |
| 84 * cannot happen in a consonant syllable. The plus side however is, we can ca
ll the |
| 85 * consonant syllable logic from the vowel syllable function and get it all ri
ght! */ |
| 86 return !!(FLAG (info.indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (O
T_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))); |
294 } | 87 } |
295 | 88 |
296 static const struct { | 89 struct feature_list_t { |
297 hb_tag_t tag; | 90 hb_tag_t tag; |
298 hb_bool_t is_global; | 91 hb_bool_t is_global; |
299 } indic_basic_features[] = | 92 }; |
| 93 |
| 94 static const feature_list_t |
| 95 indic_basic_features[] = |
300 { | 96 { |
301 {HB_TAG('n','u','k','t'), true}, | 97 {HB_TAG('n','u','k','t'), true}, |
302 {HB_TAG('a','k','h','n'), false}, | 98 {HB_TAG('a','k','h','n'), false}, |
303 {HB_TAG('r','p','h','f'), false}, | 99 {HB_TAG('r','p','h','f'), false}, |
304 {HB_TAG('r','k','r','f'), false}, | 100 {HB_TAG('r','k','r','f'), true}, |
305 {HB_TAG('p','r','e','f'), false}, | 101 {HB_TAG('p','r','e','f'), false}, |
306 {HB_TAG('b','l','w','f'), false}, | 102 {HB_TAG('b','l','w','f'), false}, |
307 {HB_TAG('h','a','l','f'), false}, | 103 {HB_TAG('h','a','l','f'), false}, |
308 {HB_TAG('v','a','t','u'), true}, | |
309 {HB_TAG('p','s','t','f'), false}, | 104 {HB_TAG('p','s','t','f'), false}, |
310 {HB_TAG('c','j','c','t'), false}, | 105 {HB_TAG('c','j','c','t'), false}, |
| 106 {HB_TAG('v','a','t','u'), true}, |
311 }; | 107 }; |
312 | 108 |
313 /* Same order as the indic_basic_features array */ | 109 /* Same order as the indic_basic_features array */ |
314 enum { | 110 enum { |
315 _NUKT, | 111 _NUKT, |
316 AKHN, | 112 AKHN, |
317 RPHF, | 113 RPHF, |
318 RKRF, | 114 _RKRF, |
319 PREF, | 115 PREF, |
320 BLWF, | 116 BLWF, |
321 HALF, | 117 HALF, |
322 _VATU, | |
323 PSTF, | 118 PSTF, |
324 CJCT | 119 CJCT, |
| 120 VATU |
325 }; | 121 }; |
326 | 122 |
327 static const hb_tag_t indic_other_features[] = | 123 static const feature_list_t |
| 124 indic_other_features[] = |
328 { | 125 { |
329 HB_TAG('p','r','e','s'), | 126 {HB_TAG('i','n','i','t'), false}, |
330 HB_TAG('a','b','v','s'), | 127 {HB_TAG('p','r','e','s'), true}, |
331 HB_TAG('b','l','w','s'), | 128 {HB_TAG('a','b','v','s'), true}, |
332 HB_TAG('p','s','t','s'), | 129 {HB_TAG('b','l','w','s'), true}, |
333 HB_TAG('h','a','l','n'), | 130 {HB_TAG('p','s','t','s'), true}, |
| 131 {HB_TAG('h','a','l','n'), true}, |
334 | 132 |
335 HB_TAG('d','i','s','t'), | 133 {HB_TAG('d','i','s','t'), true}, |
336 HB_TAG('a','b','v','m'), | 134 {HB_TAG('a','b','v','m'), true}, |
337 HB_TAG('b','l','w','m'), | 135 {HB_TAG('b','l','w','m'), true}, |
| 136 }; |
| 137 |
| 138 /* Same order as the indic_other_features array */ |
| 139 enum { |
| 140 INIT |
338 }; | 141 }; |
339 | 142 |
340 | 143 |
341 static void | 144 static void |
342 initial_reordering (const hb_ot_map_t *map, | 145 initial_reordering (const hb_ot_map_t *map, |
343 hb_face_t *face, | 146 hb_face_t *face, |
344 hb_buffer_t *buffer, | 147 hb_buffer_t *buffer, |
345 void *user_data HB_UNUSED); | 148 void *user_data HB_UNUSED); |
346 static void | 149 static void |
347 final_reordering (const hb_ot_map_t *map, | 150 final_reordering (const hb_ot_map_t *map, |
348 hb_face_t *face, | 151 hb_face_t *face, |
349 hb_buffer_t *buffer, | 152 hb_buffer_t *buffer, |
350 void *user_data HB_UNUSED); | 153 void *user_data HB_UNUSED); |
351 | 154 |
352 void | 155 void |
353 _hb_ot_shape_complex_collect_features_indic (hb_ot_map_builder_t *map, const hb_
segment_properties_t *props) | 156 _hb_ot_shape_complex_collect_features_indic (hb_ot_map_builder_t *map, |
| 157 » » » » » const hb_segment_properties_t *prop
s HB_UNUSED) |
354 { | 158 { |
355 map->add_bool_feature (HB_TAG('l','o','c','l')); | 159 map->add_bool_feature (HB_TAG('l','o','c','l')); |
356 /* The Indic specs do not require ccmp, but we apply it here since if | 160 /* The Indic specs do not require ccmp, but we apply it here since if |
357 * there is a use of it, it's typically at the beginning. */ | 161 * there is a use of it, it's typically at the beginning. */ |
358 map->add_bool_feature (HB_TAG('c','c','m','p')); | 162 map->add_bool_feature (HB_TAG('c','c','m','p')); |
359 | 163 |
360 map->add_gsub_pause (initial_reordering, NULL); | 164 map->add_gsub_pause (initial_reordering, NULL); |
361 | 165 |
362 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++) | 166 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++) { |
363 map->add_bool_feature (indic_basic_features[i].tag, indic_basic_features[i].
is_global); | 167 map->add_bool_feature (indic_basic_features[i].tag, indic_basic_features[i].
is_global); |
| 168 map->add_gsub_pause (NULL, NULL); |
| 169 } |
364 | 170 |
365 map->add_gsub_pause (final_reordering, NULL); | 171 map->add_gsub_pause (final_reordering, NULL); |
366 | 172 |
367 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++) | 173 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++) { |
368 map->add_bool_feature (indic_other_features[i], true); | 174 map->add_bool_feature (indic_other_features[i].tag, indic_other_features[i].
is_global); |
| 175 map->add_gsub_pause (NULL, NULL); |
| 176 } |
369 } | 177 } |
370 | 178 |
371 | 179 |
372 bool | 180 hb_ot_shape_normalization_mode_t |
373 _hb_ot_shape_complex_prefer_decomposed_indic (void) | 181 _hb_ot_shape_complex_normalization_preference_indic (void) |
374 { | 182 { |
375 /* We want split matras decomposed by the common shaping logic. */ | 183 /* We want split matras decomposed by the common shaping logic. */ |
376 return TRUE; | 184 return HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; |
377 } | 185 } |
378 | 186 |
379 | 187 |
380 void | 188 void |
381 _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map, hb_buffer_t *buffer) | 189 _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED, |
| 190 » » » » » hb_buffer_t *buffer, |
| 191 » » » » » hb_font_t *font HB_UNUSED) |
382 { | 192 { |
383 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); | 193 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); |
384 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); | 194 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); |
385 | 195 |
386 /* We cannot setup masks here. We save information about characters | 196 /* We cannot setup masks here. We save information about characters |
387 * and setup masks later on in a pause-callback. */ | 197 * and setup masks later on in a pause-callback. */ |
388 | 198 |
389 unsigned int count = buffer->len; | 199 unsigned int count = buffer->len; |
390 for (unsigned int i = 0; i < count; i++) | 200 for (unsigned int i = 0; i < count; i++) |
391 { | 201 { |
392 unsigned int type = get_indic_categories (buffer->info[i].codepoint); | 202 hb_glyph_info_t &info = buffer->info[i]; |
| 203 unsigned int type = get_indic_categories (info.codepoint); |
393 | 204 |
394 buffer->info[i].indic_category() = type & 0x0F; | 205 info.indic_category() = type & 0x0F; |
395 buffer->info[i].indic_position() = type >> 4; | 206 info.indic_position() = type >> 4; |
396 | 207 |
397 if (buffer->info[i].indic_category() == OT_C) { | 208 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe |
398 buffer->info[i].indic_position() = consonant_position (buffer->info[i].cod
epoint); | 209 * treats U+0951..U+0952 all as OT_VD. |
399 if (is_ra (buffer->info[i].codepoint)) | 210 * TESTS: |
400 » buffer->info[i].indic_category() = OT_Ra; | 211 * U+092E,U+0947,U+0952 |
401 } else if (buffer->info[i].codepoint == 0x200C) | 212 * U+092E,U+0952,U+0947 |
402 buffer->info[i].indic_category() = OT_ZWNJ; | 213 * U+092E,U+0947,U+0951 |
403 else if (buffer->info[i].codepoint == 0x200D) | 214 * U+092E,U+0951,U+0947 |
404 buffer->info[i].indic_category() = OT_ZWJ; | 215 * */ |
| 216 if (unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x0951, 0x0954))) |
| 217 info.indic_category() = OT_VD; |
| 218 |
| 219 if (info.indic_category() == OT_C) { |
| 220 info.indic_position() = consonant_position (info.codepoint); |
| 221 if (is_ra (info.codepoint)) |
| 222 » info.indic_category() = OT_Ra; |
| 223 } else if (info.indic_category() == OT_SM || |
| 224 » info.indic_category() == OT_VD) { |
| 225 info.indic_position() = POS_SMVD; |
| 226 } else if (unlikely (info.codepoint == 0x200C)) |
| 227 info.indic_category() = OT_ZWNJ; |
| 228 else if (unlikely (info.codepoint == 0x200D)) |
| 229 info.indic_category() = OT_ZWJ; |
| 230 else if (unlikely (info.codepoint == 0x25CC)) |
| 231 info.indic_category() = OT_DOTTEDCIRCLE; |
405 } | 232 } |
406 } | 233 } |
407 | 234 |
408 static int | 235 static int |
409 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) | 236 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) |
410 { | 237 { |
411 int a = pa->indic_position(); | 238 int a = pa->indic_position(); |
412 int b = pb->indic_position(); | 239 int b = pb->indic_position(); |
413 | 240 |
414 return a < b ? -1 : a == b ? 0 : +1; | 241 return a < b ? -1 : a == b ? 0 : +1; |
415 } | 242 } |
416 | 243 |
| 244 /* Rules from: |
| 245 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ |
| 246 |
417 static void | 247 static void |
418 found_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t
*mask_array, | 248 initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
er, hb_mask_t *mask_array, |
419 » » » unsigned int start, unsigned int end) | 249 » » » » unsigned int start, unsigned int end) |
420 { | 250 { |
421 unsigned int i; | |
422 hb_glyph_info_t *info = buffer->info; | 251 hb_glyph_info_t *info = buffer->info; |
423 | 252 |
424 /* Comments from: | |
425 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ | |
426 | 253 |
427 /* 1. Find base consonant: | 254 /* 1. Find base consonant: |
428 * | 255 * |
429 * The shaping engine finds the base consonant of the syllable, using the | 256 * The shaping engine finds the base consonant of the syllable, using the |
430 * following algorithm: starting from the end of the syllable, move backwards | 257 * following algorithm: starting from the end of the syllable, move backwards |
431 * until a consonant is found that does not have a below-base or post-base | 258 * until a consonant is found that does not have a below-base or post-base |
432 * form (post-base forms have to follow below-base forms), or that is not a | 259 * form (post-base forms have to follow below-base forms), or that is not a |
433 * pre-base reordering Ra, or arrive at the first consonant. The consonant | 260 * pre-base reordering Ra, or arrive at the first consonant. The consonant |
434 * stopped at will be the base. | 261 * stopped at will be the base. |
435 * | 262 * |
436 * o If the syllable starts with Ra + Halant (in a script that has Reph) | 263 * o If the syllable starts with Ra + Halant (in a script that has Reph) |
437 * and has more than one consonant, Ra is excluded from candidates for | 264 * and has more than one consonant, Ra is excluded from candidates for |
438 * base consonants. | 265 * base consonants. |
439 */ | 266 */ |
440 | 267 |
441 unsigned int base = end; | 268 unsigned int base = end; |
| 269 bool has_reph = false; |
442 | 270 |
443 /* -> starting from the end of the syllable, move backwards */ | 271 { |
444 i = end; | 272 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) |
445 unsigned int limit = start; | 273 * and has more than one consonant, Ra is excluded from candidates for |
446 if (info[start].indic_category() == OT_Ra && start + 2 <= end) { | 274 * base consonants. */ |
447 limit += 2; | 275 unsigned int limit = start; |
448 base = start; | 276 if (mask_array[RPHF] && |
449 }; | 277 » start + 3 <= end && |
450 do { | 278 » info[start].indic_category() == OT_Ra && |
451 i--; | 279 » info[start + 1].indic_category() == OT_H && |
452 /* -> until a consonant is found */ | 280 » !is_joiner (info[start + 2])) |
453 if (is_consonant (info[i])) | |
454 { | 281 { |
455 /* -> that does not have a below-base or post-base form | 282 limit += 2; |
456 * (post-base forms have to follow below-base forms), */ | 283 base = start; |
457 if (info[i].indic_position() != POS_BELOW && | 284 has_reph = true; |
458 » info[i].indic_position() != POS_POST) | 285 }; |
| 286 |
| 287 /* -> starting from the end of the syllable, move backwards */ |
| 288 unsigned int i = end; |
| 289 do { |
| 290 i--; |
| 291 /* -> until a consonant is found */ |
| 292 if (is_consonant (info[i])) |
459 { | 293 { |
460 base = i; | 294 » /* -> that does not have a below-base or post-base form |
461 » break; | 295 » * (post-base forms have to follow below-base forms), */ |
| 296 » if (info[i].indic_position() != POS_BELOW_C && |
| 297 » info[i].indic_position() != POS_POST_C) |
| 298 » { |
| 299 » base = i; |
| 300 » break; |
| 301 » } |
| 302 |
| 303 » /* -> or that is not a pre-base reordering Ra, |
| 304 » * |
| 305 » * TODO |
| 306 » */ |
| 307 |
| 308 » /* -> or arrive at the first consonant. The consonant stopped at will |
| 309 » * be the base. */ |
| 310 » base = i; |
462 } | 311 } |
| 312 else |
| 313 if (is_joiner (info[i])) |
| 314 break; |
| 315 } while (i > limit); |
| 316 if (base < start) |
| 317 base = start; /* Just in case... */ |
463 | 318 |
464 /* -> or that is not a pre-base reordering Ra, | |
465 * | |
466 * TODO | |
467 */ | |
468 | 319 |
469 /* -> o If the syllable starts with Ra + Halant (in a script that has Rep
h) | 320 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) |
470 * and has more than one consonant, Ra is excluded from candidates f
or | 321 * and has more than one consonant, Ra is excluded from candidates for |
471 * base consonants. | 322 * base consonants. */ |
472 * | 323 if (has_reph && base == start) { |
473 * IMPLEMENTATION NOTES: | 324 /* Have no other consonant, so Reph is not formed and Ra becomes base. */ |
474 * | 325 has_reph = false; |
475 * We do this by adjusting limit accordingly before entering the loop. | |
476 */ | |
477 | |
478 /* -> or arrive at the first consonant. The consonant stopped at will | |
479 * be the base. */ | |
480 base = i; | |
481 } | 326 } |
482 else | 327 } |
483 if (is_joiner (info[i])) | |
484 break; | |
485 } while (i > limit); | |
486 if (base < start) | |
487 base = start; /* Just in case... */ | |
488 | 328 |
489 | 329 |
490 /* 2. Decompose and reorder Matras: | 330 /* 2. Decompose and reorder Matras: |
491 * | 331 * |
492 * Each matra and any syllable modifier sign in the cluster are moved to the | 332 * Each matra and any syllable modifier sign in the cluster are moved to the |
493 * appropriate position relative to the consonant(s) in the cluster. The | 333 * appropriate position relative to the consonant(s) in the cluster. The |
494 * shaping engine decomposes two- or three-part matras into their constituent | 334 * shaping engine decomposes two- or three-part matras into their constituent |
495 * parts before any repositioning. Matra characters are classified by which | 335 * parts before any repositioning. Matra characters are classified by which |
496 * consonant in a conjunct they have affinity for and are reordered to the | 336 * consonant in a conjunct they have affinity for and are reordered to the |
497 * following positions: | 337 * following positions: |
(...skipping 16 matching lines...) Expand all Loading... |
514 * if necessary, so that the nukta is first. | 354 * if necessary, so that the nukta is first. |
515 * | 355 * |
516 * IMPLEMENTATION NOTES: | 356 * IMPLEMENTATION NOTES: |
517 * | 357 * |
518 * We don't need to do this: the normalize() routine already did this for us. | 358 * We don't need to do this: the normalize() routine already did this for us. |
519 */ | 359 */ |
520 | 360 |
521 | 361 |
522 /* Reorder characters */ | 362 /* Reorder characters */ |
523 | 363 |
524 for (i = start; i < base; i++) | 364 for (unsigned int i = start; i < base; i++) |
525 info[i].indic_position() = POS_PRE; | 365 info[i].indic_position() = POS_PRE_C; |
526 info[base].indic_position() = POS_BASE; | 366 info[base].indic_position() = POS_BASE_C; |
527 | |
528 | 367 |
529 /* Handle beginning Ra */ | 368 /* Handle beginning Ra */ |
530 if (start + 3 <= end && | 369 if (has_reph) |
531 info[start].indic_category() == OT_Ra && | 370 info[start].indic_position() = POS_RA_TO_BECOME_REPH; |
532 info[start + 1].indic_category() == OT_H && | |
533 !is_joiner (info[start + 2])) | |
534 { | |
535 info[start].indic_position() = POS_POST; | |
536 info[start].mask = mask_array[RPHF]; | |
537 } | |
538 | 371 |
539 /* For old-style Indic script tags, move the first post-base Halant after | 372 /* For old-style Indic script tags, move the first post-base Halant after |
540 * last consonant. */ | 373 * last consonant. */ |
541 if ((map->get_chosen_script (0) & 0x000000FF) != '2') { | 374 if ((map->get_chosen_script (0) & 0x000000FF) != '2') { |
542 /* We should only do this for Indic scripts which have a version two I guess
. */ | 375 /* We should only do this for Indic scripts which have a version two I guess
. */ |
543 for (i = base + 1; i < end; i++) | 376 for (unsigned int i = base + 1; i < end; i++) |
544 if (info[i].indic_category() == OT_H) { | 377 if (info[i].indic_category() == OT_H) { |
545 unsigned int j; | 378 unsigned int j; |
546 for (j = end - 1; j > i; j--) | 379 for (j = end - 1; j > i; j--) |
547 » if ((FLAG (info[j].indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra)))) | 380 » if (is_consonant (info[j])) |
548 break; | 381 break; |
549 if (j > i) { | 382 if (j > i) { |
550 /* Move Halant to after last consonant. */ | 383 /* Move Halant to after last consonant. */ |
551 hb_glyph_info_t t = info[i]; | 384 hb_glyph_info_t t = info[i]; |
552 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); | 385 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); |
553 info[j] = t; | 386 info[j] = t; |
554 } | 387 } |
555 break; | 388 break; |
556 } | 389 } |
557 } | 390 } |
558 | 391 |
559 /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */ | 392 /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */ |
560 for (i = start + 1; i < end; i++) | 393 if (!options.uniscribe_bug_compatible) |
561 if ((FLAG (info[i].indic_category()) & | 394 { |
562 » (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) | 395 /* Please update the Uniscribe branch when touching this! */ |
563 info[i].indic_position() = info[i - 1].indic_position(); | 396 for (unsigned int i = start + 1; i < end; i++) |
| 397 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | F
LAG (OT_N) | FLAG (OT_H)))) |
| 398 » info[i].indic_position() = info[i - 1].indic_position(); |
| 399 } else { |
| 400 /* |
| 401 * Uniscribe doesn't move the Halant with Left Matra. |
| 402 * TEST: U+092B,U+093F,U+094D |
| 403 */ |
| 404 /* Please update the non-Uniscribe branch when touching this! */ |
| 405 for (unsigned int i = start + 1; i < end; i++) |
| 406 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | F
LAG (OT_N) | FLAG (OT_H)))) { |
| 407 » info[i].indic_position() = info[i - 1].indic_position(); |
| 408 » if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_
PRE_M) |
| 409 » for (unsigned int j = i; j > start; j--) |
| 410 » if (info[j - 1].indic_position() != POS_PRE_M) { |
| 411 » info[i].indic_position() = info[j - 1].indic_position(); |
| 412 » break; |
| 413 » } |
| 414 } |
| 415 } |
564 | 416 |
565 /* We do bubble-sort, skip malicious cluster attempts */ | 417 /* We do bubble-sort, skip malicious cluster attempts */ |
566 if (end - start > 20) | 418 if (end - start < 64) |
567 return; | 419 { |
568 | 420 /* Sit tight, rock 'n roll! */ |
569 /* Sit tight, rock 'n roll! */ | 421 hb_bubble_sort (info + start, end - start, compare_indic_order); |
570 hb_bubble_sort (info + start, end - start, compare_indic_order); | 422 /* Find base again */ |
| 423 base = end; |
| 424 for (unsigned int i = start; i < end; i++) |
| 425 if (info[i].indic_position() == POS_BASE_C) { |
| 426 base = i; |
| 427 » break; |
| 428 } |
| 429 } |
571 | 430 |
572 /* Setup masks now */ | 431 /* Setup masks now */ |
573 | 432 |
574 { | 433 { |
575 hb_mask_t mask; | 434 hb_mask_t mask; |
576 | 435 |
| 436 /* Reph */ |
| 437 for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_T
O_BECOME_REPH; i++) |
| 438 info[i].mask |= mask_array[RPHF]; |
| 439 |
577 /* Pre-base */ | 440 /* Pre-base */ |
578 mask = mask_array[HALF] | mask_array[AKHN] | mask_array[CJCT]; | 441 mask = mask_array[HALF] | mask_array[AKHN] | mask_array[CJCT]; |
579 for (i = start; i < base; i++) | 442 for (unsigned int i = start; i < base; i++) |
580 info[i].mask |= mask; | 443 info[i].mask |= mask; |
581 /* Base */ | 444 /* Base */ |
582 mask = mask_array[AKHN] | mask_array[CJCT]; | 445 mask = mask_array[AKHN] | mask_array[CJCT]; |
583 info[base].mask |= mask; | 446 info[base].mask |= mask; |
584 /* Post-base */ | 447 /* Post-base */ |
585 mask = mask_array[BLWF] | mask_array[PSTF] | mask_array[CJCT]; | 448 mask = mask_array[BLWF] | mask_array[PSTF] | mask_array[CJCT]; |
586 for (i = base + 1; i < end; i++) | 449 for (unsigned int i = base + 1; i < end; i++) |
587 info[i].mask |= mask; | 450 info[i].mask |= mask; |
588 } | 451 } |
589 | 452 |
590 /* Apply ZWJ/ZWNJ effects */ | 453 /* Apply ZWJ/ZWNJ effects */ |
591 for (i = start + 1; i < end; i++) | 454 for (unsigned int i = start + 1; i < end; i++) |
592 if (is_joiner (info[i])) { | 455 if (is_joiner (info[i])) { |
593 bool non_joiner = info[i].indic_category() == OT_ZWNJ; | 456 bool non_joiner = info[i].indic_category() == OT_ZWNJ; |
594 unsigned int j = i; | 457 unsigned int j = i; |
595 | 458 |
596 do { | 459 do { |
597 j--; | 460 j--; |
598 | 461 |
599 » /* Reading the Unicode and OpenType specs, I think the following line | 462 » info[j].mask &= ~mask_array[CJCT]; |
600 » * is correct, but this is not what the test suite expects currently. | |
601 » * The test suite has been drinking, not me... But disable while | |
602 » * investigating. | |
603 » */ | |
604 » //info[j].mask &= !mask_array[CJCT]; | |
605 if (non_joiner) | 463 if (non_joiner) |
606 » info[j].mask &= !mask_array[HALF]; | 464 » info[j].mask &= ~mask_array[HALF]; |
607 | 465 |
608 } while (j > start && !is_consonant (info[j])); | 466 } while (j > start && !is_consonant (info[j])); |
609 } | 467 } |
610 } | 468 } |
611 | 469 |
612 | 470 |
613 static void | 471 static void |
614 found_vowel_syllable (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *ma
sk_array, | 472 initial_reordering_vowel_syllable (const hb_ot_map_t *map, |
615 » » unsigned int start, unsigned int end) | 473 » » » » hb_buffer_t *buffer, |
| 474 » » » » hb_mask_t *mask_array, |
| 475 » » » » unsigned int start, unsigned int end) |
616 { | 476 { |
617 /* TODO | 477 /* We made the vowels look like consonants. So let's call the consonant logic
! */ |
618 * Not clear to me how this should work. Do the matras move to before the | 478 initial_reordering_consonant_syllable (map, buffer, mask_array, start, end); |
619 * independent vowel? No idea. | |
620 */ | |
621 } | 479 } |
622 | 480 |
623 static void | 481 static void |
624 found_standalone_cluster (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t
*mask_array, | 482 initial_reordering_standalone_cluster (const hb_ot_map_t *map, |
625 » » » unsigned int start, unsigned int end) | 483 » » » » hb_buffer_t *buffer, |
| 484 » » » » hb_mask_t *mask_array, |
| 485 » » » » unsigned int start, unsigned int end) |
626 { | 486 { |
627 /* TODO | 487 /* We treat NBSP/dotted-circle as if they are consonants, so we should just ch
ain. |
628 * Easiest thing to do here is to convert the NBSP to consonant and | 488 * Only if not in compatibility mode that is... */ |
629 * call found_consonant_syllable. | 489 |
630 */ | 490 if (options.uniscribe_bug_compatible) |
| 491 { |
| 492 /* For dotted-circle, this is what Uniscribe does: |
| 493 * If dotted-circle is the last glyph, it just does nothing. |
| 494 * Ie. It doesn't form Reph. */ |
| 495 if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) |
| 496 return; |
| 497 } |
| 498 |
| 499 initial_reordering_consonant_syllable (map, buffer, mask_array, start, end); |
631 } | 500 } |
632 | 501 |
633 static void | 502 static void |
634 found_non_indic (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *mask_ar
ray, | 503 initial_reordering_non_indic (const hb_ot_map_t *map HB_UNUSED, |
635 » » unsigned int start, unsigned int end) | 504 » » » hb_buffer_t *buffer HB_UNUSED, |
| 505 » » » hb_mask_t *mask_array HB_UNUSED, |
| 506 » » » unsigned int start HB_UNUSED, unsigned int end HB_
UNUSED) |
636 { | 507 { |
637 /* Nothing to do right now. If we ever switch to using the output | 508 /* Nothing to do right now. If we ever switch to using the output |
638 * buffer in the reordering process, we'd need to next_glyph() here. */ | 509 * buffer in the reordering process, we'd need to next_glyph() here. */ |
639 } | 510 } |
640 | 511 |
641 #include "hb-ot-shape-complex-indic-machine.hh" | 512 #include "hb-ot-shape-complex-indic-machine.hh" |
642 | 513 |
643 static void | 514 static void |
644 remove_joiners (hb_buffer_t *buffer) | |
645 { | |
646 /* For now we remove joiners. However, Uniscribe seems to keep them | |
647 * and output a zero-width space glyph for them. It is not clear to | |
648 * me how that is supposed to interact with GSUB. */ | |
649 | |
650 buffer->clear_output (); | |
651 unsigned int count = buffer->len; | |
652 for (buffer->idx = 0; buffer->idx < count;) | |
653 if (unlikely (is_joiner (buffer->info[buffer->idx]))) | |
654 buffer->skip_glyph (); | |
655 else | |
656 buffer->next_glyph (); | |
657 | |
658 buffer->swap_buffers (); | |
659 } | |
660 | |
661 static void | |
662 initial_reordering (const hb_ot_map_t *map, | 515 initial_reordering (const hb_ot_map_t *map, |
663 » » hb_face_t *face, | 516 » » hb_face_t *face HB_UNUSED, |
664 hb_buffer_t *buffer, | 517 hb_buffer_t *buffer, |
665 void *user_data HB_UNUSED) | 518 void *user_data HB_UNUSED) |
666 { | 519 { |
667 hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0}; | 520 hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0}; |
668 unsigned int num_masks = ARRAY_LENGTH (indic_basic_features); | 521 unsigned int num_masks = ARRAY_LENGTH (indic_basic_features); |
669 for (unsigned int i = 0; i < num_masks; i++) | 522 for (unsigned int i = 0; i < num_masks; i++) |
670 mask_array[i] = map->get_1_mask (indic_basic_features[i].tag); | 523 mask_array[i] = map->get_1_mask (indic_basic_features[i].tag); |
671 | 524 |
672 find_syllables (map, buffer, mask_array); | 525 find_syllables (map, buffer, mask_array); |
673 | |
674 remove_joiners (buffer); | |
675 } | 526 } |
676 | 527 |
677 static void | 528 static void |
678 final_reordering (const hb_ot_map_t *map, | 529 final_reordering_syllable (hb_buffer_t *buffer, hb_mask_t *mask_array, |
679 » » hb_face_t *face, | 530 » » » unsigned int start, unsigned int end) |
680 » » hb_buffer_t *buffer, | |
681 » » void *user_data HB_UNUSED) | |
682 { | 531 { |
| 532 hb_glyph_info_t *info = buffer->info; |
| 533 |
683 /* 4. Final reordering: | 534 /* 4. Final reordering: |
684 * | 535 * |
685 * After the localized forms and basic shaping forms GSUB features have been | 536 * After the localized forms and basic shaping forms GSUB features have been |
686 * applied (see below), the shaping engine performs some final glyph | 537 * applied (see below), the shaping engine performs some final glyph |
687 * reordering before applying all the remaining font features to the entire | 538 * reordering before applying all the remaining font features to the entire |
688 * cluster. | 539 * cluster. |
689 * | 540 */ |
690 * o Reorder matras: | 541 |
| 542 /* Find base again */ |
| 543 unsigned int base = end; |
| 544 for (unsigned int i = start; i < end; i++) |
| 545 if (info[i].indic_position() == POS_BASE_C) { |
| 546 base = i; |
| 547 break; |
| 548 } |
| 549 |
| 550 if (base == start) { |
| 551 /* There's no Reph, and no left Matra to reposition. Just merge the cluster |
| 552 * and go home. */ |
| 553 buffer->merge_clusters (start, end); |
| 554 return; |
| 555 } |
| 556 |
| 557 unsigned int start_of_last_cluster = base; |
| 558 |
| 559 /* o Reorder matras: |
691 * | 560 * |
692 * If a pre-base matra character had been reordered before applying basic | 561 * If a pre-base matra character had been reordered before applying basic |
693 * features, the glyph can be moved closer to the main consonant based on | 562 * features, the glyph can be moved closer to the main consonant based on |
694 * whether half-forms had been formed. Actual position for the matra is | 563 * whether half-forms had been formed. Actual position for the matra is |
695 * defined as “after last standalone halant glyph, after initial matra | 564 * defined as “after last standalone halant glyph, after initial matra |
696 * position and before the main consonant”. If ZWJ or ZWNJ follow this | 565 * position and before the main consonant”. If ZWJ or ZWNJ follow this |
697 * halant, position is moved after it. | 566 * halant, position is moved after it. |
698 * | 567 */ |
699 * o Reorder reph: | 568 |
| 569 { |
| 570 unsigned int new_matra_pos = base - 1; |
| 571 while (new_matra_pos > start && |
| 572 » !(FLAG (info[new_matra_pos].indic_category()) & (FLAG (OT_M) | FLAG (
OT_H)))) |
| 573 new_matra_pos--; |
| 574 /* If we found no Halant we are done. Otherwise only proceed if the Halant
does |
| 575 * not belong to the Matra itself! */ |
| 576 if (info[new_matra_pos].indic_category() == OT_H && |
| 577 » info[new_matra_pos].indic_position() != POS_PRE_M) { |
| 578 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ |
| 579 if (new_matra_pos + 1 < end && is_joiner (info[new_matra_pos + 1])) |
| 580 » new_matra_pos++; |
| 581 |
| 582 /* Now go see if there's actually any matras... */ |
| 583 for (unsigned int i = new_matra_pos; i > start; i--) |
| 584 » if (info[i - 1].indic_position () == POS_PRE_M) |
| 585 » { |
| 586 » unsigned int old_matra_pos = i - 1; |
| 587 » hb_glyph_info_t matra = info[old_matra_pos]; |
| 588 » memmove (&info[old_matra_pos], &info[old_matra_pos + 1], (new_matra_po
s - old_matra_pos) * sizeof (info[0])); |
| 589 » info[new_matra_pos] = matra; |
| 590 » start_of_last_cluster = MIN (new_matra_pos, start_of_last_cluster); |
| 591 » new_matra_pos--; |
| 592 » } |
| 593 } |
| 594 } |
| 595 |
| 596 |
| 597 /* o Reorder reph: |
700 * | 598 * |
701 * Reph’s original position is always at the beginning of the syllable, | 599 * Reph’s original position is always at the beginning of the syllable, |
702 * (i.e. it is not reordered at the character reordering stage). However, | 600 * (i.e. it is not reordered at the character reordering stage). However, |
703 * it will be reordered according to the basic-forms shaping results. | 601 * it will be reordered according to the basic-forms shaping results. |
704 * Possible positions for reph, depending on the script, are: after main, | 602 * Possible positions for reph, depending on the script, are: after main, |
705 * before post-base consonant forms, and after post-base consonant forms. | 603 * before post-base consonant forms, and after post-base consonant forms. |
706 * | 604 */ |
707 * 1. If reph should be positioned after post-base consonant forms, | 605 |
708 * proceed to step 5. | 606 /* If there's anything after the Ra that has the REPH pos, it ought to be hala
nt. |
709 * | 607 * Which means that the font has failed to ligate the Reph. In which case, we |
710 * 2. If the reph repositioning class is not after post-base: target | 608 * shouldn't move. */ |
711 * position is after the first explicit halant glyph between the | 609 if (start + 1 < end && |
712 * first post-reph consonant and last main consonant. If ZWJ or ZWNJ | 610 info[start].indic_position() == POS_RA_TO_BECOME_REPH && |
713 * are following this halant, position is moved after it. If such | 611 info[start + 1].indic_position() != POS_RA_TO_BECOME_REPH) |
714 * position is found, this is the target position. Otherwise, | 612 { |
715 * proceed to the next step. | 613 unsigned int new_reph_pos; |
716 * | 614 |
717 * Note: in old-implementation fonts, where classifications were | 615 enum reph_position_t { |
718 * fixed in shaping engine, there was no case where reph position | 616 REPH_AFTER_MAIN, |
719 * will be found on this step. | 617 REPH_BEFORE_SUBSCRIPT, |
720 * | 618 REPH_AFTER_SUBSCRIPT, |
721 * 3. If reph should be repositioned after the main consonant: from the | 619 REPH_BEFORE_POSTSCRIPT, |
722 * first consonant not ligated with main, or find the first | 620 REPH_AFTER_POSTSCRIPT, |
723 * consonant that is not a potential pre-base reordering Ra. | 621 } reph_pos; |
724 * | 622 |
725 * | 623 /* XXX Figure out old behavior too */ |
726 * 4. If reph should be positioned before post-base consonant, find | 624 switch ((hb_tag_t) buffer->props.script) |
727 * first post-base classified consonant not ligated with main. If no | 625 { |
728 * consonant is found, the target position should be before the | 626 case HB_SCRIPT_MALAYALAM: |
729 * first matra, syllable modifier sign or vedic sign. | 627 case HB_SCRIPT_ORIYA: |
730 * | 628 » reph_pos = REPH_AFTER_MAIN; |
731 * 5. If no consonant is found in steps 3 or 4, move reph to a position | 629 » break; |
732 * immediately before the first post-base matra, syllable modifier | 630 |
733 * sign or vedic sign that has a reordering class after the intended | 631 case HB_SCRIPT_GURMUKHI: |
734 * reph position. For example, if the reordering position for reph | 632 » reph_pos = REPH_BEFORE_SUBSCRIPT; |
735 * is post-main, it will skip above-base matras that also have a | 633 » break; |
736 * post-main position. | 634 |
737 * | 635 case HB_SCRIPT_BENGALI: |
738 * 6. Otherwise, reorder reph to the end of the syllable. | 636 » reph_pos = REPH_AFTER_SUBSCRIPT; |
739 * | 637 » break; |
740 * o Reorder pre-base reordering consonants: | 638 |
| 639 default: |
| 640 case HB_SCRIPT_DEVANAGARI: |
| 641 case HB_SCRIPT_GUJARATI: |
| 642 » reph_pos = REPH_BEFORE_POSTSCRIPT; |
| 643 » break; |
| 644 |
| 645 case HB_SCRIPT_KANNADA: |
| 646 case HB_SCRIPT_TAMIL: |
| 647 case HB_SCRIPT_TELUGU: |
| 648 » reph_pos = REPH_AFTER_POSTSCRIPT; |
| 649 » break; |
| 650 } |
| 651 |
| 652 /* 1. If reph should be positioned after post-base consonant forms, |
| 653 * proceed to step 5. |
| 654 */ |
| 655 if (reph_pos == REPH_AFTER_POSTSCRIPT) |
| 656 { |
| 657 goto reph_step_5; |
| 658 } |
| 659 |
| 660 /* 2. If the reph repositioning class is not after post-base: target |
| 661 * position is after the first explicit halant glyph between the |
| 662 * first post-reph consonant and last main consonant. If ZWJ or ZWN
J |
| 663 * are following this halant, position is moved after it. If such |
| 664 * position is found, this is the target position. Otherwise, |
| 665 * proceed to the next step. |
| 666 * |
| 667 * Note: in old-implementation fonts, where classifications were |
| 668 * fixed in shaping engine, there was no case where reph position |
| 669 * will be found on this step. |
| 670 */ |
| 671 { |
| 672 new_reph_pos = start + 1; |
| 673 while (new_reph_pos < base && info[new_reph_pos].indic_category() != OT_H) |
| 674 » new_reph_pos++; |
| 675 |
| 676 if (new_reph_pos < base && info[new_reph_pos].indic_category() == OT_H) { |
| 677 » /* ->If ZWJ or ZWNJ are following this halant, position is moved after i
t. */ |
| 678 » if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) |
| 679 » new_reph_pos++; |
| 680 » goto reph_move; |
| 681 } |
| 682 } |
| 683 |
| 684 /* 3. If reph should be repositioned after the main consonant: find th
e |
| 685 * first consonant not ligated with main, or find the first |
| 686 * consonant that is not a potential pre-base reordering Ra. |
| 687 */ |
| 688 if (reph_pos == REPH_AFTER_MAIN) |
| 689 { |
| 690 /* XXX */ |
| 691 } |
| 692 |
| 693 /* 4. If reph should be positioned before post-base consonant, find |
| 694 * first post-base classified consonant not ligated with main. If n
o |
| 695 * consonant is found, the target position should be before the |
| 696 * first matra, syllable modifier sign or vedic sign. |
| 697 */ |
| 698 /* This is our take on what step 4 is trying to say (and failing, BADLY). */ |
| 699 if (reph_pos == REPH_AFTER_SUBSCRIPT) |
| 700 { |
| 701 new_reph_pos = base; |
| 702 while (new_reph_pos < end && |
| 703 » !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST
_C) | FLAG (POS_POST_M) | FLAG (POS_SMVD)))) |
| 704 » new_reph_pos++; |
| 705 if (new_reph_pos < end) |
| 706 goto reph_move; |
| 707 } |
| 708 |
| 709 /* 5. If no consonant is found in steps 3 or 4, move reph to a positio
n |
| 710 * immediately before the first post-base matra, syllable modifier |
| 711 * sign or vedic sign that has a reordering class after the intende
d |
| 712 * reph position. For example, if the reordering position for reph |
| 713 * is post-main, it will skip above-base matras that also have a |
| 714 * post-main position. |
| 715 */ |
| 716 reph_step_5: |
| 717 { |
| 718 /* XXX */ |
| 719 } |
| 720 |
| 721 /* 6. Otherwise, reorder reph to the end of the syllable. |
| 722 */ |
| 723 { |
| 724 new_reph_pos = end - 1; |
| 725 while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_
SMVD) |
| 726 » new_reph_pos--; |
| 727 |
| 728 /* |
| 729 * If the Reph is to be ending up after a Matra,Halant sequence, |
| 730 * position it before that Halant so it can interact with the Matra. |
| 731 * However, if it's a plain Consonant,Halant we shouldn't do that. |
| 732 * Uniscribe doesn't do this. |
| 733 * TEST: U+0930,U+094D,U+0915,U+094B,U+094D |
| 734 */ |
| 735 if (!options.uniscribe_bug_compatible && |
| 736 » unlikely (info[new_reph_pos].indic_category() == OT_H)) { |
| 737 » for (unsigned int i = base + 1; i < new_reph_pos; i++) |
| 738 » if (info[i].indic_category() == OT_M) { |
| 739 » /* Ok, got it. */ |
| 740 » new_reph_pos--; |
| 741 » } |
| 742 } |
| 743 goto reph_move; |
| 744 } |
| 745 |
| 746 reph_move: |
| 747 { |
| 748 /* Move */ |
| 749 hb_glyph_info_t reph = info[start]; |
| 750 memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (
info[0])); |
| 751 info[new_reph_pos] = reph; |
| 752 start_of_last_cluster = start; /* Yay, one big cluster! */ |
| 753 } |
| 754 } |
| 755 |
| 756 |
| 757 /* o Reorder pre-base reordering consonants: |
741 * | 758 * |
742 * If a pre-base reordering consonant is found, reorder it according to | 759 * If a pre-base reordering consonant is found, reorder it according to |
743 * the following rules: | 760 * the following rules: |
744 * | 761 * |
745 * 1. Only reorder a glyph produced by substitution during application | 762 * 1. Only reorder a glyph produced by substitution during application |
746 * of the feature. (Note that a font may shape a Ra consonant with | 763 * of the feature. (Note that a font may shape a Ra consonant with |
747 * the feature generally but block it in certain contexts.) | 764 * the feature generally but block it in certain contexts.) |
748 * | 765 * |
749 * 2. Try to find a target position the same way as for pre-base matra. | 766 * 2. Try to find a target position the same way as for pre-base matra. |
750 * If it is found, reorder pre-base consonant glyph. | 767 * If it is found, reorder pre-base consonant glyph. |
751 * | 768 * |
752 * 3. If position is not found, reorder immediately before main | 769 * 3. If position is not found, reorder immediately before main |
753 * consonant. | 770 * consonant. |
754 */ | 771 */ |
755 | 772 |
756 /* TODO */ | 773 /* TODO */ |
757 | 774 |
758 | 775 |
759 | 776 |
| 777 /* Apply 'init' to the Left Matra if it's a word start. */ |
| 778 if (info[start].indic_position () == POS_PRE_M && |
| 779 (!start || |
| 780 !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) & |
| 781 (FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) | |
| 782 FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | |
| 783 FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | |
| 784 FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) | |
| 785 FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) | |
| 786 FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | |
| 787 FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | |
| 788 FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))) |
| 789 info[start].mask |= mask_array[INIT]; |
| 790 |
| 791 |
| 792 |
| 793 /* Finish off the clusters and go home! */ |
| 794 |
| 795 if (!options.uniscribe_bug_compatible) |
| 796 { |
| 797 /* This is what Uniscribe does. Ie. add cluster boundaries after Halant,ZWN
J. |
| 798 * This means, half forms are submerged into the main consonants cluster. |
| 799 * This is unnecessary, and makes cursor positioning harder, but that's what |
| 800 * Uniscribe does. */ |
| 801 unsigned int cluster_start = start; |
| 802 for (unsigned int i = start + 1; i < start_of_last_cluster; i++) |
| 803 if (info[i - 1].indic_category() == OT_H && info[i].indic_category() == OT
_ZWNJ) { |
| 804 i++; |
| 805 buffer->merge_clusters (cluster_start, i); |
| 806 cluster_start = i; |
| 807 } |
| 808 start_of_last_cluster = cluster_start; |
| 809 } |
| 810 |
| 811 buffer->merge_clusters (start_of_last_cluster, end); |
| 812 } |
| 813 |
| 814 |
| 815 static void |
| 816 final_reordering (const hb_ot_map_t *map, |
| 817 hb_face_t *face HB_UNUSED, |
| 818 hb_buffer_t *buffer, |
| 819 void *user_data HB_UNUSED) |
| 820 { |
| 821 unsigned int count = buffer->len; |
| 822 if (!count) return; |
| 823 |
| 824 hb_mask_t mask_array[ARRAY_LENGTH (indic_other_features)] = {0}; |
| 825 unsigned int num_masks = ARRAY_LENGTH (indic_other_features); |
| 826 for (unsigned int i = 0; i < num_masks; i++) |
| 827 mask_array[i] = map->get_1_mask (indic_other_features[i].tag); |
| 828 |
| 829 hb_glyph_info_t *info = buffer->info; |
| 830 unsigned int last = 0; |
| 831 unsigned int last_syllable = info[0].syllable(); |
| 832 for (unsigned int i = 1; i < count; i++) |
| 833 if (last_syllable != info[i].syllable()) { |
| 834 final_reordering_syllable (buffer, mask_array, last, i); |
| 835 last = i; |
| 836 last_syllable = info[last].syllable(); |
| 837 } |
| 838 final_reordering_syllable (buffer, mask_array, last, count); |
| 839 |
760 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); | 840 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); |
761 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position); | 841 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position); |
762 } | 842 } |
763 | 843 |
764 | 844 |
765 | 845 |
OLD | NEW |