OLD | NEW |
1 /* | 1 /* |
2 * Copyright © 2011,2012 Google, Inc. | 2 * Copyright © 2011,2012 Google, Inc. |
3 * | 3 * |
4 * This is part of HarfBuzz, a text shaping library. | 4 * This is part of HarfBuzz, a text shaping library. |
5 * | 5 * |
6 * Permission is hereby granted, without written agreement and without | 6 * Permission is hereby granted, without written agreement and without |
7 * license or royalty fees, to use, copy, modify, and distribute this | 7 * license or royalty fees, to use, copy, modify, and distribute this |
8 * software and its documentation for any purpose, provided that the | 8 * software and its documentation for any purpose, provided that the |
9 * above copyright notice and the following two paragraphs appear in | 9 * above copyright notice and the following two paragraphs appear in |
10 * all copies of this software. | 10 * all copies of this software. |
11 * | 11 * |
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 * DAMAGE. | 16 * DAMAGE. |
17 * | 17 * |
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 * | 23 * |
24 * Google Author(s): Behdad Esfahbod | 24 * Google Author(s): Behdad Esfahbod |
25 */ | 25 */ |
26 | 26 |
27 #include "hb-ot-shape-complex-indic-private.hh" | 27 #include "hb-ot-shape-complex-indic-private.hh" |
28 #include "hb-ot-shape-private.hh" | 28 #include "hb-ot-layout-private.hh" |
| 29 |
| 30 |
| 31 /* |
| 32 * Global Indic shaper options. |
| 33 */ |
29 | 34 |
30 struct indic_options_t | 35 struct indic_options_t |
31 { | 36 { |
32 int initialized : 1; | 37 int initialized : 1; |
33 int uniscribe_bug_compatible : 1; | 38 int uniscribe_bug_compatible : 1; |
34 }; | 39 }; |
35 | 40 |
36 union indic_options_union_t { | 41 union indic_options_union_t { |
37 int i; | 42 int i; |
38 indic_options_t opts; | 43 indic_options_t opts; |
39 }; | 44 }; |
40 ASSERT_STATIC (sizeof (int) == sizeof (indic_options_union_t)); | 45 ASSERT_STATIC (sizeof (int) == sizeof (indic_options_union_t)); |
41 | 46 |
42 static indic_options_union_t | 47 static indic_options_union_t |
43 indic_options_init (void) | 48 indic_options_init (void) |
44 { | 49 { |
45 indic_options_union_t u; | 50 indic_options_union_t u; |
46 u.i = 0; | 51 u.i = 0; |
47 u.opts.initialized = 1; | 52 u.opts.initialized = 1; |
48 | 53 |
49 char *c = getenv ("HB_OT_INDIC_OPTIONS"); | 54 char *c = getenv ("HB_OT_INDIC_OPTIONS"); |
50 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible"); | 55 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible"); |
51 | 56 |
52 return u; | 57 return u; |
53 } | 58 } |
54 | 59 |
55 inline indic_options_t | 60 static inline indic_options_t |
56 indic_options (void) | 61 indic_options (void) |
57 { | 62 { |
58 static indic_options_union_t options; | 63 static indic_options_union_t options; |
59 | 64 |
60 if (unlikely (!options.i)) { | 65 if (unlikely (!options.i)) { |
61 /* This is idempotent and threadsafe. */ | 66 /* This is idempotent and threadsafe. */ |
62 options = indic_options_init (); | 67 options = indic_options_init (); |
63 } | 68 } |
64 | 69 |
65 return options.opts; | 70 return options.opts; |
66 } | 71 } |
67 | 72 |
68 | 73 |
69 static int | 74 /* |
70 compare_codepoint (const void *pa, const void *pb) | 75 * Indic configurations. Note that we do not want to keep every single script-s
pecific |
| 76 * behavior in these tables necessarily. This should mainly be used for per-scr
ipt |
| 77 * properties that are cheaper to keep here than in the code. I.e. if, say, one
and |
| 78 * only one script has an exception, that one script can be if'ed directly in th
e code, |
| 79 * instead of adding a new flag in these structs. |
| 80 */ |
| 81 |
| 82 enum base_position_t { |
| 83 BASE_POS_FIRST, |
| 84 BASE_POS_LAST |
| 85 }; |
| 86 enum reph_position_t { |
| 87 REPH_POS_DEFAULT = POS_BEFORE_POST, |
| 88 |
| 89 REPH_POS_AFTER_MAIN = POS_AFTER_MAIN, |
| 90 REPH_POS_BEFORE_SUB = POS_BEFORE_SUB, |
| 91 REPH_POS_AFTER_SUB = POS_AFTER_SUB, |
| 92 REPH_POS_BEFORE_POST = POS_BEFORE_POST, |
| 93 REPH_POS_AFTER_POST = POS_AFTER_POST |
| 94 }; |
| 95 enum reph_mode_t { |
| 96 REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */ |
| 97 REPH_MODE_EXPLICIT, /* Reph formed out of initial Ra,H,ZWJ sequence. */ |
| 98 REPH_MODE_VIS_REPHA, /* Encoded Repha character, no reordering needed. */ |
| 99 REPH_MODE_LOG_REPHA /* Encoded Repha character, needs reordering. */ |
| 100 }; |
| 101 struct indic_config_t |
71 { | 102 { |
72 hb_codepoint_t a = * (hb_codepoint_t *) pa; | 103 hb_script_t script; |
73 hb_codepoint_t b = * (hb_codepoint_t *) pb; | 104 bool has_old_spec; |
| 105 hb_codepoint_t virama; |
| 106 base_position_t base_pos; |
| 107 reph_position_t reph_pos; |
| 108 reph_mode_t reph_mode; |
| 109 }; |
74 | 110 |
75 return a < b ? -1 : a == b ? 0 : +1; | 111 static const indic_config_t indic_configs[] = |
76 } | 112 { |
| 113 /* Default. Should be first. */ |
| 114 {HB_SCRIPT_INVALID,» false, 0,BASE_POS_LAST, REPH_POS_DEFAULT, REPH_MO
DE_IMPLICIT}, |
| 115 {HB_SCRIPT_DEVANAGARI,true, 0x094D,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MO
DE_IMPLICIT}, |
| 116 {HB_SCRIPT_BENGALI,» true, 0x09CD,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MO
DE_IMPLICIT}, |
| 117 {HB_SCRIPT_GURMUKHI,» true, 0x0A4D,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MO
DE_IMPLICIT}, |
| 118 {HB_SCRIPT_GUJARATI,» true, 0x0ACD,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MO
DE_IMPLICIT}, |
| 119 {HB_SCRIPT_ORIYA,» true, 0x0B4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MO
DE_IMPLICIT}, |
| 120 {HB_SCRIPT_TAMIL,» true, 0x0BCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MO
DE_IMPLICIT}, |
| 121 {HB_SCRIPT_TELUGU,» true, 0x0C4D,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MO
DE_EXPLICIT}, |
| 122 {HB_SCRIPT_KANNADA,» true, 0x0CCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MO
DE_IMPLICIT}, |
| 123 {HB_SCRIPT_MALAYALAM,»true, 0x0D4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MO
DE_LOG_REPHA}, |
| 124 {HB_SCRIPT_SINHALA,» false,0x0DCA,BASE_POS_FIRST,REPH_POS_AFTER_MAIN, REPH_MO
DE_EXPLICIT}, |
| 125 {HB_SCRIPT_KHMER,» false,0x17D2,BASE_POS_FIRST,REPH_POS_DEFAULT, REPH_MO
DE_VIS_REPHA}, |
| 126 }; |
77 | 127 |
78 static indic_position_t | |
79 consonant_position (hb_codepoint_t u) | |
80 { | |
81 consonant_position_t *record; | |
82 | 128 |
83 record = (consonant_position_t *) bsearch (&u, consonant_positions, | |
84 ARRAY_LENGTH (consonant_positions), | |
85 sizeof (consonant_positions[0]), | |
86 compare_codepoint); | |
87 | 129 |
88 return record ? record->position : POS_BASE_C; | 130 /* |
89 } | 131 * Indic shaper. |
90 | 132 */ |
91 static bool | |
92 is_ra (hb_codepoint_t u) | |
93 { | |
94 return !!bsearch (&u, ra_chars, | |
95 » » ARRAY_LENGTH (ra_chars), | |
96 » » sizeof (ra_chars[0]), | |
97 » » compare_codepoint); | |
98 } | |
99 | |
100 static bool | |
101 is_joiner (const hb_glyph_info_t &info) | |
102 { | |
103 return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))); | |
104 } | |
105 | |
106 static bool | |
107 is_consonant (const hb_glyph_info_t &info) | |
108 { | |
109 /* Note: | |
110 * | |
111 * We treat Vowels and placeholders as if they were consonants. This is safe
because Vowels | |
112 * cannot happen in a consonant syllable. The plus side however is, we can ca
ll the | |
113 * consonant syllable logic from the vowel syllable function and get it all ri
ght! */ | |
114 return !!(FLAG (info.indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (O
T_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))); | |
115 } | |
116 | 133 |
117 struct feature_list_t { | 134 struct feature_list_t { |
118 hb_tag_t tag; | 135 hb_tag_t tag; |
119 hb_bool_t is_global; | 136 hb_bool_t is_global; |
120 }; | 137 }; |
121 | 138 |
122 static const feature_list_t | 139 static const feature_list_t |
123 indic_basic_features[] = | 140 indic_features[] = |
124 { | 141 { |
| 142 /* |
| 143 * Basic features. |
| 144 * These features are applied in order, one at a time, after initial_reorderin
g. |
| 145 */ |
125 {HB_TAG('n','u','k','t'), true}, | 146 {HB_TAG('n','u','k','t'), true}, |
126 {HB_TAG('a','k','h','n'), false}, | 147 {HB_TAG('a','k','h','n'), true}, |
127 {HB_TAG('r','p','h','f'), false}, | 148 {HB_TAG('r','p','h','f'), false}, |
128 {HB_TAG('r','k','r','f'), true}, | 149 {HB_TAG('r','k','r','f'), true}, |
129 {HB_TAG('p','r','e','f'), false}, | 150 {HB_TAG('p','r','e','f'), false}, |
| 151 {HB_TAG('h','a','l','f'), false}, |
130 {HB_TAG('b','l','w','f'), false}, | 152 {HB_TAG('b','l','w','f'), false}, |
131 {HB_TAG('h','a','l','f'), false}, | 153 {HB_TAG('a','b','v','f'), false}, |
132 {HB_TAG('p','s','t','f'), false}, | 154 {HB_TAG('p','s','t','f'), false}, |
133 {HB_TAG('c','j','c','t'), false}, | 155 {HB_TAG('c','f','a','r'), false}, |
| 156 {HB_TAG('c','j','c','t'), true}, |
134 {HB_TAG('v','a','t','u'), true}, | 157 {HB_TAG('v','a','t','u'), true}, |
135 }; | 158 /* |
136 | 159 * Other features. |
137 /* Same order as the indic_basic_features array */ | 160 * These features are applied all at once, after final_reordering. |
138 enum { | 161 */ |
139 _NUKT, | |
140 AKHN, | |
141 RPHF, | |
142 _RKRF, | |
143 PREF, | |
144 BLWF, | |
145 HALF, | |
146 PSTF, | |
147 CJCT, | |
148 VATU | |
149 }; | |
150 | |
151 static const feature_list_t | |
152 indic_other_features[] = | |
153 { | |
154 {HB_TAG('i','n','i','t'), false}, | 162 {HB_TAG('i','n','i','t'), false}, |
155 {HB_TAG('p','r','e','s'), true}, | 163 {HB_TAG('p','r','e','s'), true}, |
156 {HB_TAG('a','b','v','s'), true}, | 164 {HB_TAG('a','b','v','s'), true}, |
157 {HB_TAG('b','l','w','s'), true}, | 165 {HB_TAG('b','l','w','s'), true}, |
158 {HB_TAG('p','s','t','s'), true}, | 166 {HB_TAG('p','s','t','s'), true}, |
159 {HB_TAG('h','a','l','n'), true}, | 167 {HB_TAG('h','a','l','n'), true}, |
160 | 168 /* Positioning features, though we don't care about the types. */ |
161 {HB_TAG('d','i','s','t'), true}, | 169 {HB_TAG('d','i','s','t'), true}, |
162 {HB_TAG('a','b','v','m'), true}, | 170 {HB_TAG('a','b','v','m'), true}, |
163 {HB_TAG('b','l','w','m'), true}, | 171 {HB_TAG('b','l','w','m'), true}, |
164 }; | 172 }; |
165 | 173 |
166 /* Same order as the indic_other_features array */ | 174 /* |
| 175 * Must be in the same order as the indic_features array. |
| 176 */ |
167 enum { | 177 enum { |
168 INIT | 178 _NUKT, |
169 }; | 179 _AKHN, |
170 | 180 RPHF, |
171 | 181 _RKRF, |
172 static void | 182 PREF, |
173 initial_reordering (const hb_ot_map_t *map, | 183 HALF, |
174 » » hb_face_t *face, | 184 BLWF, |
175 » » hb_buffer_t *buffer, | 185 ABVF, |
176 » » void *user_data HB_UNUSED); | 186 PSTF, |
177 static void | 187 CFAR, |
178 final_reordering (const hb_ot_map_t *map, | 188 _CJCT, |
179 » » hb_face_t *face, | 189 _VATU, |
180 » » hb_buffer_t *buffer, | 190 |
181 » » void *user_data HB_UNUSED); | 191 INIT, |
182 | 192 _PRES, |
183 void | 193 _ABVS, |
184 _hb_ot_shape_complex_collect_features_indic (hb_ot_map_builder_t *map, | 194 _BLWS, |
185 » » » » » const hb_segment_properties_t *prop
s HB_UNUSED) | 195 _PSTS, |
186 { | 196 _HALN, |
| 197 _DIST, |
| 198 _ABVM, |
| 199 _BLWM, |
| 200 |
| 201 INDIC_NUM_FEATURES, |
| 202 INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */ |
| 203 }; |
| 204 |
| 205 static void |
| 206 setup_syllables (const hb_ot_shape_plan_t *plan, |
| 207 » » hb_font_t *font, |
| 208 » » hb_buffer_t *buffer); |
| 209 static void |
| 210 initial_reordering (const hb_ot_shape_plan_t *plan, |
| 211 » » hb_font_t *font, |
| 212 » » hb_buffer_t *buffer); |
| 213 static void |
| 214 final_reordering (const hb_ot_shape_plan_t *plan, |
| 215 » » hb_font_t *font, |
| 216 » » hb_buffer_t *buffer); |
| 217 |
| 218 static void |
| 219 collect_features_indic (hb_ot_shape_planner_t *plan) |
| 220 { |
| 221 hb_ot_map_builder_t *map = &plan->map; |
| 222 |
| 223 /* Do this before any lookups have been applied. */ |
| 224 map->add_gsub_pause (setup_syllables); |
| 225 |
187 map->add_bool_feature (HB_TAG('l','o','c','l')); | 226 map->add_bool_feature (HB_TAG('l','o','c','l')); |
188 /* The Indic specs do not require ccmp, but we apply it here since if | 227 /* The Indic specs do not require ccmp, but we apply it here since if |
189 * there is a use of it, it's typically at the beginning. */ | 228 * there is a use of it, it's typically at the beginning. */ |
190 map->add_bool_feature (HB_TAG('c','c','m','p')); | 229 map->add_bool_feature (HB_TAG('c','c','m','p')); |
191 | 230 |
192 map->add_gsub_pause (initial_reordering, NULL); | 231 |
193 | 232 unsigned int i = 0; |
194 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++) { | 233 map->add_gsub_pause (initial_reordering); |
195 map->add_bool_feature (indic_basic_features[i].tag, indic_basic_features[i].
is_global); | 234 for (; i < INDIC_BASIC_FEATURES; i++) { |
196 map->add_gsub_pause (NULL, NULL); | 235 map->add_bool_feature (indic_features[i].tag, indic_features[i].is_global); |
197 } | 236 map->add_gsub_pause (NULL); |
198 | 237 } |
199 map->add_gsub_pause (final_reordering, NULL); | 238 map->add_gsub_pause (final_reordering); |
200 | 239 for (; i < INDIC_NUM_FEATURES; i++) { |
201 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++) { | 240 map->add_bool_feature (indic_features[i].tag, indic_features[i].is_global); |
202 map->add_bool_feature (indic_other_features[i].tag, indic_other_features[i].
is_global); | 241 } |
203 map->add_gsub_pause (NULL, NULL); | 242 } |
204 } | 243 |
205 } | 244 static void |
206 | 245 override_features_indic (hb_ot_shape_planner_t *plan) |
207 | 246 { |
208 hb_ot_shape_normalization_mode_t | 247 /* Uniscribe does not apply 'kern'. */ |
209 _hb_ot_shape_complex_normalization_preference_indic (void) | 248 if (indic_options ().uniscribe_bug_compatible) |
210 { | 249 plan->map.add_feature (HB_TAG('k','e','r','n'), 0, true); |
211 /* We want split matras decomposed by the common shaping logic. */ | 250 } |
212 return HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; | 251 |
213 } | 252 |
214 | 253 struct would_substitute_feature_t |
215 | 254 { |
216 void | 255 inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag) |
217 _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED, | 256 { |
218 » » » » » hb_buffer_t *buffer, | 257 map->get_stage_lookups (0/*GSUB*/, |
219 » » » » » hb_font_t *font HB_UNUSED) | 258 » » » map->get_feature_stage (0/*GSUB*/, feature_tag), |
| 259 » » » &lookups, &count); |
| 260 } |
| 261 |
| 262 inline bool would_substitute (hb_codepoint_t *glyphs, |
| 263 » » » » unsigned int glyphs_count, |
| 264 » » » » bool zero_context, |
| 265 » » » » hb_face_t *face) const |
| 266 { |
| 267 for (unsigned int i = 0; i < count; i++) |
| 268 if (hb_ot_layout_would_substitute_lookup_fast (face, lookups[i].index, gly
phs, glyphs_count, zero_context)) |
| 269 » return true; |
| 270 return false; |
| 271 } |
| 272 |
| 273 private: |
| 274 const hb_ot_map_t::lookup_map_t *lookups; |
| 275 unsigned int count; |
| 276 }; |
| 277 |
| 278 struct indic_shape_plan_t |
| 279 { |
| 280 ASSERT_POD (); |
| 281 |
| 282 inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const |
| 283 { |
| 284 hb_codepoint_t glyph = virama_glyph; |
| 285 if (unlikely (virama_glyph == (hb_codepoint_t) -1)) |
| 286 { |
| 287 if (!config->virama || !font->get_glyph (config->virama, 0, &glyph)) |
| 288 » glyph = 0; |
| 289 /* Technically speaking, the spec says we should apply 'locl' to virama to
o. |
| 290 * Maybe one day... */ |
| 291 |
| 292 /* Our get_glyph() function needs a font, so we can't get the virama glyph |
| 293 * during shape planning... Instead, overwrite it here. It's safe. Don'
t worry! */ |
| 294 (const_cast<indic_shape_plan_t *> (this))->virama_glyph = glyph; |
| 295 } |
| 296 |
| 297 *pglyph = glyph; |
| 298 return glyph != 0; |
| 299 } |
| 300 |
| 301 const indic_config_t *config; |
| 302 |
| 303 bool is_old_spec; |
| 304 hb_codepoint_t virama_glyph; |
| 305 |
| 306 would_substitute_feature_t pref; |
| 307 would_substitute_feature_t blwf; |
| 308 would_substitute_feature_t pstf; |
| 309 |
| 310 hb_mask_t mask_array[INDIC_NUM_FEATURES]; |
| 311 }; |
| 312 |
| 313 static void * |
| 314 data_create_indic (const hb_ot_shape_plan_t *plan) |
| 315 { |
| 316 indic_shape_plan_t *indic_plan = (indic_shape_plan_t *) calloc (1, sizeof (ind
ic_shape_plan_t)); |
| 317 if (unlikely (!indic_plan)) |
| 318 return NULL; |
| 319 |
| 320 indic_plan->config = &indic_configs[0]; |
| 321 for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++) |
| 322 if (plan->props.script == indic_configs[i].script) { |
| 323 indic_plan->config = &indic_configs[i]; |
| 324 break; |
| 325 } |
| 326 |
| 327 indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.get_
chosen_script (0) & 0x000000FF) != '2'); |
| 328 indic_plan->virama_glyph = (hb_codepoint_t) -1; |
| 329 |
| 330 indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f')); |
| 331 indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f')); |
| 332 indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f')); |
| 333 |
| 334 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++) |
| 335 indic_plan->mask_array[i] = indic_features[i].is_global ? 0 : plan->map.get_
1_mask (indic_features[i].tag); |
| 336 |
| 337 return indic_plan; |
| 338 } |
| 339 |
| 340 static void |
| 341 data_destroy_indic (void *data) |
| 342 { |
| 343 free (data); |
| 344 } |
| 345 |
| 346 static indic_position_t |
| 347 consonant_position_from_face (const indic_shape_plan_t *indic_plan, |
| 348 » » » hb_codepoint_t *glyphs, unsigned int glyphs_len, |
| 349 » » » hb_face_t *face) |
| 350 { |
| 351 bool zero_context = indic_plan->is_old_spec ? false : true; |
| 352 if (indic_plan->pref.would_substitute (glyphs, glyphs_len, zero_context, face)
) return POS_BELOW_C; |
| 353 if (indic_plan->blwf.would_substitute (glyphs, glyphs_len, zero_context, face)
) return POS_BELOW_C; |
| 354 if (indic_plan->pstf.would_substitute (glyphs, glyphs_len, zero_context, face)
) return POS_POST_C; |
| 355 return POS_BASE_C; |
| 356 } |
| 357 |
| 358 |
| 359 enum syllable_type_t { |
| 360 consonant_syllable, |
| 361 vowel_syllable, |
| 362 standalone_cluster, |
| 363 broken_cluster, |
| 364 non_indic_cluster, |
| 365 }; |
| 366 |
| 367 #include "hb-ot-shape-complex-indic-machine.hh" |
| 368 |
| 369 |
| 370 static void |
| 371 setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, |
| 372 » » hb_buffer_t *buffer, |
| 373 » » hb_font_t *font HB_UNUSED) |
220 { | 374 { |
221 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); | 375 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); |
222 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); | 376 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); |
223 | 377 |
224 /* We cannot setup masks here. We save information about characters | 378 /* We cannot setup masks here. We save information about characters |
225 * and setup masks later on in a pause-callback. */ | 379 * and setup masks later on in a pause-callback. */ |
226 | 380 |
227 unsigned int count = buffer->len; | 381 unsigned int count = buffer->len; |
228 for (unsigned int i = 0; i < count; i++) | 382 for (unsigned int i = 0; i < count; i++) |
229 { | 383 set_indic_properties (buffer->info[i]); |
230 hb_glyph_info_t &info = buffer->info[i]; | 384 } |
231 unsigned int type = get_indic_categories (info.codepoint); | |
232 | 385 |
233 info.indic_category() = type & 0x0F; | 386 static void |
234 info.indic_position() = type >> 4; | 387 setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, |
235 | 388 » » hb_font_t *font HB_UNUSED, |
236 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe | 389 » » hb_buffer_t *buffer) |
237 * treats U+0951..U+0952 all as OT_VD. | 390 { |
238 * TESTS: | 391 find_syllables (buffer); |
239 * U+092E,U+0947,U+0952 | |
240 * U+092E,U+0952,U+0947 | |
241 * U+092E,U+0947,U+0951 | |
242 * U+092E,U+0951,U+0947 | |
243 * */ | |
244 if (unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x0951, 0x0954))) | |
245 info.indic_category() = OT_VD; | |
246 | |
247 if (info.indic_category() == OT_C) { | |
248 info.indic_position() = consonant_position (info.codepoint); | |
249 if (is_ra (info.codepoint)) | |
250 » info.indic_category() = OT_Ra; | |
251 } else if (info.indic_category() == OT_SM || | |
252 » info.indic_category() == OT_VD) { | |
253 info.indic_position() = POS_SMVD; | |
254 } else if (unlikely (info.codepoint == 0x200C)) | |
255 info.indic_category() = OT_ZWNJ; | |
256 else if (unlikely (info.codepoint == 0x200D)) | |
257 info.indic_category() = OT_ZWJ; | |
258 else if (unlikely (info.codepoint == 0x25CC)) | |
259 info.indic_category() = OT_DOTTEDCIRCLE; | |
260 } | |
261 } | 392 } |
262 | 393 |
263 static int | 394 static int |
264 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) | 395 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) |
265 { | 396 { |
266 int a = pa->indic_position(); | 397 int a = pa->indic_position(); |
267 int b = pb->indic_position(); | 398 int b = pb->indic_position(); |
268 | 399 |
269 return a < b ? -1 : a == b ? 0 : +1; | 400 return a < b ? -1 : a == b ? 0 : +1; |
270 } | 401 } |
271 | 402 |
| 403 |
| 404 |
| 405 static void |
| 406 update_consonant_positions (const hb_ot_shape_plan_t *plan, |
| 407 hb_font_t *font, |
| 408 hb_buffer_t *buffer) |
| 409 { |
| 410 const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data
; |
| 411 |
| 412 unsigned int consonant_pos = indic_plan->is_old_spec ? 0 : 1; |
| 413 hb_codepoint_t glyphs[2]; |
| 414 if (indic_plan->get_virama_glyph (font, &glyphs[1 - consonant_pos])) |
| 415 { |
| 416 hb_face_t *face = font->face; |
| 417 unsigned int count = buffer->len; |
| 418 for (unsigned int i = 0; i < count; i++) |
| 419 if (buffer->info[i].indic_position() == POS_BASE_C) { |
| 420 glyphs[consonant_pos] = buffer->info[i].codepoint; |
| 421 buffer->info[i].indic_position() = consonant_position_from_face (indic_p
lan, glyphs, 2, face); |
| 422 } |
| 423 } |
| 424 } |
| 425 |
| 426 |
272 /* Rules from: | 427 /* Rules from: |
273 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ | 428 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ |
274 | 429 |
275 static void | 430 static void |
276 initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff
er, hb_mask_t *mask_array, | 431 initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, hb_buffer
_t *buffer, |
277 unsigned int start, unsigned int end) | 432 unsigned int start, unsigned int end) |
278 { | 433 { |
| 434 const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data
; |
279 hb_glyph_info_t *info = buffer->info; | 435 hb_glyph_info_t *info = buffer->info; |
280 | 436 |
281 | 437 |
282 /* 1. Find base consonant: | 438 /* 1. Find base consonant: |
283 * | 439 * |
284 * The shaping engine finds the base consonant of the syllable, using the | 440 * The shaping engine finds the base consonant of the syllable, using the |
285 * following algorithm: starting from the end of the syllable, move backwards | 441 * following algorithm: starting from the end of the syllable, move backwards |
286 * until a consonant is found that does not have a below-base or post-base | 442 * until a consonant is found that does not have a below-base or post-base |
287 * form (post-base forms have to follow below-base forms), or that is not a | 443 * form (post-base forms have to follow below-base forms), or that is not a |
288 * pre-base reordering Ra, or arrive at the first consonant. The consonant | 444 * pre-base reordering Ra, or arrive at the first consonant. The consonant |
289 * stopped at will be the base. | 445 * stopped at will be the base. |
290 * | 446 * |
291 * o If the syllable starts with Ra + Halant (in a script that has Reph) | 447 * o If the syllable starts with Ra + Halant (in a script that has Reph) |
292 * and has more than one consonant, Ra is excluded from candidates for | 448 * and has more than one consonant, Ra is excluded from candidates for |
293 * base consonants. | 449 * base consonants. |
294 */ | 450 */ |
295 | 451 |
296 unsigned int base = end; | 452 unsigned int base = end; |
297 bool has_reph = false; | 453 bool has_reph = false; |
298 | 454 |
299 { | 455 { |
300 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) | 456 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) |
301 * and has more than one consonant, Ra is excluded from candidates for | 457 * and has more than one consonant, Ra is excluded from candidates for |
302 * base consonants. */ | 458 * base consonants. */ |
303 unsigned int limit = start; | 459 unsigned int limit = start; |
304 if (mask_array[RPHF] && | 460 if (indic_plan->mask_array[RPHF] && |
305 start + 3 <= end && | 461 start + 3 <= end && |
306 info[start].indic_category() == OT_Ra && | 462 info[start].indic_category() == OT_Ra && |
307 info[start + 1].indic_category() == OT_H && | 463 info[start + 1].indic_category() == OT_H && |
308 » !is_joiner (info[start + 2])) | 464 » (/* TODO Handle other Reph modes. */ |
| 465 » (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (inf
o[start + 2])) || |
| 466 » (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2]
.indic_category() == OT_ZWJ) |
| 467 » )) |
309 { | 468 { |
310 limit += 2; | 469 limit += 2; |
| 470 while (limit < end && is_joiner (info[limit])) |
| 471 limit++; |
311 base = start; | 472 base = start; |
312 has_reph = true; | 473 has_reph = true; |
313 }; | 474 }; |
314 | 475 |
315 /* -> starting from the end of the syllable, move backwards */ | 476 switch (indic_plan->config->base_pos) |
316 unsigned int i = end; | 477 { |
317 do { | 478 case BASE_POS_LAST: |
318 i--; | |
319 /* -> until a consonant is found */ | |
320 if (is_consonant (info[i])) | |
321 { | 479 { |
322 » /* -> that does not have a below-base or post-base form | 480 » /* -> starting from the end of the syllable, move backwards */ |
323 » * (post-base forms have to follow below-base forms), */ | 481 » unsigned int i = end; |
324 » if (info[i].indic_position() != POS_BELOW_C && | 482 » bool seen_below = false; |
325 » info[i].indic_position() != POS_POST_C) | 483 » do { |
326 » { | 484 » i--; |
327 » base = i; | 485 » /* -> until a consonant is found */ |
328 » break; | 486 » if (is_consonant (info[i])) |
329 » } | 487 » { |
| 488 » /* -> that does not have a below-base or post-base form |
| 489 » * (post-base forms have to follow below-base forms), */ |
| 490 » if (info[i].indic_position() != POS_BELOW_C && |
| 491 » » (info[i].indic_position() != POS_POST_C || seen_below)) |
| 492 » { |
| 493 » base = i; |
| 494 » break; |
| 495 » } |
| 496 » if (info[i].indic_position() == POS_BELOW_C) |
| 497 » seen_below = true; |
330 | 498 |
331 » /* -> or that is not a pre-base reordering Ra, | 499 » /* -> or that is not a pre-base reordering Ra, |
332 » * | 500 » * |
333 » * TODO | 501 » * IMPLEMENTATION NOTES: |
334 » */ | 502 » * |
| 503 » * Our pre-base reordering Ra's are marked POS_BELOW, so will be ski
pped |
| 504 » * by the logic above already. |
| 505 » */ |
335 | 506 |
336 » /* -> or arrive at the first consonant. The consonant stopped at will | 507 » /* -> or arrive at the first consonant. The consonant stopped at wil
l |
337 » * be the base. */ | 508 » * be the base. */ |
338 » base = i; | 509 » base = i; |
| 510 » } |
| 511 » else |
| 512 » { |
| 513 » /* A ZWJ after a Halant stops the base search, and requests an expli
cit |
| 514 » * half form. |
| 515 » * A ZWJ before a Halant, requests a subjoined form instead, and hen
ce |
| 516 » * search continues. This is particularly important for Bengali |
| 517 » * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */ |
| 518 » if (start < i && |
| 519 » » info[i].indic_category() == OT_ZWJ && |
| 520 » » info[i - 1].indic_category() == OT_H) |
| 521 » break; |
| 522 » } |
| 523 » } while (i > limit); |
339 } | 524 } |
340 else | 525 break; |
341 » if (is_joiner (info[i])) | |
342 » break; | |
343 } while (i > limit); | |
344 if (base < start) | |
345 base = start; /* Just in case... */ | |
346 | 526 |
| 527 case BASE_POS_FIRST: |
| 528 { |
| 529 /* In scripts without half forms (eg. Khmer), the first consonant is alw
ays the base. */ |
| 530 |
| 531 if (!has_reph) |
| 532 base = limit; |
| 533 |
| 534 /* Find the last base consonant that is not blocked by ZWJ. If there is |
| 535 * a ZWJ right before a base consonant, that would request a subjoined f
orm. */ |
| 536 for (unsigned int i = limit; i < end; i++) |
| 537 if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C) |
| 538 { |
| 539 if (limit < i && info[i - 1].indic_category() == OT_ZWJ) |
| 540 break; |
| 541 else |
| 542 base = i; |
| 543 } |
| 544 |
| 545 /* Mark all subsequent consonants as below. */ |
| 546 for (unsigned int i = base + 1; i < end; i++) |
| 547 if (is_consonant (info[i]) && info[i].indic_position() == POS_BASE_C) |
| 548 info[i].indic_position() = POS_BELOW_C; |
| 549 } |
| 550 break; |
| 551 |
| 552 default: |
| 553 abort (); |
| 554 } |
347 | 555 |
348 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) | 556 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) |
349 * and has more than one consonant, Ra is excluded from candidates for | 557 * and has more than one consonant, Ra is excluded from candidates for |
350 * base consonants. */ | 558 * base consonants. |
351 if (has_reph && base == start) { | 559 * |
| 560 * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ.) */ |
| 561 if (has_reph && base == start && start + 2 == limit) { |
352 /* Have no other consonant, so Reph is not formed and Ra becomes base. */ | 562 /* Have no other consonant, so Reph is not formed and Ra becomes base. */ |
353 has_reph = false; | 563 has_reph = false; |
354 } | 564 } |
355 } | 565 } |
356 | 566 |
| 567 if (base < end) |
| 568 info[base].indic_position() = POS_BASE_C; |
| 569 |
357 | 570 |
358 /* 2. Decompose and reorder Matras: | 571 /* 2. Decompose and reorder Matras: |
359 * | 572 * |
360 * Each matra and any syllable modifier sign in the cluster are moved to the | 573 * Each matra and any syllable modifier sign in the cluster are moved to the |
361 * appropriate position relative to the consonant(s) in the cluster. The | 574 * appropriate position relative to the consonant(s) in the cluster. The |
362 * shaping engine decomposes two- or three-part matras into their constituent | 575 * shaping engine decomposes two- or three-part matras into their constituent |
363 * parts before any repositioning. Matra characters are classified by which | 576 * parts before any repositioning. Matra characters are classified by which |
364 * consonant in a conjunct they have affinity for and are reordered to the | 577 * consonant in a conjunct they have affinity for and are reordered to the |
365 * following positions: | 578 * following positions: |
366 * | 579 * |
(...skipping 16 matching lines...) Expand all Loading... |
383 * | 596 * |
384 * IMPLEMENTATION NOTES: | 597 * IMPLEMENTATION NOTES: |
385 * | 598 * |
386 * We don't need to do this: the normalize() routine already did this for us. | 599 * We don't need to do this: the normalize() routine already did this for us. |
387 */ | 600 */ |
388 | 601 |
389 | 602 |
390 /* Reorder characters */ | 603 /* Reorder characters */ |
391 | 604 |
392 for (unsigned int i = start; i < base; i++) | 605 for (unsigned int i = start; i < base; i++) |
393 info[i].indic_position() = POS_PRE_C; | 606 info[i].indic_position() = MIN (POS_PRE_C, (indic_position_t) info[i].indic_
position()); |
394 info[base].indic_position() = POS_BASE_C; | 607 |
| 608 if (base < end) |
| 609 info[base].indic_position() = POS_BASE_C; |
| 610 |
| 611 /* Mark final consonants. A final consonant is one appearing after a matra, |
| 612 * like in Khmer. */ |
| 613 for (unsigned int i = base + 1; i < end; i++) |
| 614 if (info[i].indic_category() == OT_M) { |
| 615 for (unsigned int j = i + 1; j < end; j++) |
| 616 if (is_consonant (info[j])) { |
| 617 » info[j].indic_position() = POS_FINAL_C; |
| 618 » break; |
| 619 » } |
| 620 break; |
| 621 } |
395 | 622 |
396 /* Handle beginning Ra */ | 623 /* Handle beginning Ra */ |
397 if (has_reph) | 624 if (has_reph) |
398 info[start].indic_position() = POS_RA_TO_BECOME_REPH; | 625 info[start].indic_position() = POS_RA_TO_BECOME_REPH; |
399 | 626 |
400 /* For old-style Indic script tags, move the first post-base Halant after | 627 /* For old-style Indic script tags, move the first post-base Halant after |
401 * last consonant. */ | 628 * last consonant. */ |
402 if ((map->get_chosen_script (0) & 0x000000FF) != '2') { | 629 if (indic_plan->is_old_spec) { |
403 /* We should only do this for Indic scripts which have a version two I guess
. */ | |
404 for (unsigned int i = base + 1; i < end; i++) | 630 for (unsigned int i = base + 1; i < end; i++) |
405 if (info[i].indic_category() == OT_H) { | 631 if (info[i].indic_category() == OT_H) { |
406 unsigned int j; | 632 unsigned int j; |
407 for (j = end - 1; j > i; j--) | 633 for (j = end - 1; j > i; j--) |
408 if (is_consonant (info[j])) | 634 if (is_consonant (info[j])) |
409 break; | 635 break; |
410 if (j > i) { | 636 if (j > i) { |
411 /* Move Halant to after last consonant. */ | 637 /* Move Halant to after last consonant. */ |
412 hb_glyph_info_t t = info[i]; | 638 hb_glyph_info_t t = info[i]; |
413 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); | 639 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); |
414 info[j] = t; | 640 info[j] = t; |
415 } | 641 } |
416 break; | 642 break; |
417 } | 643 } |
418 } | 644 } |
419 | 645 |
420 /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */ | 646 /* Attach misc marks to previous char to move with them. */ |
421 if (!indic_options ().uniscribe_bug_compatible) | |
422 { | 647 { |
423 /* Please update the Uniscribe branch when touching this! */ | 648 indic_position_t last_pos = POS_START; |
424 for (unsigned int i = start + 1; i < end; i++) | 649 for (unsigned int i = start; i < end; i++) |
425 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | F
LAG (OT_N) | FLAG (OT_H)))) | 650 { |
426 » info[i].indic_position() = info[i - 1].indic_position(); | 651 if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG
(OT_RS) | HALANT_OR_COENG_FLAGS))) |
427 } else { | 652 { |
428 /* | 653 » info[i].indic_position() = last_pos; |
429 * Uniscribe doesn't move the Halant with Left Matra. | 654 » if (unlikely (indic_options ().uniscribe_bug_compatible && |
430 * TEST: U+092B,U+093F,U+094DE | 655 » » info[i].indic_category() == OT_H && |
431 */ | 656 » » info[i].indic_position() == POS_PRE_M)) |
432 /* Please update the non-Uniscribe branch when touching this! */ | 657 » { |
433 for (unsigned int i = start + 1; i < end; i++) | 658 » /* |
434 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | F
LAG (OT_N) | FLAG (OT_H)))) { | 659 » * Uniscribe doesn't move the Halant with Left Matra. |
435 » info[i].indic_position() = info[i - 1].indic_position(); | 660 » * TEST: U+092B,U+093F,U+094D |
436 » if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_
PRE_M) | 661 » */ |
437 for (unsigned int j = i; j > start; j--) | 662 for (unsigned int j = i; j > start; j--) |
438 if (info[j - 1].indic_position() != POS_PRE_M) { | 663 if (info[j - 1].indic_position() != POS_PRE_M) { |
439 info[i].indic_position() = info[j - 1].indic_position(); | 664 info[i].indic_position() = info[j - 1].indic_position(); |
440 break; | 665 break; |
441 } | 666 } |
| 667 } |
| 668 } else if (info[i].indic_position() != POS_SMVD) { |
| 669 last_pos = (indic_position_t) info[i].indic_position(); |
| 670 } |
| 671 } |
| 672 } |
| 673 /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */ |
| 674 { |
| 675 unsigned int last_halant = end; |
| 676 for (unsigned int i = base + 1; i < end; i++) |
| 677 if (is_halant_or_coeng (info[i])) |
| 678 last_halant = i; |
| 679 else if (is_consonant (info[i])) { |
| 680 for (unsigned int j = last_halant; j < i; j++) |
| 681 if (info[j].indic_position() != POS_SMVD) |
| 682 info[j].indic_position() = info[i].indic_position(); |
442 } | 683 } |
443 } | 684 } |
444 | 685 |
445 /* We do bubble-sort, skip malicious clusters attempts */ | |
446 if (end - start < 64) | |
447 { | 686 { |
| 687 /* Things are out-of-control for post base positions, they may shuffle |
| 688 * around like crazy, so merge clusters. For pre-base stuff, we handle |
| 689 * cluster issues in final reordering. */ |
| 690 buffer->merge_clusters (base, end); |
448 /* Sit tight, rock 'n roll! */ | 691 /* Sit tight, rock 'n roll! */ |
449 hb_bubble_sort (info + start, end - start, compare_indic_order); | 692 hb_bubble_sort (info + start, end - start, compare_indic_order); |
450 /* Find base again */ | 693 /* Find base again */ |
451 base = end; | 694 base = end; |
452 for (unsigned int i = start; i < end; i++) | 695 for (unsigned int i = start; i < end; i++) |
453 if (info[i].indic_position() == POS_BASE_C) { | 696 if (info[i].indic_position() == POS_BASE_C) { |
454 base = i; | 697 base = i; |
455 break; | 698 break; |
456 } | 699 } |
457 } | 700 } |
458 | 701 |
459 /* Setup masks now */ | 702 /* Setup masks now */ |
460 | 703 |
461 { | 704 { |
462 hb_mask_t mask; | 705 hb_mask_t mask; |
463 | 706 |
464 /* Reph */ | 707 /* Reph */ |
465 for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_T
O_BECOME_REPH; i++) | 708 for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_T
O_BECOME_REPH; i++) |
466 info[i].mask |= mask_array[RPHF]; | 709 info[i].mask |= indic_plan->mask_array[RPHF]; |
467 | 710 |
468 /* Pre-base */ | 711 /* Pre-base */ |
469 mask = mask_array[HALF] | mask_array[AKHN] | mask_array[CJCT]; | 712 mask = indic_plan->mask_array[HALF]; |
470 for (unsigned int i = start; i < base; i++) | 713 for (unsigned int i = start; i < base; i++) |
471 info[i].mask |= mask; | 714 info[i].mask |= mask; |
472 /* Base */ | 715 /* Base */ |
473 mask = mask_array[AKHN] | mask_array[CJCT]; | 716 mask = 0; |
474 info[base].mask |= mask; | 717 if (base < end) |
| 718 info[base].mask |= mask; |
475 /* Post-base */ | 719 /* Post-base */ |
476 mask = mask_array[BLWF] | mask_array[PSTF] | mask_array[CJCT]; | 720 mask = indic_plan->mask_array[BLWF] | indic_plan->mask_array[ABVF] | indic_p
lan->mask_array[PSTF]; |
477 for (unsigned int i = base + 1; i < end; i++) | 721 for (unsigned int i = base + 1; i < end; i++) |
478 info[i].mask |= mask; | 722 info[i].mask |= mask; |
479 } | 723 } |
480 | 724 |
| 725 if (indic_plan->mask_array[PREF] && base + 2 < end) |
| 726 { |
| 727 /* Find a Halant,Ra sequence and mark it for pre-base reordering processing.
*/ |
| 728 for (unsigned int i = base + 1; i + 1 < end; i++) |
| 729 if (is_halant_or_coeng (info[i + (indic_plan->is_old_spec ? 1 : 0)]) && |
| 730 info[i + (indic_plan->is_old_spec ? 0 : 1)].indic_category() == OT_Ra) |
| 731 { |
| 732 info[i++].mask |= indic_plan->mask_array[PREF]; |
| 733 info[i++].mask |= indic_plan->mask_array[PREF]; |
| 734 |
| 735 /* Mark the subsequent stuff with 'cfar'. Used in Khmer. |
| 736 * Read the feature spec. |
| 737 * This allows distinguishing the following cases with MS Khmer fonts: |
| 738 * U+1784,U+17D2,U+179A,U+17D2,U+1782 |
| 739 * U+1784,U+17D2,U+1782,U+17D2,U+179A |
| 740 */ |
| 741 for (; i < end; i++) |
| 742 info[i].mask |= indic_plan->mask_array[CFAR]; |
| 743 |
| 744 break; |
| 745 } |
| 746 } |
| 747 |
481 /* Apply ZWJ/ZWNJ effects */ | 748 /* Apply ZWJ/ZWNJ effects */ |
482 for (unsigned int i = start + 1; i < end; i++) | 749 for (unsigned int i = start + 1; i < end; i++) |
483 if (is_joiner (info[i])) { | 750 if (is_joiner (info[i])) { |
484 bool non_joiner = info[i].indic_category() == OT_ZWNJ; | 751 bool non_joiner = info[i].indic_category() == OT_ZWNJ; |
485 unsigned int j = i; | 752 unsigned int j = i; |
486 | 753 |
487 do { | 754 do { |
488 j--; | 755 j--; |
489 | 756 |
490 » info[j].mask &= ~mask_array[CJCT]; | 757 » /* A ZWJ disables CJCT, however, its mere presence is enough |
| 758 » * to disable ligation. No explicit action needed. */ |
| 759 |
| 760 » /* A ZWNJ disables HALF. */ |
491 if (non_joiner) | 761 if (non_joiner) |
492 » info[j].mask &= ~mask_array[HALF]; | 762 » info[j].mask &= ~indic_plan->mask_array[HALF]; |
493 | 763 |
494 } while (j > start && !is_consonant (info[j])); | 764 } while (j > start && !is_consonant (info[j])); |
495 } | 765 } |
496 } | 766 } |
497 | 767 |
498 | 768 |
499 static void | 769 static void |
500 initial_reordering_vowel_syllable (const hb_ot_map_t *map, | 770 initial_reordering_vowel_syllable (const hb_ot_shape_plan_t *plan, |
501 hb_buffer_t *buffer, | 771 hb_buffer_t *buffer, |
502 hb_mask_t *mask_array, | |
503 unsigned int start, unsigned int end) | 772 unsigned int start, unsigned int end) |
504 { | 773 { |
505 /* We made the vowels look like consonants. So let's call the consonant logic
! */ | 774 /* We made the vowels look like consonants. So let's call the consonant logic
! */ |
506 initial_reordering_consonant_syllable (map, buffer, mask_array, start, end); | 775 initial_reordering_consonant_syllable (plan, buffer, start, end); |
507 } | 776 } |
508 | 777 |
509 static void | 778 static void |
510 initial_reordering_standalone_cluster (const hb_ot_map_t *map, | 779 initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan, |
511 hb_buffer_t *buffer, | 780 hb_buffer_t *buffer, |
512 hb_mask_t *mask_array, | |
513 unsigned int start, unsigned int end) | 781 unsigned int start, unsigned int end) |
514 { | 782 { |
515 /* We treat NBSP/dotted-circle as if they are consonants, so we should just ch
ain. | 783 /* We treat NBSP/dotted-circle as if they are consonants, so we should just ch
ain. |
516 * Only if not in compatibility mode that is... */ | 784 * Only if not in compatibility mode that is... */ |
517 | 785 |
518 if (indic_options ().uniscribe_bug_compatible) | 786 if (indic_options ().uniscribe_bug_compatible) |
519 { | 787 { |
520 /* For dotted-circle, this is what Uniscribe does: | 788 /* For dotted-circle, this is what Uniscribe does: |
521 * If dotted-circle is the last glyph, it just does nothing. | 789 * If dotted-circle is the last glyph, it just does nothing. |
522 * Ie. It doesn't form Reph. */ | 790 * Ie. It doesn't form Reph. */ |
523 if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) | 791 if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) |
524 return; | 792 return; |
525 } | 793 } |
526 | 794 |
527 initial_reordering_consonant_syllable (map, buffer, mask_array, start, end); | 795 initial_reordering_consonant_syllable (plan, buffer, start, end); |
528 } | 796 } |
529 | 797 |
530 static void | 798 static void |
531 initial_reordering_non_indic (const hb_ot_map_t *map HB_UNUSED, | 799 initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan, |
532 » » » hb_buffer_t *buffer HB_UNUSED, | 800 » » » » hb_buffer_t *buffer, |
533 » » » hb_mask_t *mask_array HB_UNUSED, | 801 » » » » unsigned int start, unsigned int end) |
534 » » » unsigned int start HB_UNUSED, unsigned int end HB_
UNUSED) | 802 { |
| 803 /* We already inserted dotted-circles, so just call the standalone_cluster. */ |
| 804 initial_reordering_standalone_cluster (plan, buffer, start, end); |
| 805 } |
| 806 |
| 807 static void |
| 808 initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED, |
| 809 » » » » hb_buffer_t *buffer HB_UNUSED, |
| 810 » » » » unsigned int start HB_UNUSED, unsigned int
end HB_UNUSED) |
535 { | 811 { |
536 /* Nothing to do right now. If we ever switch to using the output | 812 /* Nothing to do right now. If we ever switch to using the output |
537 * buffer in the reordering process, we'd need to next_glyph() here. */ | 813 * buffer in the reordering process, we'd need to next_glyph() here. */ |
538 } | 814 } |
539 | 815 |
540 #include "hb-ot-shape-complex-indic-machine.hh" | |
541 | 816 |
542 static void | 817 static void |
543 initial_reordering (const hb_ot_map_t *map, | 818 initial_reordering_syllable (const hb_ot_shape_plan_t *plan, |
544 » » hb_face_t *face HB_UNUSED, | 819 » » » hb_buffer_t *buffer, |
545 » » hb_buffer_t *buffer, | 820 » » » unsigned int start, unsigned int end) |
546 » » void *user_data HB_UNUSED) | |
547 { | 821 { |
548 hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0}; | 822 syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllabl
e() & 0x0F); |
549 unsigned int num_masks = ARRAY_LENGTH (indic_basic_features); | 823 switch (syllable_type) { |
550 for (unsigned int i = 0; i < num_masks; i++) | 824 case consonant_syllable:» initial_reordering_consonant_syllable (plan, buf
fer, start, end); return; |
551 mask_array[i] = map->get_1_mask (indic_basic_features[i].tag); | 825 case vowel_syllable:» » initial_reordering_vowel_syllable (plan, buf
fer, start, end); return; |
| 826 case standalone_cluster:» initial_reordering_standalone_cluster (plan, buf
fer, start, end); return; |
| 827 case broken_cluster:» » initial_reordering_broken_cluster (plan, buf
fer, start, end); return; |
| 828 case non_indic_cluster:» initial_reordering_non_indic_cluster (plan, buf
fer, start, end); return; |
| 829 } |
| 830 } |
552 | 831 |
553 find_syllables (map, buffer, mask_array); | 832 static inline void |
| 833 insert_dotted_circles (const hb_ot_shape_plan_t *plan, |
| 834 » » hb_font_t *font, |
| 835 » » hb_buffer_t *buffer) |
| 836 { |
| 837 /* Note: This loop is extra overhead, but should not be measurable. */ |
| 838 bool has_broken_syllables = false; |
| 839 unsigned int count = buffer->len; |
| 840 for (unsigned int i = 0; i < count; i++) |
| 841 if ((buffer->info[i].syllable() & 0x0F) == broken_cluster) { |
| 842 has_broken_syllables = true; |
| 843 break; |
| 844 } |
| 845 if (likely (!has_broken_syllables)) |
| 846 return; |
| 847 |
| 848 |
| 849 hb_codepoint_t dottedcircle_glyph; |
| 850 if (!font->get_glyph (0x25CC, 0, &dottedcircle_glyph)) |
| 851 return; |
| 852 |
| 853 hb_glyph_info_t dottedcircle; |
| 854 dottedcircle.codepoint = 0x25CC; |
| 855 set_indic_properties (dottedcircle); |
| 856 dottedcircle.codepoint = dottedcircle_glyph; |
| 857 |
| 858 buffer->clear_output (); |
| 859 |
| 860 buffer->idx = 0; |
| 861 unsigned int last_syllable = 0; |
| 862 while (buffer->idx < buffer->len) |
| 863 { |
| 864 unsigned int syllable = buffer->cur().syllable(); |
| 865 syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F); |
| 866 if (unlikely (last_syllable != syllable && syllable_type == broken_cluster)) |
| 867 { |
| 868 hb_glyph_info_t info = dottedcircle; |
| 869 info.cluster = buffer->cur().cluster; |
| 870 info.mask = buffer->cur().mask; |
| 871 info.syllable() = buffer->cur().syllable(); |
| 872 buffer->output_info (info); |
| 873 last_syllable = syllable; |
| 874 } |
| 875 buffer->next_glyph (); |
| 876 } |
| 877 |
| 878 buffer->swap_buffers (); |
554 } | 879 } |
555 | 880 |
556 static void | 881 static void |
557 final_reordering_syllable (hb_buffer_t *buffer, hb_mask_t *mask_array, | 882 initial_reordering (const hb_ot_shape_plan_t *plan, |
| 883 » » hb_font_t *font, |
| 884 » » hb_buffer_t *buffer) |
| 885 { |
| 886 update_consonant_positions (plan, font, buffer); |
| 887 insert_dotted_circles (plan, font, buffer); |
| 888 |
| 889 hb_glyph_info_t *info = buffer->info; |
| 890 unsigned int count = buffer->len; |
| 891 if (unlikely (!count)) return; |
| 892 unsigned int last = 0; |
| 893 unsigned int last_syllable = info[0].syllable(); |
| 894 for (unsigned int i = 1; i < count; i++) |
| 895 if (last_syllable != info[i].syllable()) { |
| 896 initial_reordering_syllable (plan, buffer, last, i); |
| 897 last = i; |
| 898 last_syllable = info[last].syllable(); |
| 899 } |
| 900 initial_reordering_syllable (plan, buffer, last, count); |
| 901 } |
| 902 |
| 903 static void |
| 904 final_reordering_syllable (const hb_ot_shape_plan_t *plan, |
| 905 » » » hb_buffer_t *buffer, |
558 unsigned int start, unsigned int end) | 906 unsigned int start, unsigned int end) |
559 { | 907 { |
| 908 const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data
; |
560 hb_glyph_info_t *info = buffer->info; | 909 hb_glyph_info_t *info = buffer->info; |
561 | 910 |
562 /* 4. Final reordering: | 911 /* 4. Final reordering: |
563 * | 912 * |
564 * After the localized forms and basic shaping forms GSUB features have been | 913 * After the localized forms and basic shaping forms GSUB features have been |
565 * applied (see below), the shaping engine performs some final glyph | 914 * applied (see below), the shaping engine performs some final glyph |
566 * reordering before applying all the remaining font features to the entire | 915 * reordering before applying all the remaining font features to the entire |
567 * cluster. | 916 * cluster. |
568 */ | 917 */ |
569 | 918 |
570 /* Find base again */ | 919 /* Find base again */ |
571 unsigned int base = end; | 920 unsigned int base; |
572 for (unsigned int i = start; i < end; i++) | 921 for (base = start; base < end; base++) |
573 if (info[i].indic_position() == POS_BASE_C) { | 922 if (info[base].indic_position() >= POS_BASE_C) { |
574 base = i; | 923 if (start < base && info[base].indic_position() > POS_BASE_C) |
| 924 base--; |
575 break; | 925 break; |
576 } | 926 } |
577 | 927 |
578 if (base == start) { | |
579 /* There's no Reph, and no left Matra to reposition. Just merge the cluster | |
580 * and go home. */ | |
581 buffer->merge_clusters (start, end); | |
582 return; | |
583 } | |
584 | |
585 unsigned int start_of_last_cluster = base; | |
586 | 928 |
587 /* o Reorder matras: | 929 /* o Reorder matras: |
588 * | 930 * |
589 * If a pre-base matra character had been reordered before applying basic | 931 * If a pre-base matra character had been reordered before applying basic |
590 * features, the glyph can be moved closer to the main consonant based on | 932 * features, the glyph can be moved closer to the main consonant based on |
591 * whether half-forms had been formed. Actual position for the matra is | 933 * whether half-forms had been formed. Actual position for the matra is |
592 * defined as “after last standalone halant glyph, after initial matra | 934 * defined as “after last standalone halant glyph, after initial matra |
593 * position and before the main consonant”. If ZWJ or ZWNJ follow this | 935 * position and before the main consonant”. If ZWJ or ZWNJ follow this |
594 * halant, position is moved after it. | 936 * halant, position is moved after it. |
595 */ | 937 */ |
596 | 938 |
| 939 if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base
matra characters. */ |
597 { | 940 { |
598 unsigned int new_matra_pos = base - 1; | 941 /* If we lost track of base, alas, position before last thingy. */ |
599 while (new_matra_pos > start && | 942 unsigned int new_pos = base == end ? base - 2 : base - 1; |
600 » !(FLAG (info[new_matra_pos].indic_category()) & (FLAG (OT_M) | FLAG (
OT_H)))) | |
601 new_matra_pos--; | |
602 /* If we found no Halant we are done. Otherwise only proceed if the Halant
does | |
603 * not belong to the Matra itself! */ | |
604 if (info[new_matra_pos].indic_category() == OT_H && | |
605 » info[new_matra_pos].indic_position() != POS_PRE_M) { | |
606 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ | |
607 if (new_matra_pos + 1 < end && is_joiner (info[new_matra_pos + 1])) | |
608 » new_matra_pos++; | |
609 | 943 |
| 944 /* Malayalam / Tamil do not have "half" forms or explicit virama forms. |
| 945 * The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
| 946 * We want to position matra after them. |
| 947 */ |
| 948 if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != H
B_SCRIPT_TAMIL) |
| 949 { |
| 950 while (new_pos > start && |
| 951 !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_C
oeng))))) |
| 952 new_pos--; |
| 953 |
| 954 /* If we found no Halant we are done. |
| 955 * Otherwise only proceed if the Halant does |
| 956 * not belong to the Matra itself! */ |
| 957 if (is_halant_or_coeng (info[new_pos]) && |
| 958 info[new_pos].indic_position() != POS_PRE_M) |
| 959 { |
| 960 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ |
| 961 if (new_pos + 1 < end && is_joiner (info[new_pos + 1])) |
| 962 new_pos++; |
| 963 } |
| 964 else |
| 965 new_pos = start; /* No move. */ |
| 966 } |
| 967 |
| 968 if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M) |
| 969 { |
610 /* Now go see if there's actually any matras... */ | 970 /* Now go see if there's actually any matras... */ |
611 for (unsigned int i = new_matra_pos; i > start; i--) | 971 for (unsigned int i = new_pos; i > start; i--) |
612 if (info[i - 1].indic_position () == POS_PRE_M) | 972 if (info[i - 1].indic_position () == POS_PRE_M) |
613 { | 973 { |
614 » unsigned int old_matra_pos = i - 1; | 974 » unsigned int old_pos = i - 1; |
615 » hb_glyph_info_t matra = info[old_matra_pos]; | 975 » hb_glyph_info_t tmp = info[old_pos]; |
616 » memmove (&info[old_matra_pos], &info[old_matra_pos + 1], (new_matra_po
s - old_matra_pos) * sizeof (info[0])); | 976 » memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * siz
eof (info[0])); |
617 » info[new_matra_pos] = matra; | 977 » info[new_pos] = tmp; |
618 » start_of_last_cluster = MIN (new_matra_pos, start_of_last_cluster); | 978 » new_pos--; |
619 » new_matra_pos--; | 979 » } |
| 980 buffer->merge_clusters (new_pos, MIN (end, base + 1)); |
| 981 } else { |
| 982 for (unsigned int i = start; i < base; i++) |
| 983 » if (info[i].indic_position () == POS_PRE_M) { |
| 984 » buffer->merge_clusters (i, MIN (end, base + 1)); |
| 985 » break; |
620 } | 986 } |
621 } | 987 } |
622 } | 988 } |
623 | 989 |
624 | 990 |
625 /* o Reorder reph: | 991 /* o Reorder reph: |
626 * | 992 * |
627 * Reph’s original position is always at the beginning of the syllable, | 993 * Reph’s original position is always at the beginning of the syllable, |
628 * (i.e. it is not reordered at the character reordering stage). However, | 994 * (i.e. it is not reordered at the character reordering stage). However, |
629 * it will be reordered according to the basic-forms shaping results. | 995 * it will be reordered according to the basic-forms shaping results. |
630 * Possible positions for reph, depending on the script, are; after main, | 996 * Possible positions for reph, depending on the script, are; after main, |
631 * before post-base consonant forms, and after post-base consonant forms. | 997 * before post-base consonant forms, and after post-base consonant forms. |
632 */ | 998 */ |
633 | 999 |
634 /* If there's anything after the Ra that has the REPH pos, it ought to be hala
nt. | 1000 /* If there's anything after the Ra that has the REPH pos, it ought to be hala
nt. |
635 * Which means that the font has failed to ligate the Reph. In which case, we | 1001 * Which means that the font has failed to ligate the Reph. In which case, we |
636 * shouldn't move. */ | 1002 * shouldn't move. */ |
637 if (start + 1 < end && | 1003 if (start + 1 < end && |
638 info[start].indic_position() == POS_RA_TO_BECOME_REPH && | 1004 info[start].indic_position() == POS_RA_TO_BECOME_REPH && |
639 info[start + 1].indic_position() != POS_RA_TO_BECOME_REPH) | 1005 info[start + 1].indic_position() != POS_RA_TO_BECOME_REPH) |
640 { | 1006 { |
641 unsigned int new_reph_pos; | 1007 unsigned int new_reph_pos; |
| 1008 reph_position_t reph_pos = indic_plan->config->reph_pos; |
642 | 1009 |
643 enum reph_position_t { | 1010 /* XXX Figure out old behavior too */ |
644 REPH_AFTER_MAIN, | |
645 REPH_BEFORE_SUBSCRIPT, | |
646 REPH_AFTER_SUBSCRIPT, | |
647 REPH_BEFORE_POSTSCRIPT, | |
648 REPH_AFTER_POSTSCRIPT | |
649 } reph_pos; | |
650 | |
651 /* XXX Figure out old behavior too */ | |
652 switch ((hb_tag_t) buffer->props.script) | |
653 { | |
654 case HB_SCRIPT_MALAYALAM: | |
655 case HB_SCRIPT_ORIYA: | |
656 » reph_pos = REPH_AFTER_MAIN; | |
657 » break; | |
658 | |
659 case HB_SCRIPT_GURMUKHI: | |
660 » reph_pos = REPH_BEFORE_SUBSCRIPT; | |
661 » break; | |
662 | |
663 case HB_SCRIPT_BENGALI: | |
664 » reph_pos = REPH_AFTER_SUBSCRIPT; | |
665 » break; | |
666 | |
667 default: | |
668 case HB_SCRIPT_DEVANAGARI: | |
669 case HB_SCRIPT_GUJARATI: | |
670 » reph_pos = REPH_BEFORE_POSTSCRIPT; | |
671 » break; | |
672 | |
673 case HB_SCRIPT_KANNADA: | |
674 case HB_SCRIPT_TAMIL: | |
675 case HB_SCRIPT_TELUGU: | |
676 » reph_pos = REPH_AFTER_POSTSCRIPT; | |
677 » break; | |
678 } | |
679 | 1011 |
680 /* 1. If reph should be positioned after post-base consonant forms, | 1012 /* 1. If reph should be positioned after post-base consonant forms, |
681 * proceed to step 5. | 1013 * proceed to step 5. |
682 */ | 1014 */ |
683 if (reph_pos == REPH_AFTER_POSTSCRIPT) | 1015 if (reph_pos == REPH_POS_AFTER_POST) |
684 { | 1016 { |
685 goto reph_step_5; | 1017 goto reph_step_5; |
686 } | 1018 } |
687 | 1019 |
688 /* 2. If the reph repositioning class is not after post-base: target | 1020 /* 2. If the reph repositioning class is not after post-base: target |
689 * position is after the first explicit halant glyph between the | 1021 * position is after the first explicit halant glyph between the |
690 * first post-reph consonant and last main consonant. If ZWJ or ZWN
J | 1022 * first post-reph consonant and last main consonant. If ZWJ or ZWN
J |
691 * are following this halant, position is moved after it. If such | 1023 * are following this halant, position is moved after it. If such |
692 * position is found, this is the target position. Otherwise, | 1024 * position is found, this is the target position. Otherwise, |
693 * proceed to the next step. | 1025 * proceed to the next step. |
694 * | 1026 * |
695 * Note: in old-implementation fonts, where classifications were | 1027 * Note: in old-implementation fonts, where classifications were |
696 * fixed in shaping engine, there was no case where reph position | 1028 * fixed in shaping engine, there was no case where reph position |
697 * will be found on this step. | 1029 * will be found on this step. |
698 */ | 1030 */ |
699 { | 1031 { |
700 new_reph_pos = start + 1; | 1032 new_reph_pos = start + 1; |
701 while (new_reph_pos < base && info[new_reph_pos].indic_category() != OT_H) | 1033 while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) |
702 new_reph_pos++; | 1034 new_reph_pos++; |
703 | 1035 |
704 if (new_reph_pos < base && info[new_reph_pos].indic_category() == OT_H) { | 1036 if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) { |
705 /* ->If ZWJ or ZWNJ are following this halant, position is moved after i
t. */ | 1037 /* ->If ZWJ or ZWNJ are following this halant, position is moved after i
t. */ |
706 if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) | 1038 if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) |
707 new_reph_pos++; | 1039 new_reph_pos++; |
708 goto reph_move; | 1040 goto reph_move; |
709 } | 1041 } |
710 } | 1042 } |
711 | 1043 |
712 /* 3. If reph should be repositioned after the main consonant: find th
e | 1044 /* 3. If reph should be repositioned after the main consonant: find th
e |
713 * first consonant not ligated with main, or find the first | 1045 * first consonant not ligated with main, or find the first |
714 * consonant that is not a potential pre-base reordering Ra. | 1046 * consonant that is not a potential pre-base reordering Ra. |
715 */ | 1047 */ |
716 if (reph_pos == REPH_AFTER_MAIN) | 1048 if (reph_pos == REPH_POS_AFTER_MAIN) |
717 { | 1049 { |
718 /* XXX */ | 1050 new_reph_pos = base; |
| 1051 /* XXX Skip potential pre-base reordering Ra. */ |
| 1052 while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <
= POS_AFTER_MAIN) |
| 1053 » new_reph_pos++; |
| 1054 if (new_reph_pos < end) |
| 1055 goto reph_move; |
719 } | 1056 } |
720 | 1057 |
721 /* 4. If reph should be positioned before post-base consonant, find | 1058 /* 4. If reph should be positioned before post-base consonant, find |
722 * first post-base classified consonant not ligated with main. If n
o | 1059 * first post-base classified consonant not ligated with main. If n
o |
723 * consonant is found, the target position should be before the | 1060 * consonant is found, the target position should be before the |
724 * first matra, syllable modifier sign or vedic sign. | 1061 * first matra, syllable modifier sign or vedic sign. |
725 */ | 1062 */ |
726 /* This is our take on what step 4 is trying to say (and failing, BADLY). */ | 1063 /* This is our take on what step 4 is trying to say (and failing, BADLY). */ |
727 if (reph_pos == REPH_AFTER_SUBSCRIPT) | 1064 if (reph_pos == REPH_POS_AFTER_SUB) |
728 { | 1065 { |
729 new_reph_pos = base; | 1066 new_reph_pos = base; |
730 while (new_reph_pos < end && | 1067 while (new_reph_pos < end && |
731 » !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST
_C) | FLAG (POS_POST_M) | FLAG (POS_SMVD)))) | 1068 » !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST
_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD)))) |
732 new_reph_pos++; | 1069 new_reph_pos++; |
733 if (new_reph_pos < end) | 1070 if (new_reph_pos < end) |
734 goto reph_move; | 1071 goto reph_move; |
735 } | 1072 } |
736 | 1073 |
737 /* 5. If no consonant is found in steps 3 or 4, move reph to a positio
n | 1074 /* 5. If no consonant is found in steps 3 or 4, move reph to a positio
n |
738 * immediately before the first post-base matra, syllable modifier | 1075 * immediately before the first post-base matra, syllable modifier |
739 * sign or vedic sign that has a reordering class after the intende
d | 1076 * sign or vedic sign that has a reordering class after the intende
d |
740 * reph position. For example, if the reordering position for reph | 1077 * reph position. For example, if the reordering position for reph |
741 * is post-main, it will skip above-base matras that also have a | 1078 * is post-main, it will skip above-base matras that also have a |
742 * post-main position. | 1079 * post-main position. |
743 */ | 1080 */ |
744 reph_step_5: | 1081 reph_step_5: |
745 { | 1082 { |
746 /* XXX */ | 1083 /* Copied from step 2. */ |
| 1084 new_reph_pos = start + 1; |
| 1085 while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) |
| 1086 » new_reph_pos++; |
| 1087 |
| 1088 if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) { |
| 1089 » /* ->If ZWJ or ZWNJ are following this halant, position is moved after i
t. */ |
| 1090 » if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) |
| 1091 » new_reph_pos++; |
| 1092 » goto reph_move; |
| 1093 } |
747 } | 1094 } |
748 | 1095 |
749 /* 6. Otherwise, reorder reph to the end of the syllable. | 1096 /* 6. Otherwise, reorder reph to the end of the syllable. |
750 */ | 1097 */ |
751 { | 1098 { |
752 new_reph_pos = end - 1; | 1099 new_reph_pos = end - 1; |
753 while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_
SMVD) | 1100 while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_
SMVD) |
754 new_reph_pos--; | 1101 new_reph_pos--; |
755 | 1102 |
756 /* | 1103 /* |
757 * If the Reph is to be ending up after a Matra,Halant sequence, | 1104 * If the Reph is to be ending up after a Matra,Halant sequence, |
758 * position it before that Halant so it can interact with the Matra. | 1105 * position it before that Halant so it can interact with the Matra. |
759 * However, if it's a plain Consonant,Halant we shouldn't do that. | 1106 * However, if it's a plain Consonant,Halant we shouldn't do that. |
760 * Uniscribe doesn't do this. | 1107 * Uniscribe doesn't do this. |
761 * TEST: U+0930,U+094D,U+0915,U+094B,U+094D | 1108 * TEST: U+0930,U+094D,U+0915,U+094B,U+094D |
762 */ | 1109 */ |
763 if (!indic_options ().uniscribe_bug_compatible && | 1110 if (!indic_options ().uniscribe_bug_compatible && |
764 » unlikely (info[new_reph_pos].indic_category() == OT_H)) { | 1111 » unlikely (is_halant_or_coeng (info[new_reph_pos]))) { |
765 for (unsigned int i = base + 1; i < new_reph_pos; i++) | 1112 for (unsigned int i = base + 1; i < new_reph_pos; i++) |
766 if (info[i].indic_category() == OT_M) { | 1113 if (info[i].indic_category() == OT_M) { |
767 /* Ok, got it. */ | 1114 /* Ok, got it. */ |
768 new_reph_pos--; | 1115 new_reph_pos--; |
769 } | 1116 } |
770 } | 1117 } |
771 goto reph_move; | 1118 goto reph_move; |
772 } | 1119 } |
773 | 1120 |
774 reph_move: | 1121 reph_move: |
775 { | 1122 { |
| 1123 /* Yay, one big cluster! Merge before moving. */ |
| 1124 buffer->merge_clusters (start, end); |
| 1125 |
776 /* Move */ | 1126 /* Move */ |
777 hb_glyph_info_t reph = info[start]; | 1127 hb_glyph_info_t reph = info[start]; |
778 memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (
info[0])); | 1128 memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (
info[0])); |
779 info[new_reph_pos] = reph; | 1129 info[new_reph_pos] = reph; |
780 start_of_last_cluster = start; /* Yay, one big cluster! */ | |
781 } | 1130 } |
782 } | 1131 } |
783 | 1132 |
784 | 1133 |
785 /* o Reorder pre-base reordering consonants: | 1134 /* o Reorder pre-base reordering consonants: |
786 * | 1135 * |
787 * If a pre-base reordering consonant is found, reorder it according to | 1136 * If a pre-base reordering consonant is found, reorder it according to |
788 * the following rules: | 1137 * the following rules: |
789 * | |
790 * 1. Only reorder a glyph produced by substitution during application | |
791 * of the feature. (Note that a font may shape a Ra consonant with | |
792 * the feature generally but block it in certain contexts.) | |
793 * | |
794 * 2. Try to find a target position the same way as for pre-base matra. | |
795 * If it is found, reorder pre-base consonant glyph. | |
796 * | |
797 * 3. If position is not found, reorder immediately before main | |
798 * consonant. | |
799 */ | 1138 */ |
800 | 1139 |
801 /* TODO */ | 1140 if (indic_plan->mask_array[PREF] && base + 1 < end) /* Otherwise there can't b
e any pre-base reordering Ra. */ |
| 1141 { |
| 1142 for (unsigned int i = base + 1; i < end; i++) |
| 1143 if ((info[i].mask & indic_plan->mask_array[PREF]) != 0) |
| 1144 { |
| 1145 » /* 1. Only reorder a glyph produced by substitution during applica
tion |
| 1146 » * of the <pref> feature. (Note that a font may shape a Ra cons
onant with |
| 1147 » * the feature generally but block it in certain contexts.) |
| 1148 » */ |
| 1149 » if (i + 1 == end || (info[i + 1].mask & indic_plan->mask_array[PREF]) ==
0) |
| 1150 » { |
| 1151 » /* |
| 1152 » * 2. Try to find a target position the same way as for pre-base
matra. |
| 1153 » * If it is found, reorder pre-base consonant glyph. |
| 1154 » * |
| 1155 » * 3. If position is not found, reorder immediately before main |
| 1156 » * consonant. |
| 1157 » */ |
802 | 1158 |
| 1159 unsigned int new_pos = base; |
| 1160 while (new_pos > start && |
| 1161 !(is_one_of (info[new_pos - 1], FLAG(OT_M) | HALANT_OR_COENG_FL
AGS))) |
| 1162 new_pos--; |
| 1163 |
| 1164 /* In Khmer coeng model, a V,Ra can go *after* matras. If it goes aft
er a |
| 1165 * split matra, it should be reordered to *before* the left part of su
ch matra. */ |
| 1166 if (new_pos > start && info[new_pos - 1].indic_category() == OT_M) |
| 1167 { |
| 1168 unsigned int old_pos = i; |
| 1169 for (unsigned int i = base + 1; i < old_pos; i++) |
| 1170 if (info[i].indic_category() == OT_M) |
| 1171 { |
| 1172 new_pos--; |
| 1173 break; |
| 1174 } |
| 1175 } |
| 1176 |
| 1177 if (new_pos > start && is_halant_or_coeng (info[new_pos - 1])) |
| 1178 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it.
*/ |
| 1179 if (new_pos < end && is_joiner (info[new_pos])) |
| 1180 new_pos++; |
| 1181 |
| 1182 { |
| 1183 unsigned int old_pos = i; |
| 1184 buffer->merge_clusters (new_pos, old_pos + 1); |
| 1185 hb_glyph_info_t tmp = info[old_pos]; |
| 1186 memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * s
izeof (info[0])); |
| 1187 info[new_pos] = tmp; |
| 1188 } |
| 1189 } |
| 1190 |
| 1191 break; |
| 1192 } |
| 1193 } |
803 | 1194 |
804 | 1195 |
805 /* Apply 'init' to the Left Matra if it's a word start. */ | 1196 /* Apply 'init' to the Left Matra if it's a word start. */ |
806 if (info[start].indic_position () == POS_PRE_M && | 1197 if (info[start].indic_position () == POS_PRE_M && |
807 (!start || | 1198 (!start || |
808 !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) & | 1199 !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) & |
809 » (FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) | | 1200 » FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATE
GORY_NON_SPACING_MARK)))) |
810 » FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | | 1201 info[start].mask |= indic_plan->mask_array[INIT]; |
811 » FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | | |
812 » FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) | | |
813 » FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) | | |
814 » FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | | |
815 » FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | | |
816 » FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))) | |
817 info[start].mask |= mask_array[INIT]; | |
818 | 1202 |
819 | 1203 |
820 | 1204 /* |
821 /* Finish off the clusters and go home! */ | 1205 * Finish off the clusters and go home! |
822 | 1206 */ |
823 if (!indic_options ().uniscribe_bug_compatible) | 1207 if (indic_options ().uniscribe_bug_compatible) |
824 { | 1208 { |
825 /* This is what Uniscribe does. Ie. add cluster boundaries after Halant,ZWN
J. | 1209 /* Uniscribe merges the entire cluster. |
826 * This means, half forms are submerged into the main consonants cluster. | 1210 * This means, half forms are submerged into the main consonants cluster. |
827 * This is unnecessary, and makes cursor positioning harder, but that's what | 1211 * This is unnecessary, and makes cursor positioning harder, but that's what |
828 * Uniscribe does. */ | 1212 * Uniscribe does. */ |
829 unsigned int cluster_start = start; | 1213 buffer->merge_clusters (start, end); |
830 for (unsigned int i = start + 1; i < start_of_last_cluster; i++) | |
831 if (info[i - 1].indic_category() == OT_H && info[i].indic_category() == OT
_ZWNJ) { | |
832 i++; | |
833 » buffer->merge_clusters (cluster_start, i); | |
834 » cluster_start = i; | |
835 } | |
836 start_of_last_cluster = cluster_start; | |
837 } | 1214 } |
838 | |
839 buffer->merge_clusters (start_of_last_cluster, end); | |
840 } | 1215 } |
841 | 1216 |
842 | 1217 |
843 static void | 1218 static void |
844 final_reordering (const hb_ot_map_t *map, | 1219 final_reordering (const hb_ot_shape_plan_t *plan, |
845 » » hb_face_t *face HB_UNUSED, | 1220 » » hb_font_t *font, |
846 » » hb_buffer_t *buffer, | 1221 » » hb_buffer_t *buffer) |
847 » » void *user_data HB_UNUSED) | |
848 { | 1222 { |
849 unsigned int count = buffer->len; | 1223 unsigned int count = buffer->len; |
850 if (!count) return; | 1224 if (unlikely (!count)) return; |
851 | |
852 hb_mask_t mask_array[ARRAY_LENGTH (indic_other_features)] = {0}; | |
853 unsigned int num_masks = ARRAY_LENGTH (indic_other_features); | |
854 for (unsigned int i = 0; i < num_masks; i++) | |
855 mask_array[i] = map->get_1_mask (indic_other_features[i].tag); | |
856 | 1225 |
857 hb_glyph_info_t *info = buffer->info; | 1226 hb_glyph_info_t *info = buffer->info; |
858 unsigned int last = 0; | 1227 unsigned int last = 0; |
859 unsigned int last_syllable = info[0].syllable(); | 1228 unsigned int last_syllable = info[0].syllable(); |
860 for (unsigned int i = 1; i < count; i++) | 1229 for (unsigned int i = 1; i < count; i++) |
861 if (last_syllable != info[i].syllable()) { | 1230 if (last_syllable != info[i].syllable()) { |
862 final_reordering_syllable (buffer, mask_array, last, i); | 1231 final_reordering_syllable (plan, buffer, last, i); |
863 last = i; | 1232 last = i; |
864 last_syllable = info[last].syllable(); | 1233 last_syllable = info[last].syllable(); |
865 } | 1234 } |
866 final_reordering_syllable (buffer, mask_array, last, count); | 1235 final_reordering_syllable (plan, buffer, last, count); |
| 1236 |
| 1237 /* Zero syllables now... */ |
| 1238 for (unsigned int i = 0; i < count; i++) |
| 1239 info[i].syllable() = 0; |
867 | 1240 |
868 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); | 1241 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); |
869 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position); | 1242 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position); |
870 } | 1243 } |
871 | 1244 |
872 | 1245 |
| 1246 static hb_ot_shape_normalization_mode_t |
| 1247 normalization_preference_indic (const hb_ot_shape_plan_t *plan) |
| 1248 { |
| 1249 return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT; |
| 1250 } |
873 | 1251 |
| 1252 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = |
| 1253 { |
| 1254 "indic", |
| 1255 collect_features_indic, |
| 1256 override_features_indic, |
| 1257 data_create_indic, |
| 1258 data_destroy_indic, |
| 1259 NULL, /* preprocess_text */ |
| 1260 normalization_preference_indic, |
| 1261 setup_masks_indic, |
| 1262 false, /* zero_width_attached_marks */ |
| 1263 }; |
OLD | NEW |