third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc - Issue 10510004: Roll harfbuzz-ng 3b8fd9c48f4bde368bf2d465c148b9743a9216ee

Side by Side Diff: third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic.cc

Issue 10510004: Roll harfbuzz-ng 3b8fd9c48f4bde368bf2d465c148b9743a9216ee (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Created 8 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « third_party/harfbuzz-ng/src/hb-ot-shape-complex-arabic-table.hh ('k') | third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic-machine.hh » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright © 2011 Google, Inc.	2 * Copyright © 2011,2012 Google, Inc.

3 *	3 *

4 * This is part of HarfBuzz, a text shaping library.	4 * This is part of HarfBuzz, a text shaping library.

5 *	5 *

6 * Permission is hereby granted, without written agreement and without	6 * Permission is hereby granted, without written agreement and without

7 * license or royalty fees, to use, copy, modify, and distribute this	7 * license or royalty fees, to use, copy, modify, and distribute this

8 * software and its documentation for any purpose, provided that the	8 * software and its documentation for any purpose, provided that the

9 * above copyright notice and the following two paragraphs appear in	9 * above copyright notice and the following two paragraphs appear in

10 * all copies of this software.	10 * all copies of this software.

11 *	11 *

12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR	12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR

13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES	13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES

14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN	14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN

15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH	15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH

16 * DAMAGE.	16 * DAMAGE.

17 *	17 *

18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,	18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,

19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND	19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS	20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS

21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO	21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO

22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.	22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

23 *	23 *

24 * Google Author(s): Behdad Esfahbod	24 * Google Author(s): Behdad Esfahbod

25 */	25 */

26	26

27 #include "hb-ot-shape-complex-private.hh"	27 #include "hb-ot-shape-complex-indic-private.hh"

	28 #include "hb-ot-shape-private.hh"

28	29

	30 static const struct indic_options_t

	31 {

	32 indic_options_t (void)

	33 {

	34 char *c = getenv ("HB_OT_INDIC_OPTIONS");

	35 uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");

	36 }

29	37

30	38 bool uniscribe_bug_compatible;

31 /* buffer var allocations */	39 } options;

32 #define indic_category() complex_var_persistent_u8_0() /* indic_category_t */

33 #define indic_position() complex_var_persistent_u8_1() /* indic_matra_category_t */

34

35 #define INDIC_TABLE_ELEMENT_TYPE uint8_t

36

37 /* Categories used in the OpenType spec:

38 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx

39 */

40 /* Note: This enum is duplicated in the -machine.rl source file.

41 * Not sure how to avoid duplication. */

42 enum indic_category_t {

43 OT_X = 0,

44 OT_C,

45 OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */

46 OT_V,

47 OT_N,

48 OT_H,

49 OT_ZWNJ,

50 OT_ZWJ,

51 OT_M,

52 OT_SM,

53 OT_VD,

54 OT_A,

55 OT_NBSP

56 };

57

58 /* Visual positions in a syllable from left to right. */

59 enum indic_position_t {

60 POS_PRE,

61 POS_BASE,

62 POS_ABOVE,

63 POS_BELOW,

64 POS_POST

65 };

66

67 /* Categories used in IndicSyllabicCategory.txt from UCD */

68 /* The assignments are guesswork */

69 enum indic_syllabic_category_t {

70 INDIC_SYLLABIC_CATEGORY_OTHER»» » = OT_X,

71

72 INDIC_SYLLABIC_CATEGORY_AVAGRAHA» » = OT_X,

73 INDIC_SYLLABIC_CATEGORY_BINDU»» » = OT_SM,

74 INDIC_SYLLABIC_CATEGORY_CONSONANT» » = OT_C,

75 INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD» = OT_C,

76 INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL» = OT_C,

77 INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER»= OT_C,

78 INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL» = OT_C,

79 INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER»= OT_NBSP,

80 INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED» = OT_C,

81 INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA» = OT_C,

82 INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER» = OT_X,

83 INDIC_SYLLABIC_CATEGORY_NUKTA»» » = OT_N,

84 INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER» = OT_X,

85 INDIC_SYLLABIC_CATEGORY_TONE_LETTER» » = OT_X,

86 INDIC_SYLLABIC_CATEGORY_TONE_MARK» » = OT_X,

87 INDIC_SYLLABIC_CATEGORY_VIRAMA» » = OT_H,

88 INDIC_SYLLABIC_CATEGORY_VISARGA» » = OT_SM,

89 INDIC_SYLLABIC_CATEGORY_VOWEL»» » = OT_V,

90 INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT» = OT_M,

91 INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT» = OT_V

92 };

93

94 /* Categories used in IndicMatraCategory.txt from UCD */

95 enum indic_matra_category_t {

96 INDIC_MATRA_CATEGORY_NOT_APPLICABLE» » = POS_BASE,

97

98 INDIC_MATRA_CATEGORY_LEFT» » » = POS_PRE,

99 INDIC_MATRA_CATEGORY_TOP» » » = POS_ABOVE,

100 INDIC_MATRA_CATEGORY_BOTTOM» » » = POS_BELOW,

101 INDIC_MATRA_CATEGORY_RIGHT» » » = POS_POST,

102

103 /* We don't really care much about these since we decompose them

104 * in the generic pre-shaping layer. They will only be used if

105 * the font does not cover the decomposition. In which case, we

106 * define these as aliases to the place we want the split-matra

107 * glyph to show up. Quite arbitrary. */

108 INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT»» = INDIC_MATRA_CATEGORY_BOTTOM,

109 INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT» » = INDIC_MATRA_CATEGORY_LEFT,

110 INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM» » = INDIC_MATRA_CATEGORY_BOTTOM,

111 INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT»= INDIC_MATRA_CATEGORY_BOTTOM,

112 INDIC_MATRA_CATEGORY_TOP_AND_LEFT» » = INDIC_MATRA_CATEGORY_LEFT,

113 INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT» = INDIC_MATRA_CATEGORY_LEFT,

114 INDIC_MATRA_CATEGORY_TOP_AND_RIGHT» » = INDIC_MATRA_CATEGORY_RIGHT,

115

116 INDIC_MATRA_CATEGORY_INVISIBLE» » = INDIC_MATRA_CATEGORY_NOT_APPLICABLE,

117 INDIC_MATRA_CATEGORY_OVERSTRUCK» » = INDIC_MATRA_CATEGORY_NOT_APPLICABLE,

118 INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT» = INDIC_MATRA_CATEGORY_NOT_APPLICABLE

119 };

120

121 /* Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and the comma operation

122 * because gcc fails to optimize the latter and fills the table in at runtime. */

123 #define INDIC_COMBINE_CATEGORIES(S,M) \

124 (ASSERT_STATIC_EXPR_ZERO (M == INDIC_MATRA_CATEGORY_NOT_APPLICABLE \|\| (S == INDIC_SYLLABIC_CATEGORY_VIRAMA \|\| S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT)) + \

125 ASSERT_STATIC_EXPR_ZERO (S < 16 && M < 16) + \

126 ((M << 4) \| S))

127

128 #include "hb-ot-shape-complex-indic-table.hh"

129

130 /* XXX

131 * This is a hack for now. We should:

132 * 1. Move this data into the main Indic table,

133 * and/or

134 * 2. Probe font lookups to determine consonant positions.

135 */

136 static const struct consonant_position_t {

137 hb_codepoint_t u;

138 indic_position_t position;

139 } consonant_positions[] = {

140 {0x0930, POS_BELOW},

141 {0x09AC, POS_BELOW},

142 {0x09AF, POS_POST},

143 {0x09B0, POS_BELOW},

144 {0x09F0, POS_BELOW},

145 {0x0A2F, POS_POST},

146 {0x0A30, POS_BELOW},

147 {0x0A35, POS_BELOW},

148 {0x0A39, POS_BELOW},

149 {0x0AB0, POS_BELOW},

150 {0x0B24, POS_BELOW},

151 {0x0B28, POS_BELOW},

152 {0x0B2C, POS_BELOW},

153 {0x0B2D, POS_BELOW},

154 {0x0B2E, POS_BELOW},

155 {0x0B2F, POS_POST},

156 {0x0B30, POS_BELOW},

157 {0x0B32, POS_BELOW},

158 {0x0B33, POS_BELOW},

159 {0x0B5F, POS_POST},

160 {0x0B71, POS_BELOW},

161 {0x0C15, POS_BELOW},

162 {0x0C16, POS_BELOW},

163 {0x0C17, POS_BELOW},

164 {0x0C18, POS_BELOW},

165 {0x0C19, POS_BELOW},

166 {0x0C1A, POS_BELOW},

167 {0x0C1B, POS_BELOW},

168 {0x0C1C, POS_BELOW},

169 {0x0C1D, POS_BELOW},

170 {0x0C1E, POS_BELOW},

171 {0x0C1F, POS_BELOW},

172 {0x0C20, POS_BELOW},

173 {0x0C21, POS_BELOW},

174 {0x0C22, POS_BELOW},

175 {0x0C23, POS_BELOW},

176 {0x0C24, POS_BELOW},

177 {0x0C25, POS_BELOW},

178 {0x0C26, POS_BELOW},

179 {0x0C27, POS_BELOW},

180 {0x0C28, POS_BELOW},

181 {0x0C2A, POS_BELOW},

182 {0x0C2B, POS_BELOW},

183 {0x0C2C, POS_BELOW},

184 {0x0C2D, POS_BELOW},

185 {0x0C2E, POS_BELOW},

186 {0x0C2F, POS_BELOW},

187 {0x0C30, POS_BELOW},

188 {0x0C32, POS_BELOW},

189 {0x0C33, POS_BELOW},

190 {0x0C35, POS_BELOW},

191 {0x0C36, POS_BELOW},

192 {0x0C37, POS_BELOW},

193 {0x0C38, POS_BELOW},

194 {0x0C39, POS_BELOW},

195 {0x0C95, POS_BELOW},

196 {0x0C96, POS_BELOW},

197 {0x0C97, POS_BELOW},

198 {0x0C98, POS_BELOW},

199 {0x0C99, POS_BELOW},

200 {0x0C9A, POS_BELOW},

201 {0x0C9B, POS_BELOW},

202 {0x0C9C, POS_BELOW},

203 {0x0C9D, POS_BELOW},

204 {0x0C9E, POS_BELOW},

205 {0x0C9F, POS_BELOW},

206 {0x0CA0, POS_BELOW},

207 {0x0CA1, POS_BELOW},

208 {0x0CA2, POS_BELOW},

209 {0x0CA3, POS_BELOW},

210 {0x0CA4, POS_BELOW},

211 {0x0CA5, POS_BELOW},

212 {0x0CA6, POS_BELOW},

213 {0x0CA7, POS_BELOW},

214 {0x0CA8, POS_BELOW},

215 {0x0CAA, POS_BELOW},

216 {0x0CAB, POS_BELOW},

217 {0x0CAC, POS_BELOW},

218 {0x0CAD, POS_BELOW},

219 {0x0CAE, POS_BELOW},

220 {0x0CAF, POS_BELOW},

221 {0x0CB0, POS_BELOW},

222 {0x0CB2, POS_BELOW},

223 {0x0CB3, POS_BELOW},

224 {0x0CB5, POS_BELOW},

225 {0x0CB6, POS_BELOW},

226 {0x0CB7, POS_BELOW},

227 {0x0CB8, POS_BELOW},

228 {0x0CB9, POS_BELOW},

229 {0x0CDE, POS_BELOW},

230 {0x0D2F, POS_POST},

231 {0x0D30, POS_POST},

232 {0x0D32, POS_BELOW},

233 {0x0D35, POS_POST},

234 };

235

236 /* XXX

237 * This is a hack for now. We should move this data into the main Indic table.

238 */

239 static const hb_codepoint_t ra_chars[] = {

240 0x0930, /* Devanagari */

241 0x09B0, /* Bengali */

242 0x09F0, /* Bengali */

243 //0x09F1, /* Bengali */

244 //0x0A30, /* Gurmukhi */

245 0x0AB0, /* Gujarati */

246 0x0B30, /* Oriya */

247 //0x0BB0, /* Tamil */

248 //0x0C30, /* Telugu */

249 0x0CB0, /* Kannada */

250 //0x0D30, /* Malayalam */

251 };

252	40

253 static int	41 static int

254 compare_codepoint (const void *pa, const void *pb)	42 compare_codepoint (const void *pa, const void *pb)

255 {	43 {

256 hb_codepoint_t a = * (hb_codepoint_t *) pa;	44 hb_codepoint_t a = * (hb_codepoint_t *) pa;

257 hb_codepoint_t b = * (hb_codepoint_t *) pb;	45 hb_codepoint_t b = * (hb_codepoint_t *) pb;

258	46

259 return a < b ? -1 : a == b ? 0 : +1;	47 return a < b ? -1 : a == b ? 0 : +1;

260 }	48 }

261	49

262 static indic_position_t	50 static indic_position_t

263 consonant_position (hb_codepoint_t u)	51 consonant_position (hb_codepoint_t u)

264 {	52 {

265 consonant_position_t *record;	53 consonant_position_t *record;

266	54

267 record = (consonant_position_t *) bsearch (&u, consonant_positions,	55 record = (consonant_position_t *) bsearch (&u, consonant_positions,

268 ARRAY_LENGTH (consonant_positions),	56 ARRAY_LENGTH (consonant_positions),

269 sizeof (consonant_positions[0]),	57 sizeof (consonant_positions[0]),

270 compare_codepoint);	58 compare_codepoint);

271	59

272 return record ? record->position : POS_BASE;	60 return record ? record->position : POS_BASE_C;

273 }	61 }

274	62

275 static bool	63 static bool

276 is_ra (hb_codepoint_t u)	64 is_ra (hb_codepoint_t u)

277 {	65 {

278 return !!bsearch (&u, ra_chars,	66 return !!bsearch (&u, ra_chars,

279 ARRAY_LENGTH (ra_chars),	67 ARRAY_LENGTH (ra_chars),

280 sizeof (ra_chars[0]),	68 sizeof (ra_chars[0]),

281 compare_codepoint);	69 compare_codepoint);

282 }	70 }

283	71

284 static bool	72 static bool

285 is_joiner (const hb_glyph_info_t &info)	73 is_joiner (const hb_glyph_info_t &info)

286 {	74 {

287 return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) \| FLAG (OT_ZWNJ)));	75 return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) \| FLAG (OT_ZWNJ)));

288 }	76 }

289	77

290 static bool	78 static bool

291 is_consonant (const hb_glyph_info_t &info)	79 is_consonant (const hb_glyph_info_t &info)

292 {	80 {

293 return !!(FLAG (info.indic_category()) & (FLAG (OT_C) \| FLAG (OT_Ra)));	81 /* Note:

	82 *

	83 * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels

	84 * cannot happen in a consonant syllable. The plus side however is, we can call the

	85 * consonant syllable logic from the vowel syllable function and get it all right! */

	86 return !!(FLAG (info.indic_category()) & (FLAG (OT_C) \| FLAG (OT_Ra) \| FLAG (OT_V) \| FLAG (OT_NBSP) \| FLAG (OT_DOTTEDCIRCLE)));

294 }	87 }

295	88

296 static const struct {	89 struct feature_list_t {

297 hb_tag_t tag;	90 hb_tag_t tag;

298 hb_bool_t is_global;	91 hb_bool_t is_global;

299 } indic_basic_features[] =	92 };

	93

	94 static const feature_list_t

	95 indic_basic_features[] =

300 {	96 {

301 {HB_TAG('n','u','k','t'), true},	97 {HB_TAG('n','u','k','t'), true},

302 {HB_TAG('a','k','h','n'), false},	98 {HB_TAG('a','k','h','n'), false},

303 {HB_TAG('r','p','h','f'), false},	99 {HB_TAG('r','p','h','f'), false},

304 {HB_TAG('r','k','r','f'), false},	100 {HB_TAG('r','k','r','f'), true},

305 {HB_TAG('p','r','e','f'), false},	101 {HB_TAG('p','r','e','f'), false},

306 {HB_TAG('b','l','w','f'), false},	102 {HB_TAG('b','l','w','f'), false},

307 {HB_TAG('h','a','l','f'), false},	103 {HB_TAG('h','a','l','f'), false},

308 {HB_TAG('v','a','t','u'), true},

309 {HB_TAG('p','s','t','f'), false},	104 {HB_TAG('p','s','t','f'), false},

310 {HB_TAG('c','j','c','t'), false},	105 {HB_TAG('c','j','c','t'), false},

	106 {HB_TAG('v','a','t','u'), true},

311 };	107 };

312	108

313 /* Same order as the indic_basic_features array */	109 /* Same order as the indic_basic_features array */

314 enum {	110 enum {

315 _NUKT,	111 _NUKT,

316 AKHN,	112 AKHN,

317 RPHF,	113 RPHF,

318 RKRF,	114 _RKRF,

319 PREF,	115 PREF,

320 BLWF,	116 BLWF,

321 HALF,	117 HALF,

322 _VATU,

323 PSTF,	118 PSTF,

324 CJCT	119 CJCT,

	120 VATU

325 };	121 };

326	122

327 static const hb_tag_t indic_other_features[] =	123 static const feature_list_t

	124 indic_other_features[] =

328 {	125 {

329 HB_TAG('p','r','e','s'),	126 {HB_TAG('i','n','i','t'), false},

330 HB_TAG('a','b','v','s'),	127 {HB_TAG('p','r','e','s'), true},

331 HB_TAG('b','l','w','s'),	128 {HB_TAG('a','b','v','s'), true},

332 HB_TAG('p','s','t','s'),	129 {HB_TAG('b','l','w','s'), true},

333 HB_TAG('h','a','l','n'),	130 {HB_TAG('p','s','t','s'), true},

	131 {HB_TAG('h','a','l','n'), true},

334	132

335 HB_TAG('d','i','s','t'),	133 {HB_TAG('d','i','s','t'), true},

336 HB_TAG('a','b','v','m'),	134 {HB_TAG('a','b','v','m'), true},

337 HB_TAG('b','l','w','m'),	135 {HB_TAG('b','l','w','m'), true},

	136 };

	137

	138 /* Same order as the indic_other_features array */

	139 enum {

	140 INIT

338 };	141 };

339	142

340	143

341 static void	144 static void

342 initial_reordering (const hb_ot_map_t *map,	145 initial_reordering (const hb_ot_map_t *map,

343 hb_face_t *face,	146 hb_face_t *face,

344 hb_buffer_t *buffer,	147 hb_buffer_t *buffer,

345 void *user_data HB_UNUSED);	148 void *user_data HB_UNUSED);

346 static void	149 static void

347 final_reordering (const hb_ot_map_t *map,	150 final_reordering (const hb_ot_map_t *map,

348 hb_face_t *face,	151 hb_face_t *face,

349 hb_buffer_t *buffer,	152 hb_buffer_t *buffer,

350 void *user_data HB_UNUSED);	153 void *user_data HB_UNUSED);

351	154

352 void	155 void

353 _hb_ot_shape_complex_collect_features_indic (hb_ot_map_builder_t *map, const hb_segment_properties_t *props)	156 _hb_ot_shape_complex_collect_features_indic (hb_ot_map_builder_t *map,

	157 » » » » » const hb_segment_properties_t *props HB_UNUSED)

354 {	158 {

355 map->add_bool_feature (HB_TAG('l','o','c','l'));	159 map->add_bool_feature (HB_TAG('l','o','c','l'));

356 /* The Indic specs do not require ccmp, but we apply it here since if	160 /* The Indic specs do not require ccmp, but we apply it here since if

357 * there is a use of it, it's typically at the beginning. */	161 * there is a use of it, it's typically at the beginning. */

358 map->add_bool_feature (HB_TAG('c','c','m','p'));	162 map->add_bool_feature (HB_TAG('c','c','m','p'));

359	163

360 map->add_gsub_pause (initial_reordering, NULL);	164 map->add_gsub_pause (initial_reordering, NULL);

361	165

362 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++)	166 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++) {

363 map->add_bool_feature (indic_basic_features[i].tag, indic_basic_features[i].is_global);	167 map->add_bool_feature (indic_basic_features[i].tag, indic_basic_features[i].is_global);

	168 map->add_gsub_pause (NULL, NULL);

	169 }

364	170

365 map->add_gsub_pause (final_reordering, NULL);	171 map->add_gsub_pause (final_reordering, NULL);

366	172

367 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++)	173 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++) {

368 map->add_bool_feature (indic_other_features[i], true);	174 map->add_bool_feature (indic_other_features[i].tag, indic_other_features[i].is_global);

	175 map->add_gsub_pause (NULL, NULL);

	176 }

369 }	177 }

370	178

371	179

372 bool	180 hb_ot_shape_normalization_mode_t

373 _hb_ot_shape_complex_prefer_decomposed_indic (void)	181 _hb_ot_shape_complex_normalization_preference_indic (void)

374 {	182 {

375 /* We want split matras decomposed by the common shaping logic. */	183 /* We want split matras decomposed by the common shaping logic. */

376 return TRUE;	184 return HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;

377 }	185 }

378	186

379	187

380 void	188 void

381 _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map, hb_buffer_t *buffer)	189 _hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,

	190 » » » » » hb_buffer_t *buffer,

	191 » » » » » hb_font_t *font HB_UNUSED)

382 {	192 {

383 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category);	193 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category);

384 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position);	194 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position);

385	195

386 /* We cannot setup masks here. We save information about characters	196 /* We cannot setup masks here. We save information about characters

387 * and setup masks later on in a pause-callback. */	197 * and setup masks later on in a pause-callback. */

388	198

389 unsigned int count = buffer->len;	199 unsigned int count = buffer->len;

390 for (unsigned int i = 0; i < count; i++)	200 for (unsigned int i = 0; i < count; i++)

391 {	201 {

392 unsigned int type = get_indic_categories (buffer->info[i].codepoint);	202 hb_glyph_info_t &info = buffer->info[i];

	203 unsigned int type = get_indic_categories (info.codepoint);

393	204

394 buffer->info[i].indic_category() = type & 0x0F;	205 info.indic_category() = type & 0x0F;

395 buffer->info[i].indic_position() = type >> 4;	206 info.indic_position() = type >> 4;

396	207

397 if (buffer->info[i].indic_category() == OT_C) {	208 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe

398 buffer->info[i].indic_position() = consonant_position (buffer->info[i].codepoint);	209 * treats U+0951..U+0952 all as OT_VD.

399 if (is_ra (buffer->info[i].codepoint))	210 * TESTS:

400 » buffer->info[i].indic_category() = OT_Ra;	211 * U+092E,U+0947,U+0952

401 } else if (buffer->info[i].codepoint == 0x200C)	212 * U+092E,U+0952,U+0947

402 buffer->info[i].indic_category() = OT_ZWNJ;	213 * U+092E,U+0947,U+0951

403 else if (buffer->info[i].codepoint == 0x200D)	214 * U+092E,U+0951,U+0947

404 buffer->info[i].indic_category() = OT_ZWJ;	215 * */

	216 if (unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x0951, 0x0954)))

	217 info.indic_category() = OT_VD;

	218

	219 if (info.indic_category() == OT_C) {

	220 info.indic_position() = consonant_position (info.codepoint);

	221 if (is_ra (info.codepoint))

	222 » info.indic_category() = OT_Ra;

	223 } else if (info.indic_category() == OT_SM \|\|

	224 » info.indic_category() == OT_VD) {

	225 info.indic_position() = POS_SMVD;

	226 } else if (unlikely (info.codepoint == 0x200C))

	227 info.indic_category() = OT_ZWNJ;

	228 else if (unlikely (info.codepoint == 0x200D))

	229 info.indic_category() = OT_ZWJ;

	230 else if (unlikely (info.codepoint == 0x25CC))

	231 info.indic_category() = OT_DOTTEDCIRCLE;

405 }	232 }

406 }	233 }

407	234

408 static int	235 static int

409 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)	236 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)

410 {	237 {

411 int a = pa->indic_position();	238 int a = pa->indic_position();

412 int b = pb->indic_position();	239 int b = pb->indic_position();

413	240

414 return a < b ? -1 : a == b ? 0 : +1;	241 return a < b ? -1 : a == b ? 0 : +1;

415 }	242 }

416	243

	244 /* Rules from:

	245 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */

	246

417 static void	247 static void

418 found_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *mask_array,	248 initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *mask_array,

419 » » » unsigned int start, unsigned int end)	249 » » » » unsigned int start, unsigned int end)

420 {	250 {

421 unsigned int i;

422 hb_glyph_info_t *info = buffer->info;	251 hb_glyph_info_t *info = buffer->info;

423	252

424 /* Comments from:

425 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */

426	253

427 /* 1. Find base consonant:	254 /* 1. Find base consonant:

428 *	255 *

429 * The shaping engine finds the base consonant of the syllable, using the	256 * The shaping engine finds the base consonant of the syllable, using the

430 * following algorithm: starting from the end of the syllable, move backwards	257 * following algorithm: starting from the end of the syllable, move backwards

431 * until a consonant is found that does not have a below-base or post-base	258 * until a consonant is found that does not have a below-base or post-base

432 * form (post-base forms have to follow below-base forms), or that is not a	259 * form (post-base forms have to follow below-base forms), or that is not a

433 * pre-base reordering Ra, or arrive at the first consonant. The consonant	260 * pre-base reordering Ra, or arrive at the first consonant. The consonant

434 * stopped at will be the base.	261 * stopped at will be the base.

435 *	262 *

436 * o If the syllable starts with Ra + Halant (in a script that has Reph)	263 * o If the syllable starts with Ra + Halant (in a script that has Reph)

437 * and has more than one consonant, Ra is excluded from candidates for	264 * and has more than one consonant, Ra is excluded from candidates for

438 * base consonants.	265 * base consonants.

439 */	266 */

440	267

441 unsigned int base = end;	268 unsigned int base = end;

	269 bool has_reph = false;

442	270

443 /* -> starting from the end of the syllable, move backwards */	271 {

444 i = end;	272 /* -> If the syllable starts with Ra + Halant (in a script that has Reph)

445 unsigned int limit = start;	273 * and has more than one consonant, Ra is excluded from candidates for

446 if (info[start].indic_category() == OT_Ra && start + 2 <= end) {	274 * base consonants. */

447 limit += 2;	275 unsigned int limit = start;

448 base = start;	276 if (mask_array[RPHF] &&

449 };	277 » start + 3 <= end &&

450 do {	278 » info[start].indic_category() == OT_Ra &&

451 i--;	279 » info[start + 1].indic_category() == OT_H &&

452 /* -> until a consonant is found */	280 » !is_joiner (info[start + 2]))

453 if (is_consonant (info[i]))

454 {	281 {

455 /* -> that does not have a below-base or post-base form	282 limit += 2;

456 * (post-base forms have to follow below-base forms), */	283 base = start;

457 if (info[i].indic_position() != POS_BELOW &&	284 has_reph = true;

458 » info[i].indic_position() != POS_POST)	285 };

	286

	287 /* -> starting from the end of the syllable, move backwards */

	288 unsigned int i = end;

	289 do {

	290 i--;

	291 /* -> until a consonant is found */

	292 if (is_consonant (info[i]))

459 {	293 {

460 base = i;	294 » /* -> that does not have a below-base or post-base form

461 » break;	295 » * (post-base forms have to follow below-base forms), */

	296 » if (info[i].indic_position() != POS_BELOW_C &&

	297 » info[i].indic_position() != POS_POST_C)

	298 » {

	299 » base = i;

	300 » break;

	301 » }

	302

	303 » /* -> or that is not a pre-base reordering Ra,

	304 » *

	305 » * TODO

	306 » */

	307

	308 » /* -> or arrive at the first consonant. The consonant stopped at will

	309 » * be the base. */

	310 » base = i;

462 }	311 }

	312 else

	313 if (is_joiner (info[i]))

	314 break;

	315 } while (i > limit);

	316 if (base < start)

	317 base = start; /* Just in case... */

463	318

464 /* -> or that is not a pre-base reordering Ra,

465 *

466 * TODO

467 */

468	319

469 /* -> o If the syllable starts with Ra + Halant (in a script that has Reph)	320 /* -> If the syllable starts with Ra + Halant (in a script that has Reph)

470 * and has more than one consonant, Ra is excluded from candidates for	321 * and has more than one consonant, Ra is excluded from candidates for

471 * base consonants.	322 * base consonants. */

472 *	323 if (has_reph && base == start) {

473 * IMPLEMENTATION NOTES:	324 /* Have no other consonant, so Reph is not formed and Ra becomes base. */

474 *	325 has_reph = false;

475 * We do this by adjusting limit accordingly before entering the loop.

476 */

477

478 /* -> or arrive at the first consonant. The consonant stopped at will

479 * be the base. */

480 base = i;

481 }	326 }

482 else	327 }

483 if (is_joiner (info[i]))

484 break;

485 } while (i > limit);

486 if (base < start)

487 base = start; /* Just in case... */

488	328

489	329

490 /* 2. Decompose and reorder Matras:	330 /* 2. Decompose and reorder Matras:

491 *	331 *

492 * Each matra and any syllable modifier sign in the cluster are moved to the	332 * Each matra and any syllable modifier sign in the cluster are moved to the

493 * appropriate position relative to the consonant(s) in the cluster. The	333 * appropriate position relative to the consonant(s) in the cluster. The

494 * shaping engine decomposes two- or three-part matras into their constituent	334 * shaping engine decomposes two- or three-part matras into their constituent

495 * parts before any repositioning. Matra characters are classified by which	335 * parts before any repositioning. Matra characters are classified by which

496 * consonant in a conjunct they have affinity for and are reordered to the	336 * consonant in a conjunct they have affinity for and are reordered to the

497 * following positions:	337 * following positions:

(...skipping 16 matching lines...) Expand all Loading...
514 * if necessary, so that the nukta is first.	354 * if necessary, so that the nukta is first.

515 *	355 *

516 * IMPLEMENTATION NOTES:	356 * IMPLEMENTATION NOTES:

517 *	357 *

518 * We don't need to do this: the normalize() routine already did this for us.	358 * We don't need to do this: the normalize() routine already did this for us.

519 */	359 */

520	360

521	361

522 /* Reorder characters */	362 /* Reorder characters */

523	363

524 for (i = start; i < base; i++)	364 for (unsigned int i = start; i < base; i++)

525 info[i].indic_position() = POS_PRE;	365 info[i].indic_position() = POS_PRE_C;

526 info[base].indic_position() = POS_BASE;	366 info[base].indic_position() = POS_BASE_C;

527

528	367

529 /* Handle beginning Ra */	368 /* Handle beginning Ra */

530 if (start + 3 <= end &&	369 if (has_reph)

531 info[start].indic_category() == OT_Ra &&	370 info[start].indic_position() = POS_RA_TO_BECOME_REPH;

532 info[start + 1].indic_category() == OT_H &&

533 !is_joiner (info[start + 2]))

534 {

535 info[start].indic_position() = POS_POST;

536 info[start].mask = mask_array[RPHF];

537 }

538	371

539 /* For old-style Indic script tags, move the first post-base Halant after	372 /* For old-style Indic script tags, move the first post-base Halant after

540 * last consonant. */	373 * last consonant. */

541 if ((map->get_chosen_script (0) & 0x000000FF) != '2') {	374 if ((map->get_chosen_script (0) & 0x000000FF) != '2') {

542 /* We should only do this for Indic scripts which have a version two I guess. */	375 /* We should only do this for Indic scripts which have a version two I guess. */

543 for (i = base + 1; i < end; i++)	376 for (unsigned int i = base + 1; i < end; i++)

544 if (info[i].indic_category() == OT_H) {	377 if (info[i].indic_category() == OT_H) {

545 unsigned int j;	378 unsigned int j;

546 for (j = end - 1; j > i; j--)	379 for (j = end - 1; j > i; j--)

547 » if ((FLAG (info[j].indic_category()) & (FLAG (OT_C) \| FLAG (OT_Ra))))	380 » if (is_consonant (info[j]))

548 break;	381 break;

549 if (j > i) {	382 if (j > i) {

550 /* Move Halant to after last consonant. */	383 /* Move Halant to after last consonant. */

551 hb_glyph_info_t t = info[i];	384 hb_glyph_info_t t = info[i];

552 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0]));	385 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0]));

553 info[j] = t;	386 info[j] = t;

554 }	387 }

555 break;	388 break;

556 }	389 }

557 }	390 }

558	391

559 /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */	392 /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */

560 for (i = start + 1; i < end; i++)	393 if (!options.uniscribe_bug_compatible)

561 if ((FLAG (info[i].indic_category()) &	394 {

562 » (FLAG (OT_ZWNJ) \| FLAG (OT_ZWJ) \| FLAG (OT_N) \| FLAG (OT_H))))	395 /* Please update the Uniscribe branch when touching this! */

563 info[i].indic_position() = info[i - 1].indic_position();	396 for (unsigned int i = start + 1; i < end; i++)

	397 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) \| FLAG (OT_ZWJ) \| FLAG (OT_N) \| FLAG (OT_H))))

	398 » info[i].indic_position() = info[i - 1].indic_position();

	399 } else {

	400 /*

	401 * Uniscribe doesn't move the Halant with Left Matra.

	402 * TEST: U+092B,U+093F,U+094DE

	403 */

	404 /* Please update the non-Uniscribe branch when touching this! */

	405 for (unsigned int i = start + 1; i < end; i++)

	406 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) \| FLAG (OT_ZWJ) \| FLAG (OT_N) \| FLAG (OT_H)))) {

	407 » info[i].indic_position() = info[i - 1].indic_position();

	408 » if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M)

	409 » for (unsigned int j = i; j > start; j--)

	410 » if (info[j - 1].indic_position() != POS_PRE_M) {

	411 » info[i].indic_position() = info[j - 1].indic_position();

	412 » break;

	413 » }

	414 }

	415 }

564	416

565 /* We do bubble-sort, skip malicious clusters attempts */	417 /* We do bubble-sort, skip malicious clusters attempts */

566 if (end - start > 20)	418 if (end - start < 64)

567 return;	419 {

568	420 /* Sit tight, rock 'n roll! */

569 /* Sit tight, rock 'n roll! */	421 hb_bubble_sort (info + start, end - start, compare_indic_order);

570 hb_bubble_sort (info + start, end - start, compare_indic_order);	422 /* Find base again */

	423 base = end;

	424 for (unsigned int i = start; i < end; i++)

	425 if (info[i].indic_position() == POS_BASE_C) {

	426 base = i;

	427 » break;

	428 }

	429 }

571	430

572 /* Setup masks now */	431 /* Setup masks now */

573	432

574 {	433 {

575 hb_mask_t mask;	434 hb_mask_t mask;

576	435

	436 /* Reph */

	437 for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++)

	438 info[i].mask \|= mask_array[RPHF];

	439

577 /* Pre-base */	440 /* Pre-base */

578 mask = mask_array[HALF] \| mask_array[AKHN] \| mask_array[CJCT];	441 mask = mask_array[HALF] \| mask_array[AKHN] \| mask_array[CJCT];

579 for (i = start; i < base; i++)	442 for (unsigned int i = start; i < base; i++)

580 info[i].mask \|= mask;	443 info[i].mask \|= mask;

581 /* Base */	444 /* Base */

582 mask = mask_array[AKHN] \| mask_array[CJCT];	445 mask = mask_array[AKHN] \| mask_array[CJCT];

583 info[base].mask \|= mask;	446 info[base].mask \|= mask;

584 /* Post-base */	447 /* Post-base */

585 mask = mask_array[BLWF] \| mask_array[PSTF] \| mask_array[CJCT];	448 mask = mask_array[BLWF] \| mask_array[PSTF] \| mask_array[CJCT];

586 for (i = base + 1; i < end; i++)	449 for (unsigned int i = base + 1; i < end; i++)

587 info[i].mask \|= mask;	450 info[i].mask \|= mask;

588 }	451 }

589	452

590 /* Apply ZWJ/ZWNJ effects */	453 /* Apply ZWJ/ZWNJ effects */

591 for (i = start + 1; i < end; i++)	454 for (unsigned int i = start + 1; i < end; i++)

592 if (is_joiner (info[i])) {	455 if (is_joiner (info[i])) {

593 bool non_joiner = info[i].indic_category() == OT_ZWNJ;	456 bool non_joiner = info[i].indic_category() == OT_ZWNJ;

594 unsigned int j = i;	457 unsigned int j = i;

595	458

596 do {	459 do {

597 j--;	460 j--;

598	461

599 » /* Reading the Unicode and OpenType specs, I think the following line	462 » info[j].mask &= ~mask_array[CJCT];

600 » * is correct, but this is not what the test suite expects currently.

601 » * The test suite has been drinking, not me... But disable while

602 » * investigating.

603 » */

604 » //info[j].mask &= !mask_array[CJCT];

605 if (non_joiner)	463 if (non_joiner)

606 » info[j].mask &= !mask_array[HALF];	464 » info[j].mask &= ~mask_array[HALF];

607	465

608 } while (j > start && !is_consonant (info[j]));	466 } while (j > start && !is_consonant (info[j]));

609 }	467 }

610 }	468 }

611	469

612	470

613 static void	471 static void

614 found_vowel_syllable (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *mask_array,	472 initial_reordering_vowel_syllable (const hb_ot_map_t *map,

615 » » unsigned int start, unsigned int end)	473 » » » » hb_buffer_t *buffer,

	474 » » » » hb_mask_t *mask_array,

	475 » » » » unsigned int start, unsigned int end)

616 {	476 {

617 /* TODO	477 /* We made the vowels look like consonants. So let's call the consonant logic! */

618 * Not clear to me how this should work. Do the matras move to before the	478 initial_reordering_consonant_syllable (map, buffer, mask_array, start, end);

619 * independent vowel? No idea.

620 */

621 }	479 }

622	480

623 static void	481 static void

624 found_standalone_cluster (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *mask_array,	482 initial_reordering_standalone_cluster (const hb_ot_map_t *map,

625 » » » unsigned int start, unsigned int end)	483 » » » » hb_buffer_t *buffer,

	484 » » » » hb_mask_t *mask_array,

	485 » » » » unsigned int start, unsigned int end)

626 {	486 {

627 /* TODO	487 /* We treat NBSP/dotted-circle as if they are consonants, so we should just chain.

628 * Easiest thing to do here is to convert the NBSP to consonant and	488 * Only if not in compatibility mode that is... */

629 * call found_consonant_syllable.	489

630 */	490 if (options.uniscribe_bug_compatible)

	491 {

	492 /* For dotted-circle, this is what Uniscribe does:

	493 * If dotted-circle is the last glyph, it just does nothing.

	494 * Ie. It doesn't form Reph. */

	495 if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE)

	496 return;

	497 }

	498

	499 initial_reordering_consonant_syllable (map, buffer, mask_array, start, end);

631 }	500 }

632	501

633 static void	502 static void

634 found_non_indic (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *mask_array,	503 initial_reordering_non_indic (const hb_ot_map_t *map HB_UNUSED,

635 » » unsigned int start, unsigned int end)	504 » » » hb_buffer_t *buffer HB_UNUSED,

	505 » » » hb_mask_t *mask_array HB_UNUSED,

	506 » » » unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)

636 {	507 {

637 /* Nothing to do right now. If we ever switch to using the output	508 /* Nothing to do right now. If we ever switch to using the output

638 * buffer in the reordering process, we'd need to next_glyph() here. */	509 * buffer in the reordering process, we'd need to next_glyph() here. */

639 }	510 }

640	511

641 #include "hb-ot-shape-complex-indic-machine.hh"	512 #include "hb-ot-shape-complex-indic-machine.hh"

642	513

643 static void	514 static void

644 remove_joiners (hb_buffer_t *buffer)

645 {

646 /* For now we remove joiners. However, Uniscribe seems to keep them

647 * and output a zero-width space glyph for them. It is not clear to

648 * me how that is supposed to interact with GSUB. */

649

650 buffer->clear_output ();

651 unsigned int count = buffer->len;

652 for (buffer->idx = 0; buffer->idx < count;)

653 if (unlikely (is_joiner (buffer->info[buffer->idx])))

654 buffer->skip_glyph ();

655 else

656 buffer->next_glyph ();

657

658 buffer->swap_buffers ();

659 }

660

661 static void

662 initial_reordering (const hb_ot_map_t *map,	515 initial_reordering (const hb_ot_map_t *map,

663 » » hb_face_t *face,	516 » » hb_face_t *face HB_UNUSED,

664 hb_buffer_t *buffer,	517 hb_buffer_t *buffer,

665 void *user_data HB_UNUSED)	518 void *user_data HB_UNUSED)

666 {	519 {

667 hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0};	520 hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0};

668 unsigned int num_masks = ARRAY_LENGTH (indic_basic_features);	521 unsigned int num_masks = ARRAY_LENGTH (indic_basic_features);

669 for (unsigned int i = 0; i < num_masks; i++)	522 for (unsigned int i = 0; i < num_masks; i++)

670 mask_array[i] = map->get_1_mask (indic_basic_features[i].tag);	523 mask_array[i] = map->get_1_mask (indic_basic_features[i].tag);

671	524

672 find_syllables (map, buffer, mask_array);	525 find_syllables (map, buffer, mask_array);

673

674 remove_joiners (buffer);

675 }	526 }

676	527

677 static void	528 static void

678 final_reordering (const hb_ot_map_t *map,	529 final_reordering_syllable (hb_buffer_t *buffer, hb_mask_t *mask_array,

679 » » hb_face_t *face,	530 » » » unsigned int start, unsigned int end)

680 » » hb_buffer_t *buffer,

681 » » void *user_data HB_UNUSED)

682 {	531 {

	532 hb_glyph_info_t *info = buffer->info;

	533

683 /* 4. Final reordering:	534 /* 4. Final reordering:

684 *	535 *

685 * After the localized forms and basic shaping forms GSUB features have been	536 * After the localized forms and basic shaping forms GSUB features have been

686 * applied (see below), the shaping engine performs some final glyph	537 * applied (see below), the shaping engine performs some final glyph

687 * reordering before applying all the remaining font features to the entire	538 * reordering before applying all the remaining font features to the entire

688 * cluster.	539 * cluster.

689 *	540 */

690 * o Reorder matras:	541

	542 /* Find base again */

	543 unsigned int base = end;

	544 for (unsigned int i = start; i < end; i++)

	545 if (info[i].indic_position() == POS_BASE_C) {

	546 base = i;

	547 break;

	548 }

	549

	550 if (base == start) {

	551 /* There's no Reph, and no left Matra to reposition. Just merge the cluster

	552 * and go home. */

	553 buffer->merge_clusters (start, end);

	554 return;

	555 }

	556

	557 unsigned int start_of_last_cluster = base;

	558

	559 /* o Reorder matras:

691 *	560 *

692 * If a pre-base matra character had been reordered before applying basic	561 * If a pre-base matra character had been reordered before applying basic

693 * features, the glyph can be moved closer to the main consonant based on	562 * features, the glyph can be moved closer to the main consonant based on

694 * whether half-forms had been formed. Actual position for the matra is	563 * whether half-forms had been formed. Actual position for the matra is

695 * defined as “after last standalone halant glyph, after initial matra	564 * defined as “after last standalone halant glyph, after initial matra

696 * position and before the main consonant”. If ZWJ or ZWNJ follow this	565 * position and before the main consonant”. If ZWJ or ZWNJ follow this

697 * halant, position is moved after it.	566 * halant, position is moved after it.

698 *	567 */

699 * o Reorder reph:	568

	569 {

	570 unsigned int new_matra_pos = base - 1;

	571 while (new_matra_pos > start &&

	572 » !(FLAG (info[new_matra_pos].indic_category()) & (FLAG (OT_M) \| FLAG (OT_H))))

	573 new_matra_pos--;

	574 /* If we found no Halant we are done. Otherwise only proceed if the Halant does

	575 * not belong to the Matra itself! */

	576 if (info[new_matra_pos].indic_category() == OT_H &&

	577 » info[new_matra_pos].indic_position() != POS_PRE_M) {

	578 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */

	579 if (new_matra_pos + 1 < end && is_joiner (info[new_matra_pos + 1]))

	580 » new_matra_pos++;

	581

	582 /* Now go see if there's actually any matras... */

	583 for (unsigned int i = new_matra_pos; i > start; i--)

	584 » if (info[i - 1].indic_position () == POS_PRE_M)

	585 » {

	586 » unsigned int old_matra_pos = i - 1;

	587 » hb_glyph_info_t matra = info[old_matra_pos];

	588 » memmove (&info[old_matra_pos], &info[old_matra_pos + 1], (new_matra_pos - old_matra_pos) * sizeof (info[0]));

	589 » info[new_matra_pos] = matra;

	590 » start_of_last_cluster = MIN (new_matra_pos, start_of_last_cluster);

	591 » new_matra_pos--;

	592 » }

	593 }

	594 }

	595

	596

	597 /* o Reorder reph:

700 *	598 *

701 * Reph’s original position is always at the beginning of the syllable,	599 * Reph’s original position is always at the beginning of the syllable,

702 * (i.e. it is not reordered at the character reordering stage). However,	600 * (i.e. it is not reordered at the character reordering stage). However,

703 * it will be reordered according to the basic-forms shaping results.	601 * it will be reordered according to the basic-forms shaping results.

704 * Possible positions for reph, depending on the script, are; after main,	602 * Possible positions for reph, depending on the script, are; after main,

705 * before post-base consonant forms, and after post-base consonant forms.	603 * before post-base consonant forms, and after post-base consonant forms.

706 *	604 */

707 * 1. If reph should be positioned after post-base consonant forms,	605

708 * proceed to step 5.	606 /* If there's anything after the Ra that has the REPH pos, it ought to be halant.

709 *	607 * Which means that the font has failed to ligate the Reph. In which case, we

710 * 2. If the reph repositioning class is not after post-base: target	608 * shouldn't move. */

711 * position is after the first explicit halant glyph between the	609 if (start + 1 < end &&

712 * first post-reph consonant and last main consonant. If ZWJ or ZWNJ	610 info[start].indic_position() == POS_RA_TO_BECOME_REPH &&

713 * are following this halant, position is moved after it. If such	611 info[start + 1].indic_position() != POS_RA_TO_BECOME_REPH)

714 * position is found, this is the target position. Otherwise,	612 {

715 * proceed to the next step.	613 unsigned int new_reph_pos;

716 *	614

717 * Note: in old-implementation fonts, where classifications were	615 enum reph_position_t {

718 * fixed in shaping engine, there was no case where reph position	616 REPH_AFTER_MAIN,

719 * will be found on this step.	617 REPH_BEFORE_SUBSCRIPT,

720 *	618 REPH_AFTER_SUBSCRIPT,

721 * 3. If reph should be repositioned after the main consonant: from the	619 REPH_BEFORE_POSTSCRIPT,

722 * first consonant not ligated with main, or find the first	620 REPH_AFTER_POSTSCRIPT,

723 * consonant that is not a potential pre-base reordering Ra.	621 } reph_pos;

724 *	622

725 *	623 /* XXX Figure out old behavior too */

726 * 4. If reph should be positioned before post-base consonant, find	624 switch ((hb_tag_t) buffer->props.script)

727 * first post-base classified consonant not ligated with main. If no	625 {

728 * consonant is found, the target position should be before the	626 case HB_SCRIPT_MALAYALAM:

729 * first matra, syllable modifier sign or vedic sign.	627 case HB_SCRIPT_ORIYA:

730 *	628 » reph_pos = REPH_AFTER_MAIN;

731 * 5. If no consonant is found in steps 3 or 4, move reph to a position	629 » break;

732 * immediately before the first post-base matra, syllable modifier	630

733 * sign or vedic sign that has a reordering class after the intended	631 case HB_SCRIPT_GURMUKHI:

734 * reph position. For example, if the reordering position for reph	632 » reph_pos = REPH_BEFORE_SUBSCRIPT;

735 * is post-main, it will skip above-base matras that also have a	633 » break;

736 * post-main position.	634

737 *	635 case HB_SCRIPT_BENGALI:

738 * 6. Otherwise, reorder reph to the end of the syllable.	636 » reph_pos = REPH_AFTER_SUBSCRIPT;

739 *	637 » break;

740 * o Reorder pre-base reordering consonants:	638

	639 default:

	640 case HB_SCRIPT_DEVANAGARI:

	641 case HB_SCRIPT_GUJARATI:

	642 » reph_pos = REPH_BEFORE_POSTSCRIPT;

	643 » break;

	644

	645 case HB_SCRIPT_KANNADA:

	646 case HB_SCRIPT_TAMIL:

	647 case HB_SCRIPT_TELUGU:

	648 » reph_pos = REPH_AFTER_POSTSCRIPT;

	649 » break;

	650 }

	651

	652 /* 1. If reph should be positioned after post-base consonant forms,

	653 * proceed to step 5.

	654 */

	655 if (reph_pos == REPH_AFTER_POSTSCRIPT)

	656 {

	657 goto reph_step_5;

	658 }

	659

	660 /* 2. If the reph repositioning class is not after post-base: target

	661 * position is after the first explicit halant glyph between the

	662 * first post-reph consonant and last main consonant. If ZWJ or ZWNJ

	663 * are following this halant, position is moved after it. If such

	664 * position is found, this is the target position. Otherwise,

	665 * proceed to the next step.

	666 *

	667 * Note: in old-implementation fonts, where classifications were

	668 * fixed in shaping engine, there was no case where reph position

	669 * will be found on this step.

	670 */

	671 {

	672 new_reph_pos = start + 1;

	673 while (new_reph_pos < base && info[new_reph_pos].indic_category() != OT_H)

	674 » new_reph_pos++;

	675

	676 if (new_reph_pos < base && info[new_reph_pos].indic_category() == OT_H) {

	677 » /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */

	678 » if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))

	679 » new_reph_pos++;

	680 » goto reph_move;

	681 }

	682 }

	683

	684 /* 3. If reph should be repositioned after the main consonant: find the

	685 * first consonant not ligated with main, or find the first

	686 * consonant that is not a potential pre-base reordering Ra.

	687 */

	688 if (reph_pos == REPH_AFTER_MAIN)

	689 {

	690 /* XXX */

	691 }

	692

	693 /* 4. If reph should be positioned before post-base consonant, find

	694 * first post-base classified consonant not ligated with main. If no

	695 * consonant is found, the target position should be before the

	696 * first matra, syllable modifier sign or vedic sign.

	697 */

	698 /* This is our take on what step 4 is trying to say (and failing, BADLY). */

	699 if (reph_pos == REPH_AFTER_SUBSCRIPT)

	700 {

	701 new_reph_pos = base;

	702 while (new_reph_pos < end &&

	703 » !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) \| FLAG (POS_POST_M) \| FLAG (POS_SMVD))))

	704 » new_reph_pos++;

	705 if (new_reph_pos < end)

	706 goto reph_move;

	707 }

	708

	709 /* 5. If no consonant is found in steps 3 or 4, move reph to a position

	710 * immediately before the first post-base matra, syllable modifier

	711 * sign or vedic sign that has a reordering class after the intended

	712 * reph position. For example, if the reordering position for reph

	713 * is post-main, it will skip above-base matras that also have a

	714 * post-main position.

	715 */

	716 reph_step_5:

	717 {

	718 /* XXX */

	719 }

	720

	721 /* 6. Otherwise, reorder reph to the end of the syllable.

	722 */

	723 {

	724 new_reph_pos = end - 1;

	725 while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD)

	726 » new_reph_pos--;

	727

	728 /*

	729 * If the Reph is to be ending up after a Matra,Halant sequence,

	730 * position it before that Halant so it can interact with the Matra.

	731 * However, if it's a plain Consonant,Halant we shouldn't do that.

	732 * Uniscribe doesn't do this.

	733 * TEST: U+0930,U+094D,U+0915,U+094B,U+094D

	734 */

	735 if (!options.uniscribe_bug_compatible &&

	736 » unlikely (info[new_reph_pos].indic_category() == OT_H)) {

	737 » for (unsigned int i = base + 1; i < new_reph_pos; i++)

	738 » if (info[i].indic_category() == OT_M) {

	739 » /* Ok, got it. */

	740 » new_reph_pos--;

	741 » }

	742 }

	743 goto reph_move;

	744 }

	745

	746 reph_move:

	747 {

	748 /* Move */

	749 hb_glyph_info_t reph = info[start];

	750 memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0]));

	751 info[new_reph_pos] = reph;

	752 start_of_last_cluster = start; /* Yay, one big cluster! */

	753 }

	754 }

	755

	756

	757 /* o Reorder pre-base reordering consonants:

741 *	758 *

742 * If a pre-base reordering consonant is found, reorder it according to	759 * If a pre-base reordering consonant is found, reorder it according to

743 * the following rules:	760 * the following rules:

744 *	761 *

745 * 1. Only reorder a glyph produced by substitution during application	762 * 1. Only reorder a glyph produced by substitution during application

746 * of the feature. (Note that a font may shape a Ra consonant with	763 * of the feature. (Note that a font may shape a Ra consonant with

747 * the feature generally but block it in certain contexts.)	764 * the feature generally but block it in certain contexts.)

748 *	765 *

749 * 2. Try to find a target position the same way as for pre-base matra.	766 * 2. Try to find a target position the same way as for pre-base matra.

750 * If it is found, reorder pre-base consonant glyph.	767 * If it is found, reorder pre-base consonant glyph.

751 *	768 *

752 * 3. If position is not found, reorder immediately before main	769 * 3. If position is not found, reorder immediately before main

753 * consonant.	770 * consonant.

754 */	771 */

755	772

756 /* TODO */	773 /* TODO */

757	774

758	775

759	776

	777 /* Apply 'init' to the Left Matra if it's a word start. */

	778 if (info[start].indic_position () == POS_PRE_M &&

	779 (!start \|\|

	780 !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) &

	781 (FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) \|

	782 FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) \|

	783 FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) \|

	784 FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) \|

	785 FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) \|

	786 FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) \|

	787 FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) \|

	788 FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))))

	789 info[start].mask \|= mask_array[INIT];

	790

	791

	792

	793 /* Finish off the clusters and go home! */

	794

	795 if (!options.uniscribe_bug_compatible)

	796 {

	797 /* This is what Uniscribe does. Ie. add cluster boundaries after Halant,ZWNJ.

	798 * This means, half forms are submerged into the main consonants cluster.

	799 * This is unnecessary, and makes cursor positioning harder, but that's what

	800 * Uniscribe does. */

	801 unsigned int cluster_start = start;

	802 for (unsigned int i = start + 1; i < start_of_last_cluster; i++)

	803 if (info[i - 1].indic_category() == OT_H && info[i].indic_category() == OT_ZWNJ) {

	804 i++;

	805 buffer->merge_clusters (cluster_start, i);

	806 cluster_start = i;

	807 }

	808 start_of_last_cluster = cluster_start;

	809 }

	810

	811 buffer->merge_clusters (start_of_last_cluster, end);

	812 }

	813

	814

	815 static void

	816 final_reordering (const hb_ot_map_t *map,

	817 hb_face_t *face HB_UNUSED,

	818 hb_buffer_t *buffer,

	819 void *user_data HB_UNUSED)

	820 {

	821 unsigned int count = buffer->len;

	822 if (!count) return;

	823

	824 hb_mask_t mask_array[ARRAY_LENGTH (indic_other_features)] = {0};

	825 unsigned int num_masks = ARRAY_LENGTH (indic_other_features);

	826 for (unsigned int i = 0; i < num_masks; i++)

	827 mask_array[i] = map->get_1_mask (indic_other_features[i].tag);

	828

	829 hb_glyph_info_t *info = buffer->info;

	830 unsigned int last = 0;

	831 unsigned int last_syllable = info[0].syllable();

	832 for (unsigned int i = 1; i < count; i++)

	833 if (last_syllable != info[i].syllable()) {

	834 final_reordering_syllable (buffer, mask_array, last, i);

	835 last = i;

	836 last_syllable = info[last].syllable();

	837 }

	838 final_reordering_syllable (buffer, mask_array, last, count);

	839

760 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);	840 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);

761 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);	841 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);

762 }	842 }

763	843

764	844

765	845

OLD	NEW