third_party/harfbuzz-ng/src/hb-ot-shape-complex-misc.cc - Issue 10915172: harfbuzz-ng roll

Side by Side Diff: third_party/harfbuzz-ng/src/hb-ot-shape-complex-misc.cc

Issue 10915172: harfbuzz-ng roll (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 8 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « third_party/harfbuzz-ng/src/hb-ot-shape-complex-indic-private.hh ('k') | third_party/harfbuzz-ng/src/hb-ot-shape-complex-private.hh » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright © 2010 Google, Inc.	2 * Copyright © 2010,2012 Google, Inc.

3 *	3 *

4 * This is part of HarfBuzz, a text shaping library.	4 * This is part of HarfBuzz, a text shaping library.

5 *	5 *

6 * Permission is hereby granted, without written agreement and without	6 * Permission is hereby granted, without written agreement and without

7 * license or royalty fees, to use, copy, modify, and distribute this	7 * license or royalty fees, to use, copy, modify, and distribute this

8 * software and its documentation for any purpose, provided that the	8 * software and its documentation for any purpose, provided that the

9 * above copyright notice and the following two paragraphs appear in	9 * above copyright notice and the following two paragraphs appear in

10 * all copies of this software.	10 * all copies of this software.

11 *	11 *

12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR	12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR

13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES	13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES

14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN	14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN

15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH	15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH

16 * DAMAGE.	16 * DAMAGE.

17 *	17 *

18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,	18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,

19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND	19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS	20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS

21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO	21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO

22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.	22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

23 *	23 *

24 * Google Author(s): Behdad Esfahbod	24 * Google Author(s): Behdad Esfahbod

25 */	25 */

26	26

27 #include "hb-ot-shape-complex-private.hh"	27 #include "hb-ot-shape-complex-private.hh"

28	28

29	29

30 /* TODO Add kana, and other small shapers here */	30 /* TODO Add kana, and other small shapers here */

31	31

32 /* When adding trivial shapers, eg. kana, hangul, etc, we can either

33 * add a full shaper enum value for them, or switch on the script in

34 * the default complex shaper. The former is faster, so I think that's

35 * what we would do, and hence the default complex shaper shall remain

36 * empty.

37 */

38	32

39 void	33 /* The default shaper only adds additional per-script features.*/

40 _hb_ot_shape_complex_collect_features_default (hb_ot_map_builder_t *map HB_UNUSED,

41 » » » » » const hb_segment_properties_t *props HB_UNUSED)

42 {

43 }

44

45 hb_ot_shape_normalization_mode_t

46 _hb_ot_shape_complex_normalization_preference_default (void)

47 {

48 return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;

49 }

50

51 void

52 _hb_ot_shape_complex_setup_masks_default (hb_ot_map_t *map HB_UNUSED,

53 » » » » » hb_buffer_t *buffer HB_UNUSED,

54 » » » » » hb_font_t *font HB_UNUSED)

55 {

56 }

57

58

59

60 /* Hangul shaper */

61	34

62 static const hb_tag_t hangul_features[] =	35 static const hb_tag_t hangul_features[] =

63 {	36 {

64 HB_TAG('l','j','m','o'),	37 HB_TAG('l','j','m','o'),

65 HB_TAG('v','j','m','o'),	38 HB_TAG('v','j','m','o'),

66 HB_TAG('t','j','m','o'),	39 HB_TAG('t','j','m','o'),

	40 HB_TAG_NONE

67 };	41 };

68	42

69 void	43 static const hb_tag_t tibetan_features[] =

70 _hb_ot_shape_complex_collect_features_hangul (hb_ot_map_builder_t *map,

71 » » » » » const hb_segment_properties_t *props HB_UNUSED)

72 {	44 {

73 for (unsigned int i = 0; i < ARRAY_LENGTH (hangul_features); i++)	45 HB_TAG('a','b','v','s'),

74 map->add_bool_feature (hangul_features[i]);	46 HB_TAG('b','l','w','s'),

	47 HB_TAG('a','b','v','m'),

	48 HB_TAG('b','l','w','m'),

	49 HB_TAG_NONE

	50 };

	51

	52 static void

	53 collect_features_default (hb_ot_shape_planner_t *plan)

	54 {

	55 const hb_tag_t *script_features = NULL;

	56

	57 switch ((hb_tag_t) plan->props.script)

	58 {

	59 /* Unicode-1.1 additions */

	60 case HB_SCRIPT_HANGUL:

	61 script_features = hangul_features;

	62 break;

	63

	64 /* Unicode-2.0 additions */

	65 case HB_SCRIPT_TIBETAN:

	66 script_features = tibetan_features;

	67 break;

	68 }

	69

	70 for (; script_features && *script_features; script_features++)

	71 plan->map.add_bool_feature (*script_features);

75 }	72 }

76	73

77 hb_ot_shape_normalization_mode_t	74 static hb_ot_shape_normalization_mode_t

78 _hb_ot_shape_complex_normalization_preference_hangul (void)	75 normalization_preference_default (const hb_ot_shape_plan_t *plan)

79 {	76 {

80 return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;	77 switch ((hb_tag_t) plan->props.script)

	78 {

	79 /* Unicode-1.1 additions */

	80 case HB_SCRIPT_HANGUL:

	81 return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;

	82 }

	83 return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;

81 }	84 }

82	85

83 void	86 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =

84 _hb_ot_shape_complex_setup_masks_hangul (hb_ot_map_t *map HB_UNUSED,

85 » » » » » hb_buffer_t *buffer HB_UNUSED,

86 » » » » » hb_font_t *font HB_UNUSED)

87 {	87 {

88 }	88 "default",

89	89 collect_features_default,

	90 NULL, /* override_features */

	91 NULL, /* data_create */

	92 NULL, /* data_destroy */

	93 NULL, /* preprocess_text */

	94 normalization_preference_default,

	95 NULL, /* setup_masks */

	96 true, /* zero_width_attached_marks */

	97 };

90	98

91	99

92 /* Thai / Lao shaper */	100 /* Thai / Lao shaper */

93	101

94 void	102 static void

95 _hb_ot_shape_complex_collect_features_thai (hb_ot_map_builder_t *map HB_UNUSED,	103 preprocess_text_thai (const hb_ot_shape_plan_t *plan HB_UNUSED,

96 » » » » » const hb_segment_properties_t *props HB_UNUSED)	104 » » hb_buffer_t *buffer,

97 {	105 » » hb_font_t *font HB_UNUSED)

98 }

99

100 hb_ot_shape_normalization_mode_t

101 _hb_ot_shape_complex_normalization_preference_thai (void)

102 {

103 return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;

104 }

105

106 void

107 _hb_ot_shape_complex_setup_masks_thai (hb_ot_map_t *map HB_UNUSED,

108 » » » » hb_buffer_t *buffer,

109 » » » » hb_font_t *font HB_UNUSED)

110 {	106 {

111 /* The following is NOT specified in the MS OT Thai spec, however, it seems	107 /* The following is NOT specified in the MS OT Thai spec, however, it seems

112 * to be what Uniscribe and other engines implement. According to Eric Muller :	108 * to be what Uniscribe and other engines implement. According to Eric Muller :

113 *	109 *

114 * When you have a sara am, decompose it in nikhahit + sara a, and mode the	110 * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, and move the

115 * nihka hit backwards over any tone mark (0E48-0E4B).	111 * NIKHAHIT backwards over any tone mark (0E48-0E4B).

116 *	112 *

117 * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>	113 * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>

118 *	114 *

119 * This reordering is legit only when the nikhahit comes from a sara am, not	115 * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not

120 * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably	116 * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably

121 * not what a user wanted, but the rendering is nevertheless nikhahit above	117 * not what a user wanted, but the rendering is nevertheless nikhahit above

122 * chattawa.	118 * chattawa.

123 *	119 *

124 * Same for Lao.	120 * Same for Lao.

	121 *

	122 * Note:

	123 *

	124 * Uniscribe also does so below-marks reordering. Namely, it positions U+0E3A

	125 * after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A.

	126 * See unicode->modified_combining_class (). Lao does NOT have a U+0E3A

	127 * equivalent.

125 */	128 */

126	129

	130

127 /*	131 /*

128 * Here are the characters of significance:	132 * Here are the characters of significance:

129 *	133 *

130 * Thai Lao	134 * Thai Lao

131 * SARA AM: U+0E33 U+0EB3	135 * SARA AM: U+0E33 U+0EB3

132 * SARA AA: U+0E32 U+0EB2	136 * SARA AA: U+0E32 U+0EB2

133 * Nikhahit: U+0E4D U+0ECD	137 * Nikhahit: U+0E4D U+0ECD

134 *	138 *

135 * Tone marks:	139 * Testing shows that Uniscribe reorder the following marks:

136 * Thai:» <0E48..0E4B> CCC=107	140 * Thai:» <0E31,0E34..0E37,0E47..0E4E>

137 * Lao:» <0EC8..0ECB> CCC=122	141 * Lao:» <0EB1,0EB4..0EB7,0EC7..0ECE>

138 *	142 *

139 * Note how the Lao versions are the same as Thai + 0x80.	143 * Note how the Lao versions are the same as Thai + 0x80.

140 */	144 */

141	145

142 /* We only get one script at a time, so a script-agnostic implementation	146 /* We only get one script at a time, so a script-agnostic implementation

143 * is adequate here. */	147 * is adequate here. */

144 #define IS_SARA_AM(x) (((x) & ~0x0080) == 0x0E33)	148 #define IS_SARA_AM(x) (((x) & ~0x0080) == 0x0E33)

145 #define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0xE33 + 0xE4D)	149 #define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0xE33 + 0xE4D)

146 #define SARA_AA_FROM_SARA_AM(x) ((x) - 1)	150 #define SARA_AA_FROM_SARA_AM(x) ((x) - 1)

147 #define IS_TONE_MARK(x) (((x) & ~0x0083) == 0x0E48)	151 #define IS_TONE_MARK(x) (hb_in_ranges<hb_codepoint_t> ((x) & ~0x0080, 0x0E34, 0x0E37, 0x0E47, 0x0E4E, 0x0E31, 0x0E31))

148	152

149 buffer->clear_output ();	153 buffer->clear_output ();

150 unsigned int count = buffer->len;	154 unsigned int count = buffer->len;

151 for (buffer->idx = 0; buffer->idx < count;)	155 for (buffer->idx = 0; buffer->idx < count;)

152 {	156 {

153 hb_codepoint_t u = buffer->cur().codepoint;	157 hb_codepoint_t u = buffer->cur().codepoint;

154 if (likely (!IS_SARA_AM (u))) {	158 if (likely (!IS_SARA_AM (u))) {

155 buffer->next_glyph ();	159 buffer->next_glyph ();

156 continue;	160 continue;

157 }	161 }

158	162

159 /* Is SARA AM. Decompose and reorder. */	163 /* Is SARA AM. Decompose and reorder. */

160 hb_codepoint_t decomposed[2] = {hb_codepoint_t (NIKHAHIT_FROM_SARA_AM (u)),	164 hb_codepoint_t decomposed[2] = {hb_codepoint_t (NIKHAHIT_FROM_SARA_AM (u)),

161 hb_codepoint_t (SARA_AA_FROM_SARA_AM (u))};	165 hb_codepoint_t (SARA_AA_FROM_SARA_AM (u))};

162 buffer->replace_glyphs (1, 2, decomposed);	166 buffer->replace_glyphs (1, 2, decomposed);

163 if (unlikely (buffer->in_error))	167 if (unlikely (buffer->in_error))

164 return;	168 return;

165	169

166 /* Ok, let's see... */	170 /* Ok, let's see... */

167 unsigned int end = buffer->out_len;	171 unsigned int end = buffer->out_len;

168 unsigned int start = end - 2;	172 unsigned int start = end - 2;

169 while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint))	173 while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint))

170 start--;	174 start--;

171	175

172 /* Move Nikhahit (end-2) to the beginning */	176 if (start + 2 < end)

173 hb_glyph_info_t t = buffer->out_info[end - 2];	177 {

174 memmove (buffer->out_info + start + 1,	178 /* Move Nikhahit (end-2) to the beginning */

175 » buffer->out_info + start,	179 buffer->merge_out_clusters (start, end);

176 » sizeof (buffer->out_info[0]) * (end - start - 2));	180 hb_glyph_info_t t = buffer->out_info[end - 2];

177 buffer->out_info[start] = t;	181 memmove (buffer->out_info + start + 1,

178	182 » buffer->out_info + start,

179 /* XXX Make this easier! */	183 » sizeof (buffer->out_info[0]) * (end - start - 2));

180 /* Make cluster */	184 buffer->out_info[start] = t;

181 for (; start > 0 && buffer->out_info[start - 1].cluster == buffer->out_info[start].cluster; start--)	185 }

182 ;	186 else

183 for (; buffer->idx < count;)	187 {

184 if (buffer->cur().cluster == buffer->prev().cluster)	188 /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the

185 buffer->next_glyph ();	189 * previous cluster. */

186 else	190 if (start)

187 break;	191 » buffer->merge_out_clusters (start - 1, end);

188 end = buffer->out_len;	192 }

189

190 buffer->merge_out_clusters (start, end);

191 }	193 }

192 buffer->swap_buffers ();	194 buffer->swap_buffers ();

193 }	195 }

	196

	197 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =

	198 {

	199 "thai",

	200 NULL, /* collect_features */

	201 NULL, /* override_features */

	202 NULL, /* data_create */

	203 NULL, /* data_destroy */

	204 preprocess_text_thai,

	205 NULL, /* normalization_preference */

	206 NULL, /* setup_masks */

	207 true, /* zero_width_attached_marks */

	208 };

OLD	NEW