Index: third_party/harfbuzz-ng/src/hb-unicode-private.hh |
diff --git a/third_party/harfbuzz-ng/src/hb-unicode-private.hh b/third_party/harfbuzz-ng/src/hb-unicode-private.hh |
index 8a341747c9566de40a0ca9fdb295d43f23f35c49..9f24a9fbc33b20d05647757b3c8f77ad33c86d87 100644 |
--- a/third_party/harfbuzz-ng/src/hb-unicode-private.hh |
+++ b/third_party/harfbuzz-ng/src/hb-unicode-private.hh |
@@ -1,7 +1,7 @@ |
/* |
* Copyright © 2009 Red Hat, Inc. |
* Copyright © 2011 Codethink Limited |
- * Copyright © 2010,2011 Google, Inc. |
+ * Copyright © 2010,2011,2012 Google, Inc. |
* |
* This is part of HarfBuzz, a text shaping library. |
* |
@@ -37,6 +37,7 @@ |
#include "hb-object-private.hh" |
+extern HB_INTERNAL const uint8_t _hb_modified_combining_class[256]; /* lookup table indexed by the combining_class () result; read by modified_combining_class () */ |
/* |
* hb_unicode_funcs_t |
@@ -50,18 +51,19 @@ |
HB_UNICODE_FUNC_IMPLEMENT (script) \ |
HB_UNICODE_FUNC_IMPLEMENT (compose) \ |
HB_UNICODE_FUNC_IMPLEMENT (decompose) \ |
+ HB_UNICODE_FUNC_IMPLEMENT (decompose_compatibility) \ |
/* ^--- Add new callbacks here */ |
/* Simple callbacks are those taking a hb_codepoint_t and returning a hb_codepoint_t */ |
#define HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE \ |
- HB_UNICODE_FUNC_IMPLEMENT (unsigned int, combining_class) \ |
+ HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_combining_class_t, combining_class) \ |
HB_UNICODE_FUNC_IMPLEMENT (unsigned int, eastasian_width) \ |
HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_general_category_t, general_category) \ |
HB_UNICODE_FUNC_IMPLEMENT (hb_codepoint_t, mirroring) \ |
HB_UNICODE_FUNC_IMPLEMENT (hb_script_t, script) \ |
/* ^--- Add new simple callbacks here */ |
-struct _hb_unicode_funcs_t { |
+struct hb_unicode_funcs_t { |
hb_object_header_t header; |
ASSERT_POD (); |
@@ -69,7 +71,97 @@ struct _hb_unicode_funcs_t { |
bool immutable; |
- /* Don't access these directly. Call hb_unicode_*() instead. */ |
+#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \ |
+ inline return_type name (hb_codepoint_t unicode) { return func.name (this, unicode, user_data.name); } |
+HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE /* one inline wrapper per simple callback: forwards (this, unicode, user_data.<name>) to func.<name> */ |
+#undef HB_UNICODE_FUNC_IMPLEMENT /* the helper definition is dropped again right after the wrappers are generated */ |
+ /* Composes a and b into *ab through the compose callback. |
+ * *ab is reset to 0 before anything else; when either a or b is 0 the |
+ * callback is not invoked and false is returned.  Otherwise the |
+ * callback's result is returned as-is. */ |
+ inline hb_bool_t compose (hb_codepoint_t a, hb_codepoint_t b, |
+ hb_codepoint_t *ab) |
+ { |
+ *ab = 0; |
+ if (unlikely (!a || !b)) return false; |
+ return func.compose (this, a, b, ab, user_data.compose); |
+ } |
+ /* Decomposes ab into *a and *b through the decompose callback. |
+ * The outputs are preset to *a = ab and *b = 0 before the callback |
+ * runs; the callback's return value is passed through unchanged. */ |
+ inline hb_bool_t decompose (hb_codepoint_t ab, |
+ hb_codepoint_t *a, hb_codepoint_t *b) |
+ { |
+ *a = ab; *b = 0; |
+ return func.decompose (this, ab, a, b, user_data.decompose); |
+ } |
+ /* Asks the decompose_compatibility callback to fill decomposed[] for u; |
+ * the callback's return value is treated as the number of entries. |
+ * A single-entry result equal to u itself is reported as "nothing": |
+ * decomposed[0] is cleared and 0 is returned.  In every other case a 0 |
+ * is stored at decomposed[ret] and ret is returned. |
+ * NOTE(review): decomposed therefore needs room for ret + 1 entries; |
+ * confirm the buffer size that callers pass in. */ |
+ inline unsigned int decompose_compatibility (hb_codepoint_t u, |
+ hb_codepoint_t *decomposed) |
+ { |
+ unsigned int ret = func.decompose_compatibility (this, u, decomposed, user_data.decompose_compatibility); |
+ if (ret == 1 && u == decomposed[0]) { |
+ decomposed[0] = 0; |
+ return 0; |
+ } |
+ decomposed[ret] = 0; |
+ return ret; |
+ } |
+ |
+ /* Returns the _hb_modified_combining_class entry selected by |
+ * combining_class (unicode). |
+ * NOTE(review): the table is declared with 256 entries; this assumes |
+ * the combining_class callback never yields a larger index -- confirm. */ |
+ unsigned int |
+ modified_combining_class (hb_codepoint_t unicode) |
+ { |
+ return _hb_modified_combining_class[combining_class (unicode)]; |
+ } |
+ /* Tests unicode against the three (first, last) code point pairs |
+ * listed below via hb_in_ranges; the whole test is wrapped in |
+ * unlikely (). */ |
+ inline hb_bool_t |
+ is_variation_selector (hb_codepoint_t unicode) |
+ { |
+ return unlikely (hb_in_ranges<hb_codepoint_t> (unicode, |
+ 0x180B, 0x180D, /* MONGOLIAN FREE VARIATION SELECTOR ONE..THREE */ |
+ 0xFE00, 0xFE0F, /* VARIATION SELECTOR-1..16 */ |
+ 0xE0100, 0xE01EF)); /* VARIATION SELECTOR-17..256 */ |
+ } |
+ |
+ /* Zero-Width invisible characters: |
+ * |
+ * 0009 CHARACTER TABULATION |
+ * 00AD SOFT HYPHEN |
+ * 034F COMBINING GRAPHEME JOINER |
+ * |
+ * 180E MONGOLIAN VOWEL SEPARATOR |
+ * |
+ * 200B ZERO WIDTH SPACE |
+ * 200C ZERO WIDTH NON-JOINER |
+ * 200D ZERO WIDTH JOINER |
+ * 200E LEFT-TO-RIGHT MARK |
+ * 200F RIGHT-TO-LEFT MARK |
+ * |
+ * 2028 LINE SEPARATOR |
+ * |
+ * 202A LEFT-TO-RIGHT EMBEDDING |
+ * 202B RIGHT-TO-LEFT EMBEDDING |
+ * 202C POP DIRECTIONAL FORMATTING |
+ * 202D LEFT-TO-RIGHT OVERRIDE |
+ * 202E RIGHT-TO-LEFT OVERRIDE |
+ * |
+ * 2060 WORD JOINER |
+ * 2061 FUNCTION APPLICATION |
+ * 2062 INVISIBLE TIMES |
+ * 2063 INVISIBLE SEPARATOR |
+ * 2064 INVISIBLE PLUS |
+ * |
+ * FEFF ZERO WIDTH NO-BREAK SPACE |
+ * |
+ * The (ch & ~0x007F) == 0x2000 mask confines the range checks and the |
+ * 2028 test to U+2000..U+207F; the remaining code points are compared |
+ * one by one inside unlikely (). |
+ */ |
+ inline hb_bool_t |
+ is_zero_width (hb_codepoint_t ch) |
+ { |
+ return ((ch & ~0x007F) == 0x2000 && (hb_in_ranges<hb_codepoint_t> (ch, |
+ 0x200B, 0x200F, |
+ 0x202A, 0x202E, |
+ 0x2060, 0x2064) || |
+ (ch == 0x2028))) || |
+ unlikely (ch == 0x0009 || |
+ ch == 0x00AD || |
+ ch == 0x034F || |
+ ch == 0x180E || |
+ ch == 0xFEFF); |
+ } |
+ |
struct { |
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name; |
@@ -91,68 +183,98 @@ struct _hb_unicode_funcs_t { |
}; |
-#ifdef HAVE_GLIB |
-extern HB_INTERNAL const hb_unicode_funcs_t _hb_glib_unicode_funcs; |
-#define _hb_unicode_funcs_default _hb_glib_unicode_funcs |
-#elif defined(HAVE_ICU) |
-extern HB_INTERNAL const hb_unicode_funcs_t _hb_icu_unicode_funcs; |
-#define _hb_unicode_funcs_default _hb_icu_unicode_funcs |
-#else |
-#define HB_UNICODE_FUNCS_NIL 1 |
-#define _hb_unicode_funcs_default _hb_unicode_funcs_nil |
-#endif |
- |
+extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil; |
-HB_INTERNAL unsigned int |
-_hb_unicode_modified_combining_class (hb_unicode_funcs_t *ufuncs, |
- hb_codepoint_t unicode); |
-static inline hb_bool_t |
-_hb_unicode_is_variation_selector (hb_codepoint_t unicode) |
-{ |
- return unlikely ((unicode >= 0x180B && unicode <= 0x180D) || /* MONGOLIAN FREE VARIATION SELECTOR ONE..THREE */ |
- (unicode >= 0xFE00 && unicode <= 0xFE0F) || /* VARIATION SELECTOR-1..16 */ |
- (unicode >= 0xE0100 && unicode <= 0xE01EF)); /* VARIATION SELECTOR-17..256 */ |
-} |
+/* Modified combining marks */ |
-/* Zero-Width invisible characters: |
+/* Hebrew |
* |
- * 00AD SOFT HYPHEN |
- * 034F COMBINING GRAPHEME JOINER |
+ * We permute the "fixed-position" classes 10-26 into the order |
+ * described in the SBL Hebrew manual: |
* |
- * 200B ZERO WIDTH SPACE |
- * 200C ZERO WIDTH NON-JOINER |
- * 200D ZERO WIDTH JOINER |
- * 200E LEFT-TO-RIGHT MARK |
- * 200F RIGHT-TO-LEFT MARK |
+ * http://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf |
* |
- * 2028 LINE SEPARATOR |
+ * (as recommended by: |
+ * http://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering-t6751.0.html) |
* |
- * 202A LEFT-TO-RIGHT EMBEDDING |
- * 202B RIGHT-TO-LEFT EMBEDDING |
- * 202C POP DIRECTIONAL FORMATTING |
- * 202D LEFT-TO-RIGHT OVERRIDE |
- * 202E RIGHT-TO-LEFT OVERRIDE |
+ * More details here: |
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=662055 |
+ */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC10 22 /* sheva */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC11 15 /* hataf segol */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC12 16 /* hataf patah */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC13 17 /* hataf qamats */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC14 23 /* hiriq */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC15 18 /* tsere */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC16 19 /* segol */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC17 20 /* patah */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC18 21 /* qamats */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC19 14 /* holam */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC20 24 /* qubuts */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC21 12 /* dagesh */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC22 25 /* meteg */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC23 13 /* rafe */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC24 10 /* shin dot */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC25 11 /* sin dot */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC26 26 /* point varika (the only class in this group that keeps its value) */ |
+ |
+/* |
+ * Arabic |
* |
- * 2060 WORD JOINER |
- * 2061 FUNCTION APPLICATION |
- * 2062 INVISIBLE TIMES |
- * 2063 INVISIBLE SEPARATOR |
+ * Modify to move Shadda (ccc=33) before other marks. See: |
+ * http://unicode.org/faq/normalization.html#8 |
+ * http://unicode.org/faq/normalization.html#9 |
+ */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC27 28 /* fathatan */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC28 29 /* dammatan */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC29 30 /* kasratan */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC30 31 /* fatha */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC31 32 /* damma */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC32 33 /* kasra */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC33 27 /* shadda: takes the lowest slot, so ccc 27..32 above each move up by one */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC34 34 /* sukun (unchanged) */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC35 35 /* superscript alef (unchanged) */ |
+ |
+/* Syriac */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC36 36 /* superscript alaph (unchanged) */ |
+ |
+/* Telugu |
+ * |
+ * Modify Telugu length marks (ccc=84, ccc=91). |
+ * These are the only matras in the main Indic scripts range that have |
+ * a non-zero ccc. That makes them reorder with the Halant that is |
+ * ccc=9. Just zero them, we don't need them in our Indic shaper. |
+ */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC84 0 /* length mark */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC91 0 /* ai length mark */ |
+ |
+/* Thai |
* |
- * FEFF ZERO WIDTH NO-BREAK SPACE |
+ * Modify U+0E38 and U+0E39 (ccc=103) to be reordered before U+0E3A (ccc=9). |
+ * Assign 3, which is unassigned otherwise. |
+ * Uniscribe does this reordering too. |
*/ |
-static inline hb_bool_t |
-_hb_unicode_is_zero_width (hb_codepoint_t ch) |
-{ |
- return ((ch & ~0x007F) == 0x2000 && ( |
- (ch >= 0x200B && ch <= 0x200F) || |
- (ch >= 0x202A && ch <= 0x202E) || |
- (ch >= 0x2060 && ch <= 0x2063) || |
- (ch == 0x2028) |
- )) || unlikely (ch == 0x0009 |
- || ch == 0x00AD |
- || ch == 0x034F |
- || ch == 0xFEFF); |
-} |
+#define HB_MODIFIED_COMBINING_CLASS_CCC103 3 /* sara u / sara uu */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC107 107 /* mai * (unchanged) */ |
+ |
+/* Lao (both classes keep their value) */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC118 118 /* sign u / sign uu */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC122 122 /* mai * */ |
+ |
+/* Tibetan (all three classes keep their value) */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC129 129 /* sign aa */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC130 130 /* sign i */ |
+#define HB_MODIFIED_COMBINING_CLASS_CCC132 132 /* sign u */ |
+ |
+ |
+/* Misc */ |
+/* Evaluates to non-zero when FLAG (gen_cat) shares a bit with the |
+ * combined flags of the spacing, enclosing and non-spacing mark |
+ * categories, and to 0 otherwise.  gen_cat appears once in the |
+ * expansion. */ |
+#define HB_UNICODE_GENERAL_CATEGORY_IS_MARK(gen_cat) \ |
+ (FLAG (gen_cat) & \ |
+ (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \ |
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \ |
+ FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) |
+ |
#endif /* HB_UNICODE_PRIVATE_HH */ |