OLD | NEW |
1 /* | 1 /* |
2 * (C) 1999 Lars Knoll (knoll@kde.org) | 2 * (C) 1999 Lars Knoll (knoll@kde.org) |
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights | 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights |
4 * reserved. | 4 * reserved. |
5 * Copyright (C) 2007-2009 Torch Mobile, Inc. | 5 * Copyright (C) 2007-2009 Torch Mobile, Inc. |
6 * Copyright (C) 2011 Google Inc. All rights reserved. | 6 * Copyright (C) 2011 Google Inc. All rights reserved. |
7 * | 7 * |
8 * This library is free software; you can redistribute it and/or | 8 * This library is free software; you can redistribute it and/or |
9 * modify it under the terms of the GNU Library General Public | 9 * modify it under the terms of the GNU Library General Public |
10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
(...skipping 10 matching lines...) Expand all Loading... |
21 * Boston, MA 02110-1301, USA. | 21 * Boston, MA 02110-1301, USA. |
22 */ | 22 */ |
23 | 23 |
24 #include "platform/text/TextBreakIterator.h" | 24 #include "platform/text/TextBreakIterator.h" |
25 | 25 |
26 #include "platform/text/Character.h" | 26 #include "platform/text/Character.h" |
27 #include "wtf/ASCIICType.h" | 27 #include "wtf/ASCIICType.h" |
28 #include "wtf/StdLibExtras.h" | 28 #include "wtf/StdLibExtras.h" |
29 #include "wtf/text/CharacterNames.h" | 29 #include "wtf/text/CharacterNames.h" |
30 | 30 |
| 31 #include <unicode/uchar.h> |
| 32 #include <unicode/uvernum.h> |
| 33 |
31 namespace blink { | 34 namespace blink { |
32 | 35 |
33 unsigned numGraphemeClusters(const String& string) { | 36 unsigned numGraphemeClusters(const String& string) { |
34 unsigned stringLength = string.length(); | 37 unsigned stringLength = string.length(); |
35 | 38 |
36 if (!stringLength) | 39 if (!stringLength) |
37 return 0; | 40 return 0; |
38 | 41 |
39 // The only Latin-1 Extended Grapheme Cluster is CR LF | 42 // The only Latin-1 Extended Grapheme Cluster is CR LF |
40 if (string.is8Bit() && !string.contains('\r')) | 43 if (string.is8Bit() && !string.contains('\r')) |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
122 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0)
}, // ` | 125 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0)
}, // ` |
123 AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, A
L, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, // a-z | 126 AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, A
L, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, // a-z |
124 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0)
}, // { | 127 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0)
}, // { |
125 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0)
}, // | | 128 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0)
}, // | |
126 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0)
}, // } | 129 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0)
}, // } |
127 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0)
}, // ~ | 130 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0)
}, // ~ |
128 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0)
}, // DEL | 131 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0,
0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0)
}, // DEL |
129 }; | 132 }; |
130 // clang-format on | 133 // clang-format on |
131 | 134 |
// Number of Line Breaking Classes covered by breakAllLineBreakClassTable.
// The table's columns run from XX through RI, so the count is pinned to the
// RI enumerator rather than derived from U_LB_COUNT. U_LB_COUNT grows whenever
// ICU adds new classes (ICU 58 added three after RI), and subtracting a
// hard-coded, version-specific offset needs to be revisited on every such ICU
// update. Anchoring on U_LB_REGIONAL_INDICATOR yields the same value on the
// ICU versions the previous definition handled and stays correct afterwards.
#define BA_LB_COUNT (U_LB_REGIONAL_INDICATOR + 1)
132 // Line breaking table for CSS word-break: break-all. This table differs from | 140 // Line breaking table for CSS word-break: break-all. This table differs from |
133 // asciiLineBreakTable in: | 141 // asciiLineBreakTable in: |
134 // - Indices are Line Breaking Classes defined in UAX#14 Unicode Line Breaking | 142 // - Indices are Line Breaking Classes defined in UAX#14 Unicode Line Breaking |
135 // Algorithm: http://unicode.org/reports/tr14/#DescriptionOfProperties | 143 // Algorithm: http://unicode.org/reports/tr14/#DescriptionOfProperties |
136 // - 1 indicates additional break opportunities. 0 indicates to fallback to | 144 // - 1 indicates additional break opportunities. 0 indicates to fallback to |
137 // normal line break, not "prohibit break." | 145 // normal line break, not "prohibit break." |
138 // clang-format off | 146 // clang-format off |
139 static const unsigned char breakAllLineBreakClassTable[][U_LB_COUNT / 8 + 1] = { | 147 static const unsigned char breakAllLineBreakClassTable[][BA_LB_COUNT / 8 + 1] =
{ |
140 // XX AI AL B2 BA BB BK CB CL CM CR EX GL HY ID IN IS LF NS NU OP PO P
R QU SA SG SP SY ZW NL WJ H2 H3 JL JT JV CP CJ HL RI | 148 // XX AI AL B2 BA BB BK CB CL CM CR EX GL HY ID IN IS LF NS NU OP PO P
R QU SA SG SP SY ZW NL WJ H2 H3 JL JT JV CP CJ HL RI |
141 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // XX | 149 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // XX |
142 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0,
1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // AI | 150 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0,
1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // AI |
143 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0,
1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // AL | 151 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0,
1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // AL |
144 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // B2 | 152 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // B2 |
145 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0,
1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // BA | 153 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0,
1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // BA |
146 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // BB | 154 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // BB |
147 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // BK | 155 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // BK |
148 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // CB | 156 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0,
0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // CB |
149 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 0, 0,
1, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // CL | 157 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 0, 0,
1, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // CL |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
183 | 191 |
184 #undef B | 192 #undef B |
185 #undef F | 193 #undef F |
186 #undef DI | 194 #undef DI |
187 #undef AL | 195 #undef AL |
188 | 196 |
189 static_assert(WTF_ARRAY_LENGTH(asciiLineBreakTable) == | 197 static_assert(WTF_ARRAY_LENGTH(asciiLineBreakTable) == |
190 asciiLineBreakTableLastChar - asciiLineBreakTableFirstChar + | 198 asciiLineBreakTableLastChar - asciiLineBreakTableFirstChar + |
191 1, | 199 1, |
192 "asciiLineBreakTable should be consistent"); | 200 "asciiLineBreakTable should be consistent"); |
193 static_assert(WTF_ARRAY_LENGTH(breakAllLineBreakClassTable) == U_LB_COUNT, | 201 static_assert(WTF_ARRAY_LENGTH(breakAllLineBreakClassTable) == BA_LB_COUNT, |
194 "breakAllLineBreakClassTable should be consistent"); | 202 "breakAllLineBreakClassTable should be consistent"); |
195 | 203 |
196 static inline bool shouldBreakAfter(UChar lastCh, UChar ch, UChar nextCh) { | 204 static inline bool shouldBreakAfter(UChar lastCh, UChar ch, UChar nextCh) { |
197 // Don't allow line breaking between '-' and a digit if the '-' may mean a | 205 // Don't allow line breaking between '-' and a digit if the '-' may mean a |
198 // minus sign in the context, while allow breaking in 'ABCD-1234' and | 206 // minus sign in the context, while allow breaking in 'ABCD-1234' and |
199 // '1234-5678' which may be in long URLs. | 207 // '1234-5678' which may be in long URLs. |
200 if (ch == '-' && isASCIIDigit(nextCh)) | 208 if (ch == '-' && isASCIIDigit(nextCh)) |
201 return isASCIIAlphanumeric(lastCh); | 209 return isASCIIAlphanumeric(lastCh); |
202 | 210 |
203 // If both ch and nextCh are ASCII characters, use a lookup table for enhanced | 211 // If both ch and nextCh are ASCII characters, use a lookup table for enhanced |
(...skipping 15 matching lines...) Expand all Loading... |
219 if (ch == '+') // IE tailors '+' to AL-like class when break-all is enabled. | 227 if (ch == '+') // IE tailors '+' to AL-like class when break-all is enabled. |
220 return U_LB_ALPHABETIC; | 228 return U_LB_ALPHABETIC; |
221 UChar32 ch32 = U16_IS_LEAD(lastCh) && U16_IS_TRAIL(ch) | 229 UChar32 ch32 = U16_IS_LEAD(lastCh) && U16_IS_TRAIL(ch) |
222 ? U16_GET_SUPPLEMENTARY(lastCh, ch) | 230 ? U16_GET_SUPPLEMENTARY(lastCh, ch) |
223 : ch; | 231 : ch; |
224 return static_cast<ULineBreak>(u_getIntPropertyValue(ch32, UCHAR_LINE_BREAK)); | 232 return static_cast<ULineBreak>(u_getIntPropertyValue(ch32, UCHAR_LINE_BREAK)); |
225 } | 233 } |
226 | 234 |
227 static inline bool shouldBreakAfterBreakAll(ULineBreak lastLineBreak, | 235 static inline bool shouldBreakAfterBreakAll(ULineBreak lastLineBreak, |
228 ULineBreak lineBreak) { | 236 ULineBreak lineBreak) { |
229 if (lineBreak >= 0 && lineBreak < U_LB_COUNT && lastLineBreak >= 0 && | 237 if (lineBreak >= 0 && lineBreak < BA_LB_COUNT && lastLineBreak >= 0 && |
230 lastLineBreak < U_LB_COUNT) { | 238 lastLineBreak < BA_LB_COUNT) { |
231 const unsigned char* tableRow = breakAllLineBreakClassTable[lastLineBreak]; | 239 const unsigned char* tableRow = breakAllLineBreakClassTable[lastLineBreak]; |
232 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8)); | 240 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8)); |
233 } | 241 } |
234 return false; | 242 return false; |
235 } | 243 } |
236 | 244 |
237 inline bool needsLineBreakIterator(UChar ch) { | 245 inline bool needsLineBreakIterator(UChar ch) { |
238 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter; | 246 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter; |
239 } | 247 } |
240 | 248 |
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
427 | 435 |
428 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos) { | 436 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos) { |
429 if (m_string.is8Bit()) | 437 if (m_string.is8Bit()) |
430 return nextBreakablePosition<LChar, LineBreakType::Normal>( | 438 return nextBreakablePosition<LChar, LineBreakType::Normal>( |
431 *this, m_string.characters8(), m_string.length(), pos); | 439 *this, m_string.characters8(), m_string.length(), pos); |
432 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(), | 440 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(), |
433 m_string.length(), pos); | 441 m_string.length(), pos); |
434 } | 442 } |
435 | 443 |
436 } // namespace blink | 444 } // namespace blink |
OLD | NEW |