Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(274)

Side by Side Diff: third_party/WebKit/Source/platform/text/TextBreakIterator.cpp

Issue 2440923002: Prepare to upgrade ICU to 58 part 2 (Closed)
Patch Set: Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * (C) 1999 Lars Knoll (knoll@kde.org) 2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights
4 * reserved. 4 * reserved.
5 * Copyright (C) 2007-2009 Torch Mobile, Inc. 5 * Copyright (C) 2007-2009 Torch Mobile, Inc.
6 * Copyright (C) 2011 Google Inc. All rights reserved. 6 * Copyright (C) 2011 Google Inc. All rights reserved.
7 * 7 *
8 * This library is free software; you can redistribute it and/or 8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public 9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
(...skipping 10 matching lines...) Expand all
21 * Boston, MA 02110-1301, USA. 21 * Boston, MA 02110-1301, USA.
22 */ 22 */
23 23
24 #include "platform/text/TextBreakIterator.h" 24 #include "platform/text/TextBreakIterator.h"
25 25
26 #include "platform/text/Character.h" 26 #include "platform/text/Character.h"
27 #include "wtf/ASCIICType.h" 27 #include "wtf/ASCIICType.h"
28 #include "wtf/StdLibExtras.h" 28 #include "wtf/StdLibExtras.h"
29 #include "wtf/text/CharacterNames.h" 29 #include "wtf/text/CharacterNames.h"
30 30
31 #include <unicode/uchar.h>
32 #include <unicode/uvernum.h>
33
31 namespace blink { 34 namespace blink {
32 35
33 unsigned numGraphemeClusters(const String& string) { 36 unsigned numGraphemeClusters(const String& string) {
34 unsigned stringLength = string.length(); 37 unsigned stringLength = string.length();
35 38
36 if (!stringLength) 39 if (!stringLength)
37 return 0; 40 return 0;
38 41
39 // The only Latin-1 Extended Grapheme Cluster is CR LF 42 // The only Latin-1 Extended Grapheme Cluster is CR LF
40 if (string.is8Bit() && !string.contains('\r')) 43 if (string.is8Bit() && !string.contains('\r'))
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0) }, // ` 125 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0) }, // `
123 AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, // a-z 126 AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, // a-z
124 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0) }, // { 127 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0) }, // {
125 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0) }, // | 128 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0) }, // |
126 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0) }, // } 129 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0) }, // }
127 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0) }, // ~ 130 { B(0, 0, 0, 0, 0, 0, 0, 1), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 1, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 1, 0, 0, 0, 0, 0) }, // ~
128 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0) }, // DEL 131 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0), 0, 0, 0, B(0, 0, 0, 0, 0, 0, 0, 0) }, // DEL
129 }; 132 };
130 // clang-format on 133 // clang-format on
131 134
135 #if U_ICU_VERSION_MAJOR_NUM >= 58
136 #define BA_LB_COUNT (U_LB_COUNT - 3)
137 #else
138 #define BA_LB_COUNT U_LB_COUNT
139 #endif
132 // Line breaking table for CSS word-break: break-all. This table differs from 140 // Line breaking table for CSS word-break: break-all. This table differs from
133 // asciiLineBreakTable in: 141 // asciiLineBreakTable in:
134 // - Indices are Line Breaking Classes defined in UAX#14 Unicode Line Breaking 142 // - Indices are Line Breaking Classes defined in UAX#14 Unicode Line Breaking
135 // Algorithm: http://unicode.org/reports/tr14/#DescriptionOfProperties 143 // Algorithm: http://unicode.org/reports/tr14/#DescriptionOfProperties
136 // - 1 indicates additional break opportunities. 0 indicates to fallback to 144 // - 1 indicates additional break opportunities. 0 indicates to fallback to
137 // normal line break, not "prohibit break." 145 // normal line break, not "prohibit break."
138 // clang-format off 146 // clang-format off
139 static const unsigned char breakAllLineBreakClassTable[][U_LB_COUNT / 8 + 1] = { 147 static const unsigned char breakAllLineBreakClassTable[][BA_LB_COUNT / 8 + 1] = {
140 // XX AI AL B2 BA BB BK CB CL CM CR EX GL HY ID IN IS LF NS NU OP PO PR QU SA SG SP SY ZW NL WJ H2 H3 JL JT JV CP CJ HL RI 148 // XX AI AL B2 BA BB BK CB CL CM CR EX GL HY ID IN IS LF NS NU OP PO PR QU SA SG SP SY ZW NL WJ H2 H3 JL JT JV CP CJ HL RI
141 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // XX 149 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // XX
142 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0, 1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // AI 150 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0, 1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // AI
143 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0, 1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // AL 151 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0, 1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // AL
144 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // B2 152 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // B2
145 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0, 1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // BA 153 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 1, 0, 1, 0), B(1, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // BA
146 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // BB 154 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // BB
147 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // BK 155 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // BK
148 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // CB 156 { B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 0, 0) }, // CB
149 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 0, 0, 1, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // CL 157 { B(0, 1, 1, 0, 1, 0, 0, 0), B(0, 0, 0, 0, 0, 1, 0, 0), B(0, 0, 0, 1, 0, 0, 1, 0), B(0, 0, 0, 0, 0, 0, 0, 0), B(0, 0, 0, 0, 0, 0, 1, 0) }, // CL
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
183 191
184 #undef B 192 #undef B
185 #undef F 193 #undef F
186 #undef DI 194 #undef DI
187 #undef AL 195 #undef AL
188 196
189 static_assert(WTF_ARRAY_LENGTH(asciiLineBreakTable) == 197 static_assert(WTF_ARRAY_LENGTH(asciiLineBreakTable) ==
190 asciiLineBreakTableLastChar - asciiLineBreakTableFirstChar + 198 asciiLineBreakTableLastChar - asciiLineBreakTableFirstChar +
191 1, 199 1,
192 "asciiLineBreakTable should be consistent"); 200 "asciiLineBreakTable should be consistent");
193 static_assert(WTF_ARRAY_LENGTH(breakAllLineBreakClassTable) == U_LB_COUNT, 201 static_assert(WTF_ARRAY_LENGTH(breakAllLineBreakClassTable) == BA_LB_COUNT,
194 "breakAllLineBreakClassTable should be consistent"); 202 "breakAllLineBreakClassTable should be consistent");
195 203
196 static inline bool shouldBreakAfter(UChar lastCh, UChar ch, UChar nextCh) { 204 static inline bool shouldBreakAfter(UChar lastCh, UChar ch, UChar nextCh) {
197 // Don't allow line breaking between '-' and a digit if the '-' may mean a 205 // Don't allow line breaking between '-' and a digit if the '-' may mean a
198 // minus sign in the context, while allow breaking in 'ABCD-1234' and 206 // minus sign in the context, while allow breaking in 'ABCD-1234' and
199 // '1234-5678' which may be in long URLs. 207 // '1234-5678' which may be in long URLs.
200 if (ch == '-' && isASCIIDigit(nextCh)) 208 if (ch == '-' && isASCIIDigit(nextCh))
201 return isASCIIAlphanumeric(lastCh); 209 return isASCIIAlphanumeric(lastCh);
202 210
203 // If both ch and nextCh are ASCII characters, use a lookup table for enhanced 211 // If both ch and nextCh are ASCII characters, use a lookup table for enhanced
(...skipping 15 matching lines...) Expand all
219 if (ch == '+') // IE tailors '+' to AL-like class when break-all is enabled. 227 if (ch == '+') // IE tailors '+' to AL-like class when break-all is enabled.
220 return U_LB_ALPHABETIC; 228 return U_LB_ALPHABETIC;
221 UChar32 ch32 = U16_IS_LEAD(lastCh) && U16_IS_TRAIL(ch) 229 UChar32 ch32 = U16_IS_LEAD(lastCh) && U16_IS_TRAIL(ch)
222 ? U16_GET_SUPPLEMENTARY(lastCh, ch) 230 ? U16_GET_SUPPLEMENTARY(lastCh, ch)
223 : ch; 231 : ch;
224 return static_cast<ULineBreak>(u_getIntPropertyValue(ch32, UCHAR_LINE_BREAK)); 232 return static_cast<ULineBreak>(u_getIntPropertyValue(ch32, UCHAR_LINE_BREAK));
225 } 233 }
226 234
227 static inline bool shouldBreakAfterBreakAll(ULineBreak lastLineBreak, 235 static inline bool shouldBreakAfterBreakAll(ULineBreak lastLineBreak,
228 ULineBreak lineBreak) { 236 ULineBreak lineBreak) {
229 if (lineBreak >= 0 && lineBreak < U_LB_COUNT && lastLineBreak >= 0 && 237 if (lineBreak >= 0 && lineBreak < BA_LB_COUNT && lastLineBreak >= 0 &&
230 lastLineBreak < U_LB_COUNT) { 238 lastLineBreak < BA_LB_COUNT) {
231 const unsigned char* tableRow = breakAllLineBreakClassTable[lastLineBreak]; 239 const unsigned char* tableRow = breakAllLineBreakClassTable[lastLineBreak];
232 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8)); 240 return tableRow[lineBreak / 8] & (1 << (lineBreak % 8));
233 } 241 }
234 return false; 242 return false;
235 } 243 }
236 244
237 inline bool needsLineBreakIterator(UChar ch) { 245 inline bool needsLineBreakIterator(UChar ch) {
238 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter; 246 return ch > asciiLineBreakTableLastChar && ch != noBreakSpaceCharacter;
239 } 247 }
240 248
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after
427 435
428 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos) { 436 int LazyLineBreakIterator::nextBreakablePositionKeepAll(int pos) {
429 if (m_string.is8Bit()) 437 if (m_string.is8Bit())
430 return nextBreakablePosition<LChar, LineBreakType::Normal>( 438 return nextBreakablePosition<LChar, LineBreakType::Normal>(
431 *this, m_string.characters8(), m_string.length(), pos); 439 *this, m_string.characters8(), m_string.length(), pos);
432 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(), 440 return nextBreakablePositionKeepAllInternal(*this, m_string.characters16(),
433 m_string.length(), pos); 441 m_string.length(), pos);
434 } 442 }
435 443
436 } // namespace blink 444 } // namespace blink
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698