Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(202)

Unified Diff: src/unicode-inl.h

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/unicode-inl.h
===================================================================
--- src/unicode-inl.h (revision 10944)
+++ src/unicode-inl.h (working copy)
@@ -78,7 +78,7 @@
}
-unsigned Utf8::Encode(char* str, uchar c) {
+unsigned Utf8::Encode(char* str, uchar c, int previous) {
static const int kMask = ~(1 << 6);
if (c <= kMaxOneByteChar) {
str[0] = c;
@@ -88,6 +88,13 @@
str[1] = 0x80 | (c & kMask);
return 2;
} else if (c <= kMaxThreeByteChar) {
+ if (Utf16::IsTrailSurrogate(c) &&
+ previous != kNoPreviousCharacter &&
rossberg 2012/03/07 13:32:47 Isn't that implied by Utf16::IsLeadSurrogate(previous)? …
Erik Corry 2012/03/11 19:29:22 No, but it should be. Fixed.
+ Utf16::IsLeadSurrogate(previous)) {
+ return Encode(str - 3,
+ Utf16::CombineSurrogatePair(previous, c),
+ Utf8::kNoPreviousCharacter) - 3;
rossberg 2012/03/07 13:32:47 kSizeOfUnmatchedSurrogate instead of 3, perhaps (he…
Erik Corry 2012/03/11 19:29:22 Done.
+ }
str[0] = 0xE0 | (c >> 12);
str[1] = 0x80 | ((c >> 6) & kMask);
str[2] = 0x80 | (c & kMask);
@@ -113,12 +120,17 @@
return CalculateValue(bytes, length, cursor);
}
-unsigned Utf8::Length(uchar c) {
+unsigned Utf8::Length(uchar c, int previous) {
if (c <= kMaxOneByteChar) {
return 1;
} else if (c <= kMaxTwoByteChar) {
return 2;
} else if (c <= kMaxThreeByteChar) {
+ if (Utf16::IsTrailSurrogate(c) &&
+ previous != kNoPreviousCharacter &&
rossberg 2012/03/07 13:32:47 See above.
Erik Corry 2012/03/11 19:29:22 Done.
+ Utf16::IsLeadSurrogate(previous)) {
+ return 1;
rossberg 2012/03/07 13:32:47 This is 4 minus the 3 bytes already counted for the lead surrogate, I suppose.
Erik Corry 2012/03/11 19:29:22 Fixed
+ }
return 3;
} else {
return 4;
« src/unicode.h ('K') | « src/unicode.cc ('k') | src/x64/regexp-macro-assembler-x64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698