Index: src/unicode-inl.h |
=================================================================== |
--- src/unicode-inl.h (revision 10944) |
+++ src/unicode-inl.h (working copy) |
@@ -78,7 +78,7 @@ |
} |
-unsigned Utf8::Encode(char* str, uchar c) { |
+unsigned Utf8::Encode(char* str, uchar c, int previous) { |
static const int kMask = ~(1 << 6); |
if (c <= kMaxOneByteChar) { |
str[0] = c; |
@@ -88,6 +88,13 @@ |
str[1] = 0x80 | (c & kMask); |
return 2; |
} else if (c <= kMaxThreeByteChar) { |
+ if (Utf16::IsTrailSurrogate(c) && |
+ previous != kNoPreviousCharacter && |
rossberg
2012/03/07 13:32:47
Isn't that implied by Utf16::IsLeadSurrogate(previous)?
Erik Corry
2012/03/11 19:29:22
No, but it should be. Fixed.
|
+ Utf16::IsLeadSurrogate(previous)) { |
+ return Encode(str - 3, |
+ Utf16::CombineSurrogatePair(previous, c), |
+ Utf8::kNoPreviousCharacter) - 3; |
rossberg
2012/03/07 13:32:47
kSizeOfUnmatchedSurrogate instead of 3, perhaps (he…
Erik Corry
2012/03/11 19:29:22
Done.
|
+ } |
str[0] = 0xE0 | (c >> 12); |
str[1] = 0x80 | ((c >> 6) & kMask); |
str[2] = 0x80 | (c & kMask); |
@@ -113,12 +120,17 @@ |
return CalculateValue(bytes, length, cursor); |
} |
-unsigned Utf8::Length(uchar c) { |
+unsigned Utf8::Length(uchar c, int previous) { |
if (c <= kMaxOneByteChar) { |
return 1; |
} else if (c <= kMaxTwoByteChar) { |
return 2; |
} else if (c <= kMaxThreeByteChar) { |
+ if (Utf16::IsTrailSurrogate(c) && |
+ previous != kNoPreviousCharacter && |
rossberg
2012/03/07 13:32:47
See above.
Erik Corry
2012/03/11 19:29:22
Done.
|
+ Utf16::IsLeadSurrogate(previous)) { |
+ return 1; |
rossberg
2012/03/07 13:32:47
This is 4 minus the 3 already counted, I suppose.
Erik Corry
2012/03/11 19:29:22
Fixed
|
+ } |
return 3; |
} else { |
return 4; |