Index: vm/unicode.h |
=================================================================== |
--- vm/unicode.h (revision 14046) |
+++ vm/unicode.h (working copy) |
@@ -14,13 +14,25 @@ |
class Utf8 : AllStatic { |
public: |
+ enum Type { |
+ kISOLatin1 = 0, // ISO Latin-1 character set. |
cshapiro
2012/10/24 23:52:29
Any reason for the "= 0" here? Do we care about t
siva
2012/10/26 21:38:29
No just following general style we have used for e
|
+ kBMP, // Basic Multilingual Plane. |
+ kSMP, // Supplementary Multilingual Plane. |
+ }; |
+ |
cshapiro
2012/10/24 23:52:29
Adding constants for kMaxCodePoint and kMaxBmpCode
siva
2012/10/26 21:38:29
Done.
|
static const intptr_t kMaxOneByteChar = 0x7F; |
static const intptr_t kMaxTwoByteChar = 0x7FF; |
static const intptr_t kMaxThreeByteChar = 0xFFFF; |
static const intptr_t kMaxFourByteChar = 0x10FFFF; |
+ static const int32_t kLeadOffset = (0xD800 - (0x10000 >> 10)); |
+ static const int32_t kSurrogateOffset = (0x10000 - (0xD800 << 10) - 0xDC00); |
- static intptr_t CodePointCount(const char* str, intptr_t* width); |
+ static void ConvertUTF32ToUTF16(int32_t codepoint, uint16_t* dst); |
+ static intptr_t CodePointCount(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ Type* type); |
+ // Returns true if the C string src is a valid UTF-8 string. |
cshapiro
2012/10/24 23:52:29
...true if src is a...
siva
2012/10/26 21:38:29
Done.
|
static bool IsValid(const char* src); |
static intptr_t Length(int32_t ch); |
@@ -29,10 +41,30 @@ |
static intptr_t Encode(int32_t ch, char* dst); |
static intptr_t Encode(const String& src, char* dst, intptr_t len); |
- static intptr_t Decode(const char*, int32_t* ch); |
- static bool Decode(const char* src, uint8_t* dst, intptr_t len); |
- static bool Decode(const char* src, uint16_t* dst, intptr_t len); |
- static bool Decode(const char* src, uint32_t* dst, intptr_t len); |
+ static intptr_t Decode(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ int32_t* ch); |
+ |
+ static bool DecodeToISOLatin1(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ uint8_t* dst, |
+ intptr_t len); |
+ static bool DecodeToUTF16(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ uint16_t* dst, |
+ intptr_t len); |
+ static bool DecodeToUTF32(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ uint32_t* dst, |
+ intptr_t len); |
+ // Convenience wrapper: measures the C string str with strlen() and forwards |
+ // its bytes to DecodeToUTF32, returning that call's result. str must not be NULL. |
+ static bool DecodeCStringToUTF32(const char* str, |
+ uint32_t* dst, |
+ intptr_t len) { |
+ ASSERT(str != NULL); |
+ intptr_t array_len = strlen(str); |
+ const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); |
+ return DecodeToUTF32(utf8_array, array_len, dst, len); |
+ } |
}; |