Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1929)

Unified Diff: vm/unicode.h

Issue 11275008: - Represent strings internally in UTF-16 format, this makes it (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/runtime/
Patch Set: Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: vm/unicode.h
===================================================================
--- vm/unicode.h (revision 14046)
+++ vm/unicode.h (working copy)
@@ -14,13 +14,25 @@
class Utf8 : AllStatic {
public:
+ enum Type {
+ kISOLatin1 = 0, // ISO Latin-1 character set.
cshapiro 2012/10/24 23:52:29 Any reason for the "= 0" here? Do we care about t
siva 2012/10/26 21:38:29 No just following general style we have used for e
+ kBMP, // Basic Multilingual Plane.
+ kSMP, // Supplementary Multilingual Plane.
+ };
+
cshapiro 2012/10/24 23:52:29 Adding constants for kMaxCodePoint and kMaxBmpCode
siva 2012/10/26 21:38:29 Done.
static const intptr_t kMaxOneByteChar = 0x7F;
static const intptr_t kMaxTwoByteChar = 0x7FF;
static const intptr_t kMaxThreeByteChar = 0xFFFF;
static const intptr_t kMaxFourByteChar = 0x10FFFF;
+ static const int32_t kLeadOffset = (0xD800 - (0x10000 >> 10));
+ static const int32_t kSurrogateOffset = (0x10000 - (0xD800 << 10) - 0xDC00);
- static intptr_t CodePointCount(const char* str, intptr_t* width);
+ static void ConvertUTF32ToUTF16(int32_t codepoint, uint16_t* dst);
+ static intptr_t CodePointCount(const uint8_t* utf8_array,
+ intptr_t array_len,
+ Type* type);
+ // Returns true if C string is a valid UTF-8 string.
cshapiro 2012/10/24 23:52:29 ...true if src is a...
siva 2012/10/26 21:38:29 Done.
static bool IsValid(const char* src);
static intptr_t Length(int32_t ch);
@@ -29,10 +41,30 @@
static intptr_t Encode(int32_t ch, char* dst);
static intptr_t Encode(const String& src, char* dst, intptr_t len);
- static intptr_t Decode(const char*, int32_t* ch);
- static bool Decode(const char* src, uint8_t* dst, intptr_t len);
- static bool Decode(const char* src, uint16_t* dst, intptr_t len);
- static bool Decode(const char* src, uint32_t* dst, intptr_t len);
+ static intptr_t Decode(const uint8_t* utf8_array,
+ intptr_t array_len,
+ int32_t* ch);
+
+ static bool DecodeToISOLatin1(const uint8_t* utf8_array,
+ intptr_t array_len,
+ uint8_t* dst,
+ intptr_t len);
+ static bool DecodeToUTF16(const uint8_t* utf8_array,
+ intptr_t array_len,
+ uint16_t* dst,
+ intptr_t len);
+ static bool DecodeToUTF32(const uint8_t* utf8_array,
+ intptr_t array_len,
+ uint32_t* dst,
+ intptr_t len);
+ static bool DecodeCStringToUTF32(const char* str,  // Wrapper that decodes a NUL-terminated string by forwarding to DecodeToUTF32.
+ uint32_t* dst,  // Output buffer, passed through unchanged to DecodeToUTF32.
+ intptr_t len) {  // Passed through unchanged as DecodeToUTF32's final argument.
+ ASSERT(str != NULL);  // str is handed to strlen on the next line, so it must not be NULL.
+ intptr_t array_len = strlen(str);  // Byte count up to, but not including, the terminating NUL.
+ const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str);  // Same bytes viewed as uint8_t, the pointer type DecodeToUTF32 accepts.
+ return DecodeToUTF32(utf8_array, array_len, dst, len);  // The result is exactly what DecodeToUTF32 returns.
+ }
};

Powered by Google App Engine
This is Rietveld 408576698