| Index: runtime/vm/unicode.cc
|
| diff --git a/runtime/vm/unicode.cc b/runtime/vm/unicode.cc
|
| index 77dc65aae8abf6eb24c0b2339f6f46b9be50f7a5..d9dca3e45cfecd58179eb3db7349a101edc5f5fc 100644
|
| --- a/runtime/vm/unicode.cc
|
| +++ b/runtime/vm/unicode.cc
|
| @@ -76,6 +76,7 @@ static bool IsNonShortestForm(uint32_t code_point, size_t num_bytes) {
|
| }
|
|
|
|
|
| +// Returns a count of the number of UTF-8 trail bytes.
|
| intptr_t Utf8::CodePointCount(const char* str, intptr_t* width) {
|
| bool is_two_byte_string = false;
|
| bool is_four_byte_string = false;
|
| @@ -104,6 +105,39 @@ intptr_t Utf8::CodePointCount(const char* str, intptr_t* width) {
|
| }
|
|
|
|
|
| +// Reports whether the NUL-terminated string str is well-formed UTF-8.
|
| +// Truncated sequences, non-trail bytes inside a sequence, and values that
|
| +// fail the range, shortest-form or surrogate checks all yield false.
|
| +bool Utf8::IsValid(const char* str) {
|
| +  for (intptr_t pos = 0; str[pos] != '\0';) {
|
| +    uint32_t code_point = str[pos] & 0xFF;
|
| +    if (code_point < 0x80) {
|
| +      ++pos;  // Single-byte character: nothing more to check.
|
| +      continue;
|
| +    }
|
| +    // The kTrailBytes entry is used as the full sequence length, lead byte
|
| +    // included: counting starts at 1 and must end exactly on this value.
|
| +    const uint8_t seq_len = kTrailBytes[code_point];
|
| +    bool saw_bad_trail = false;
|
| +    intptr_t consumed = 1;
|
| +    while (consumed < seq_len) {
|
| +      const uint8_t code_unit = str[pos + consumed];
|
| +      if (code_unit == '\0') return false;  // String ends mid-sequence.
|
| +      if (!IsTrailByte(code_unit)) saw_bad_trail = true;
|
| +      code_point = (code_point << 6) + code_unit;
|
| +      ++consumed;
|
| +    }
|
| +    code_point -= kMagicBits[seq_len];
|
| +    if (saw_bad_trail || (consumed != seq_len) || IsOutOfRange(code_point) ||
|
| +        IsNonShortestForm(code_point, consumed) || IsSurrogate(code_point)) {
|
| +      return false;
|
| +    }
|
| +    pos += consumed;
|
| +  }
|
| +  return true;
|
| +}
|
| +
|
| +
|
| intptr_t Utf8::Length(int32_t ch) {
|
| if (ch <= kMaxOneByteChar) {
|
| return 1;
|
|
|