| OLD | NEW |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/unicode.h" | 5 #include "vm/unicode.h" |
| 6 | 6 |
| 7 #include "vm/allocation.h" | 7 #include "vm/allocation.h" |
| 8 #include "vm/globals.h" | 8 #include "vm/globals.h" |
| 9 #include "vm/object.h" | 9 #include "vm/object.h" |
| 10 | 10 |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 69 return code_point > 0x10FFFF; | 69 return code_point > 0x10FFFF; |
| 70 } | 70 } |
| 71 | 71 |
| 72 | 72 |
| 73 // Returns true if the byte sequence is ill-formed. | 73 // Returns true if the byte sequence is ill-formed. |
| 74 static bool IsNonShortestForm(uint32_t code_point, size_t num_bytes) { | 74 static bool IsNonShortestForm(uint32_t code_point, size_t num_bytes) { |
| 75 return code_point < kOverlongMinimum[num_bytes]; | 75 return code_point < kOverlongMinimum[num_bytes]; |
| 76 } | 76 } |
| 77 | 77 |
| 78 | 78 |
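| 79 // Returns the number of code points in the NUL-terminated UTF-8 string str (each byte that is not a trail byte is counted) and sets *width to 1, 2, or 4 according to the largest lead byte seen. |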
| 79 intptr_t Utf8::CodePointCount(const char* str, intptr_t* width) { | 80 intptr_t Utf8::CodePointCount(const char* str, intptr_t* width) { |
| 80 bool is_two_byte_string = false; | 81 bool is_two_byte_string = false; |
| 81 bool is_four_byte_string = false; | 82 bool is_four_byte_string = false; |
| 82 intptr_t len = 0; | 83 intptr_t len = 0; |
| 83 for (; *str != '\0'; ++str) { | 84 for (; *str != '\0'; ++str) { |
| 84 uint8_t code_unit = *str; | 85 uint8_t code_unit = *str; |
| 85 if (!IsTrailByte(code_unit)) { | 86 if (!IsTrailByte(code_unit)) { |
| 86 ++len; | 87 ++len; |
| 87 } | 88 } |
| 88 if (code_unit > 0xC3) { // > U+00FF | 89 if (code_unit > 0xC3) { // > U+00FF |
| 89 if (code_unit < 0xF0) { // < U+10000 | 90 if (code_unit < 0xF0) { // < U+10000 |
| 90 is_two_byte_string = true; | 91 is_two_byte_string = true; |
| 91 } else { | 92 } else { |
| 92 is_four_byte_string = true; | 93 is_four_byte_string = true; |
| 93 } | 94 } |
| 94 } | 95 } |
| 95 } | 96 } |
| 96 if (is_four_byte_string) { | 97 if (is_four_byte_string) { |
| 97 *width = 4; | 98 *width = 4; |
| 98 } else if (is_two_byte_string) { | 99 } else if (is_two_byte_string) { |
| 99 *width = 2; | 100 *width = 2; |
| 100 } else { | 101 } else { |
| 101 *width = 1; | 102 *width = 1; |
| 102 } | 103 } |
| 103 return len; | 104 return len; |
| 104 } | 105 } |
| 105 | 106 |
| 106 | 107 |
| 108 // Returns true if str is a valid NUL-terminated UTF-8 string. |
| 109 bool Utf8::IsValid(const char* str) { |
| 110 intptr_t i = 0; |
| 111 while (str[i] != '\0') { |
| 112 uint32_t ch = str[i] & 0xFF; |
| 113 intptr_t j = 1; |
| 114 if (ch >= 0x80) { |
| 115 uint8_t num_trail_bytes = kTrailBytes[ch]; |
| 116 bool is_malformed = false; |
| 117 for (; j < num_trail_bytes; ++j) { |
| 118 if (str[i + j] != '\0') { |
| 119 uint8_t code_unit = str[i + j]; |
| 120 is_malformed |= !IsTrailByte(code_unit); |
| 121 ch = (ch << 6) + code_unit; |
| 122 } else { |
| 123 return false; |
| 124 } |
| 125 } |
| 126 ch -= kMagicBits[num_trail_bytes]; |
| 127 if (!((is_malformed == false) && |
| 128 (j == num_trail_bytes) && |
| 129 !IsOutOfRange(ch) && |
| 130 !IsNonShortestForm(ch, j) && |
| 131 !IsSurrogate(ch))) { |
| 132 return false; |
| 133 } |
| 134 } |
| 135 i += j; |
| 136 } |
| 137 return true; |
| 138 } |
| 139 |
| 140 |
| 107 intptr_t Utf8::Length(int32_t ch) { | 141 intptr_t Utf8::Length(int32_t ch) { |
| 108 if (ch <= kMaxOneByteChar) { | 142 if (ch <= kMaxOneByteChar) { |
| 109 return 1; | 143 return 1; |
| 110 } else if (ch <= kMaxTwoByteChar) { | 144 } else if (ch <= kMaxTwoByteChar) { |
| 111 return 2; | 145 return 2; |
| 112 } else if (ch <= kMaxThreeByteChar) { | 146 } else if (ch <= kMaxThreeByteChar) { |
| 113 return 3; | 147 return 3; |
| 114 } | 148 } |
| 115 ASSERT(ch <= kMaxFourByteChar); | 149 ASSERT(ch <= kMaxFourByteChar); |
| 116 return 4; | 150 return 4; |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 227 bool Utf8::Decode(const char* src, uint16_t* dst, intptr_t len) { | 261 bool Utf8::Decode(const char* src, uint16_t* dst, intptr_t len) { |
| 228 return DecodeImpl(src, dst, len); | 262 return DecodeImpl(src, dst, len); |
| 229 } | 263 } |
| 230 | 264 |
| 231 | 265 |
| 232 bool Utf8::Decode(const char* src, uint32_t* dst, intptr_t len) { | 266 bool Utf8::Decode(const char* src, uint32_t* dst, intptr_t len) { |
| 233 return DecodeImpl(src, dst, len); | 267 return DecodeImpl(src, dst, len); |
| 234 } | 268 } |
| 235 | 269 |
| 236 } // namespace dart | 270 } // namespace dart |
| OLD | NEW |