Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(699)

Side by Side Diff: runtime/vm/unicode.cc

Issue 10584044: Check that the C string passed to Dart_NewString is valid UTF-8. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« runtime/vm/dart_api_impl.cc ('K') | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/unicode.h" 5 #include "vm/unicode.h"
6 6
7 #include "vm/allocation.h" 7 #include "vm/allocation.h"
8 #include "vm/globals.h" 8 #include "vm/globals.h"
9 #include "vm/object.h" 9 #include "vm/object.h"
10 10
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
69 return code_point > 0x10FFFF; 69 return code_point > 0x10FFFF;
70 } 70 }
71 71
72 72
73 // Returns true if the byte sequence is ill-formed. 73 // Returns true if the byte sequence is ill-formed.
74 static bool IsNonShortestForm(uint32_t code_point, size_t num_bytes) { 74 static bool IsNonShortestForm(uint32_t code_point, size_t num_bytes) {
75 return code_point < kOverlongMinimum[num_bytes]; 75 return code_point < kOverlongMinimum[num_bytes];
76 } 76 }
77 77
78 78
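79 // Returns the number of code points in str, i.e. the count of bytes that are not UTF-8 trail bytes, and stores the character width (1, 2, or 4) in *width.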
79 intptr_t Utf8::CodePointCount(const char* str, intptr_t* width) { 80 intptr_t Utf8::CodePointCount(const char* str, intptr_t* width) {
80 bool is_two_byte_string = false; 81 bool is_two_byte_string = false;
81 bool is_four_byte_string = false; 82 bool is_four_byte_string = false;
82 intptr_t len = 0; 83 intptr_t len = 0;
83 for (; *str != '\0'; ++str) { 84 for (; *str != '\0'; ++str) {
84 uint8_t code_unit = *str; 85 uint8_t code_unit = *str;
85 if (!IsTrailByte(code_unit)) { 86 if (!IsTrailByte(code_unit)) {
86 ++len; 87 ++len;
87 } 88 }
88 if (code_unit > 0xC3) { // > U+00FF 89 if (code_unit > 0xC3) { // > U+00FF
89 if (code_unit < 0xF0) { // < U+10000 90 if (code_unit < 0xF0) { // < U+10000
90 is_two_byte_string = true; 91 is_two_byte_string = true;
91 } else { 92 } else {
92 is_four_byte_string = true; 93 is_four_byte_string = true;
93 } 94 }
94 } 95 }
95 } 96 }
96 if (is_four_byte_string) { 97 if (is_four_byte_string) {
97 *width = 4; 98 *width = 4;
98 } else if (is_two_byte_string) { 99 } else if (is_two_byte_string) {
99 *width = 2; 100 *width = 2;
100 } else { 101 } else {
101 *width = 1; 102 *width = 1;
102 } 103 }
103 return len; 104 return len;
104 } 105 }
105 106
106 107
108 // Returns true if str is a valid NUL-terminated UTF-8 string.
109 bool Utf8::IsValid(const char* str) {
110 intptr_t i = 0;
111 while (str[i] != '\0') {
112 uint32_t ch = str[i] & 0xFF;
113 intptr_t j = 1;
114 if (ch >= 0x80) {
115 uint8_t num_trail_bytes = kTrailBytes[ch];
116 bool is_malformed = false;
117 for (; j < num_trail_bytes; ++j) {
118 if (str[i + j] != '\0') {
119 uint8_t code_unit = str[i + j];
120 is_malformed |= !IsTrailByte(code_unit);
121 ch = (ch << 6) + code_unit;
122 } else {
123 return false;
124 }
125 }
126 ch -= kMagicBits[num_trail_bytes];
127 if (!((is_malformed == false) &&
128 (j == num_trail_bytes) &&
129 !IsOutOfRange(ch) &&
130 !IsNonShortestForm(ch, j) &&
131 !IsSurrogate(ch))) {
132 return false;
133 }
134 }
135 i += j;
136 }
137 return true;
138 }
139
140
107 intptr_t Utf8::Length(int32_t ch) { 141 intptr_t Utf8::Length(int32_t ch) {
108 if (ch <= kMaxOneByteChar) { 142 if (ch <= kMaxOneByteChar) {
109 return 1; 143 return 1;
110 } else if (ch <= kMaxTwoByteChar) { 144 } else if (ch <= kMaxTwoByteChar) {
111 return 2; 145 return 2;
112 } else if (ch <= kMaxThreeByteChar) { 146 } else if (ch <= kMaxThreeByteChar) {
113 return 3; 147 return 3;
114 } 148 }
115 ASSERT(ch <= kMaxFourByteChar); 149 ASSERT(ch <= kMaxFourByteChar);
116 return 4; 150 return 4;
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
227 bool Utf8::Decode(const char* src, uint16_t* dst, intptr_t len) { 261 bool Utf8::Decode(const char* src, uint16_t* dst, intptr_t len) {
228 return DecodeImpl(src, dst, len); 262 return DecodeImpl(src, dst, len);
229 } 263 }
230 264
231 265
232 bool Utf8::Decode(const char* src, uint32_t* dst, intptr_t len) { 266 bool Utf8::Decode(const char* src, uint32_t* dst, intptr_t len) {
233 return DecodeImpl(src, dst, len); 267 return DecodeImpl(src, dst, len);
234 } 268 }
235 269
236 } // namespace dart 270 } // namespace dart
OLDNEW
« runtime/vm/dart_api_impl.cc ('K') | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698