Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: src/heap.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 4157 matching lines...) Expand 10 before | Expand all | Expand 10 after
4168 SeqAsciiString* string_result = SeqAsciiString::cast(result); 4168 SeqAsciiString* string_result = SeqAsciiString::cast(result);
4169 for (int i = 0; i < string.length(); i++) { 4169 for (int i = 0; i < string.length(); i++) {
4170 string_result->SeqAsciiStringSet(i, string[i]); 4170 string_result->SeqAsciiStringSet(i, string[i]);
4171 } 4171 }
4172 return result; 4172 return result;
4173 } 4173 }
4174 4174
4175 4175
4176 MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string, 4176 MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
4177 PretenureFlag pretenure) { 4177 PretenureFlag pretenure) {
4178 // V8 only supports characters in the Basic Multilingual Plane.
4179 const uc32 kMaxSupportedChar = 0xFFFF;
4180 // Count the number of characters in the UTF-8 string and check if 4178 // Count the number of characters in the UTF-8 string and check if
4181 // it is an ASCII string. 4179 // it is an ASCII string.
4182 Access<UnicodeCache::Utf8Decoder> 4180 Access<UnicodeCache::Utf8Decoder>
4183 decoder(isolate_->unicode_cache()->utf8_decoder()); 4181 decoder(isolate_->unicode_cache()->utf8_decoder());
4184 decoder->Reset(string.start(), string.length()); 4182 decoder->Reset(string.start(), string.length());
4185 int chars = 0; 4183 int chars = 0;
4186 while (decoder->has_more()) { 4184 while (decoder->has_more()) {
4187 decoder->GetNext(); 4185 uint32_t r = decoder->GetNext();
4188 chars++; 4186 if (r <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
4187 chars++;
4188 } else {
4189 chars += 2;
4190 }
4189 } 4191 }
4190 4192
4191 Object* result; 4193 Object* result;
4192 { MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure); 4194 { MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure);
4193 if (!maybe_result->ToObject(&result)) return maybe_result; 4195 if (!maybe_result->ToObject(&result)) return maybe_result;
4194 } 4196 }
4195 4197
4196 // Convert and copy the characters into the new object. 4198 // Convert and copy the characters into the new object.
4197 String* string_result = String::cast(result); 4199 String* string_result = String::cast(result);
4198 decoder->Reset(string.start(), string.length()); 4200 decoder->Reset(string.start(), string.length());
4199 for (int i = 0; i < chars; i++) { 4201 int i = 0;
4200 uc32 r = decoder->GetNext(); 4202 while (i < chars) {
4201 if (r > kMaxSupportedChar) { r = unibrow::Utf8::kBadChar; } 4203 uint32_t r = decoder->GetNext();
4202 string_result->Set(i, r); 4204 if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
4205 string_result->Set(i++, unibrow::Utf16::LeadSurrogate(r));
4206 string_result->Set(i++, unibrow::Utf16::TrailSurrogate(r));
4207 } else {
4208 string_result->Set(i++, r);
4209 }
4203 } 4210 }
4204 return result; 4211 return result;
4205 } 4212 }
4206 4213
4207 4214
4208 MaybeObject* Heap::AllocateStringFromTwoByte(Vector<const uc16> string, 4215 MaybeObject* Heap::AllocateStringFromTwoByte(Vector<const uc16> string,
4209 PretenureFlag pretenure) { 4216 PretenureFlag pretenure) {
4210 // Check if the string is an ASCII string. 4217 // Check if the string is an ASCII string.
4211 MaybeObject* maybe_result; 4218 MaybeObject* maybe_result;
4212 if (String::IsAscii(string.start(), string.length())) { 4219 if (String::IsAscii(string.start(), string.length())) {
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
4249 default: return NULL; // No match found. 4256 default: return NULL; // No match found.
4250 } 4257 }
4251 } 4258 }
4252 4259
4253 4260
4254 MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer, 4261 MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,
4255 int chars, 4262 int chars,
4256 uint32_t hash_field) { 4263 uint32_t hash_field) {
4257 ASSERT(chars >= 0); 4264 ASSERT(chars >= 0);
4258 // Ensure the chars matches the number of characters in the buffer. 4265 // Ensure the chars matches the number of characters in the buffer.
4259 ASSERT(static_cast<unsigned>(chars) == buffer->Length()); 4266 ASSERT(static_cast<unsigned>(chars) == buffer->Utf16Length());
4260 // Determine whether the string is ASCII. 4267 // Determine whether the string is ASCII.
4261 bool is_ascii = true; 4268 bool is_ascii = true;
4262 while (buffer->has_more()) { 4269 while (buffer->has_more()) {
4263 if (buffer->GetNext() > unibrow::Utf8::kMaxOneByteChar) { 4270 if (buffer->GetNext() > unibrow::Utf8::kMaxOneByteChar) {
4264 is_ascii = false; 4271 is_ascii = false;
4265 break; 4272 break;
4266 } 4273 }
4267 } 4274 }
4268 buffer->Rewind(); 4275 buffer->Rewind();
4269 4276
(...skipping 25 matching lines...) Expand all
4295 4302
4296 reinterpret_cast<HeapObject*>(result)->set_map_no_write_barrier(map); 4303 reinterpret_cast<HeapObject*>(result)->set_map_no_write_barrier(map);
4297 // Set length and hash fields of the allocated string. 4304 // Set length and hash fields of the allocated string.
4298 String* answer = String::cast(result); 4305 String* answer = String::cast(result);
4299 answer->set_length(chars); 4306 answer->set_length(chars);
4300 answer->set_hash_field(hash_field); 4307 answer->set_hash_field(hash_field);
4301 4308
4302 ASSERT_EQ(size, answer->Size()); 4309 ASSERT_EQ(size, answer->Size());
4303 4310
4304 // Fill in the characters. 4311 // Fill in the characters.
4305 for (int i = 0; i < chars; i++) { 4312 int i = 0;
4306 answer->Set(i, buffer->GetNext()); 4313 while (i < chars) {
4314 uint32_t character = buffer->GetNext();
4315 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
4316 answer->Set(i++, unibrow::Utf16::LeadSurrogate(character));
4317 answer->Set(i++, unibrow::Utf16::TrailSurrogate(character));
4318 } else {
4319 answer->Set(i++, character);
4320 }
4307 } 4321 }
4308 return answer; 4322 return answer;
4309 } 4323 }
4310 4324
4311 4325
4312 MaybeObject* Heap::AllocateRawAsciiString(int length, PretenureFlag pretenure) { 4326 MaybeObject* Heap::AllocateRawAsciiString(int length, PretenureFlag pretenure) {
4313 if (length < 0 || length > SeqAsciiString::kMaxLength) { 4327 if (length < 0 || length > SeqAsciiString::kMaxLength) {
4314 return Failure::OutOfMemoryException(); 4328 return Failure::OutOfMemoryException();
4315 } 4329 }
4316 4330
(...skipping 2624 matching lines...) Expand 10 before | Expand all | Expand 10 after
6941 isolate_->heap()->store_buffer()->Compact(); 6955 isolate_->heap()->store_buffer()->Compact();
6942 isolate_->heap()->store_buffer()->Filter(MemoryChunk::ABOUT_TO_BE_FREED); 6956 isolate_->heap()->store_buffer()->Filter(MemoryChunk::ABOUT_TO_BE_FREED);
6943 for (chunk = chunks_queued_for_free_; chunk != NULL; chunk = next) { 6957 for (chunk = chunks_queued_for_free_; chunk != NULL; chunk = next) {
6944 next = chunk->next_chunk(); 6958 next = chunk->next_chunk();
6945 isolate_->memory_allocator()->Free(chunk); 6959 isolate_->memory_allocator()->Free(chunk);
6946 } 6960 }
6947 chunks_queued_for_free_ = NULL; 6961 chunks_queued_for_free_ = NULL;
6948 } 6962 }
6949 6963
6950 } } // namespace v8::internal 6964 } } // namespace v8::internal
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698