Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(269)

Side by Side Diff: src/api.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/arm/regexp-macro-assembler-arm.cc » ('j') | src/handles.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1411 matching lines...) Expand 10 before | Expand all | Expand 10 after
1422 EnsureConstructor(this); 1422 EnsureConstructor(this);
1423 } 1423 }
1424 Utils::OpenHandle(this)->set_internal_field_count(i::Smi::FromInt(value)); 1424 Utils::OpenHandle(this)->set_internal_field_count(i::Smi::FromInt(value));
1425 } 1425 }
1426 1426
1427 1427
1428 // --- S c r i p t D a t a --- 1428 // --- S c r i p t D a t a ---
1429 1429
1430 1430
1431 ScriptData* ScriptData::PreCompile(const char* input, int length) { 1431 ScriptData* ScriptData::PreCompile(const char* input, int length) {
1432 i::Utf8ToUC16CharacterStream stream( 1432 i::Utf8ToUtf16CharacterStream stream(
1433 reinterpret_cast<const unsigned char*>(input), length); 1433 reinterpret_cast<const unsigned char*>(input), length);
1434 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping); 1434 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
1435 } 1435 }
1436 1436
1437 1437
1438 ScriptData* ScriptData::PreCompile(v8::Handle<String> source) { 1438 ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {
1439 i::Handle<i::String> str = Utils::OpenHandle(*source); 1439 i::Handle<i::String> str = Utils::OpenHandle(*source);
1440 if (str->IsExternalTwoByteString()) { 1440 if (str->IsExternalTwoByteString()) {
1441 i::ExternalTwoByteStringUC16CharacterStream stream( 1441 i::ExternalTwoByteStringUtf16CharacterStream stream(
1442 i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length()); 1442 i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());
1443 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping); 1443 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
1444 } else { 1444 } else {
1445 i::GenericStringUC16CharacterStream stream(str, 0, str->length()); 1445 i::GenericStringUtf16CharacterStream stream(str, 0, str->length());
1446 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping); 1446 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
1447 } 1447 }
1448 } 1448 }
1449 1449
1450 1450
1451 ScriptData* ScriptData::New(const char* data, int length) { 1451 ScriptData* ScriptData::New(const char* data, int length) {
1452 // Return an empty ScriptData if the length is obviously invalid. 1452 // Return an empty ScriptData if the length is obviously invalid.
1453 if (length % sizeof(unsigned) != 0) { 1453 if (length % sizeof(unsigned) != 0) {
1454 return new i::ScriptDataImpl(); 1454 return new i::ScriptDataImpl();
1455 } 1455 }
(...skipping 2226 matching lines...) Expand 10 before | Expand all | Expand 10 after
3682 int String::Length() const { 3682 int String::Length() const {
3683 i::Handle<i::String> str = Utils::OpenHandle(this); 3683 i::Handle<i::String> str = Utils::OpenHandle(this);
3684 if (IsDeadCheck(str->GetIsolate(), "v8::String::Length()")) return 0; 3684 if (IsDeadCheck(str->GetIsolate(), "v8::String::Length()")) return 0;
3685 return str->length(); 3685 return str->length();
3686 } 3686 }
3687 3687
3688 3688
3689 int String::Utf8Length() const { 3689 int String::Utf8Length() const {
3690 i::Handle<i::String> str = Utils::OpenHandle(this); 3690 i::Handle<i::String> str = Utils::OpenHandle(this);
3691 if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0; 3691 if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;
3692 return str->Utf8Length(); 3692 return i::Utf8Length(str);
3693 } 3693 }
3694 3694
3695 3695
3696 int String::WriteUtf8(char* buffer, 3696 int String::WriteUtf8(char* buffer,
3697 int capacity, 3697 int capacity,
3698 int* nchars_ref, 3698 int* nchars_ref,
3699 int options) const { 3699 int options) const {
3700 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); 3700 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate();
3701 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0; 3701 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0;
3702 LOG_API(isolate, "String::WriteUtf8"); 3702 LOG_API(isolate, "String::WriteUtf8");
(...skipping 25 matching lines...) Expand all
3728 } 3728 }
3729 write_input_buffer.Reset(0, *str); 3729 write_input_buffer.Reset(0, *str);
3730 int len = str->length(); 3730 int len = str->length();
3731 // Encode the first K - 3 bytes directly into the buffer since we 3731 // Encode the first K - 3 bytes directly into the buffer since we
3732 // know there's room for them. If no capacity is given we copy all 3732 // know there's room for them. If no capacity is given we copy all
3733 // of them here. 3733 // of them here.
3734 int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1); 3734 int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1);
3735 int i; 3735 int i;
3736 int pos = 0; 3736 int pos = 0;
3737 int nchars = 0; 3737 int nchars = 0;
3738 int previous = unibrow::Utf16::kNoPreviousCharacter;
3738 for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) { 3739 for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
3739 i::uc32 c = write_input_buffer.GetNext(); 3740 i::uc32 c = write_input_buffer.GetNext();
3740 int written = unibrow::Utf8::Encode(buffer + pos, c); 3741 int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
3741 pos += written; 3742 pos += written;
3742 nchars++; 3743 nchars++;
3744 previous = c;
3743 } 3745 }
3744 if (i < len) { 3746 if (i < len) {
3745 // For the last characters we need to check the length for each one 3747 // For the last characters we need to check the length for each one
3746 // because they may be longer than the remaining space in the 3748 // because they may be longer than the remaining space in the
3747 // buffer. 3749 // buffer.
3748 char intermediate[unibrow::Utf8::kMaxEncodedSize]; 3750 char intermediate[unibrow::Utf8::kMaxEncodedSize];
3749 for (; i < len && pos < capacity; i++) { 3751 for (; i < len && pos < capacity; i++) {
3750 i::uc32 c = write_input_buffer.GetNext(); 3752 i::uc32 c = write_input_buffer.GetNext();
3751 int written = unibrow::Utf8::Encode(intermediate, c); 3753 if (unibrow::Utf16::IsTrailSurrogate(c) &&
3752 if (pos + written <= capacity) { 3754 unibrow::Utf16::IsLeadSurrogate(previous)) {
3753 for (int j = 0; j < written; j++) 3755 // We can't use the intermediate buffer here because the encoding
3754 buffer[pos + j] = intermediate[j]; 3756 // of surrogate pairs is done under the assumption that you can step
3757 // back and fix the UTF8 stream. Luckily we only need space for one
3758 // more byte, so there is always space.
3759 ASSERT(pos < capacity);
3760 int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
3761 ASSERT(written == 1);
3755 pos += written; 3762 pos += written;
3756 nchars++; 3763 nchars++;
3757 } else { 3764 } else {
3758 // We've reached the end of the buffer 3765 int written =
3759 break; 3766 unibrow::Utf8::Encode(intermediate,
3767 c,
3768 unibrow::Utf16::kNoPreviousCharacter);
3769 if (pos + written <= capacity) {
3770 for (int j = 0; j < written; j++)
3771 buffer[pos + j] = intermediate[j];
3772 pos += written;
3773 nchars++;
3774 } else {
3775 // We've reached the end of the buffer
3776 break;
3777 }
3760 } 3778 }
3779 previous = c;
3761 } 3780 }
3762 } 3781 }
3763 if (nchars_ref != NULL) *nchars_ref = nchars; 3782 if (nchars_ref != NULL) *nchars_ref = nchars;
3764 if (!(options & NO_NULL_TERMINATION) && 3783 if (!(options & NO_NULL_TERMINATION) &&
3765 (i == len && (capacity == -1 || pos < capacity))) 3784 (i == len && (capacity == -1 || pos < capacity)))
3766 buffer[pos++] = '\0'; 3785 buffer[pos++] = '\0';
3767 return pos; 3786 return pos;
3768 } 3787 }
3769 3788
3770 3789
(...skipping 1459 matching lines...) Expand 10 before | Expand all | Expand 10 after
5230 String::Utf8Value::Utf8Value(v8::Handle<v8::Value> obj) 5249 String::Utf8Value::Utf8Value(v8::Handle<v8::Value> obj)
5231 : str_(NULL), length_(0) { 5250 : str_(NULL), length_(0) {
5232 i::Isolate* isolate = i::Isolate::Current(); 5251 i::Isolate* isolate = i::Isolate::Current();
5233 if (IsDeadCheck(isolate, "v8::String::Utf8Value::Utf8Value()")) return; 5252 if (IsDeadCheck(isolate, "v8::String::Utf8Value::Utf8Value()")) return;
5234 if (obj.IsEmpty()) return; 5253 if (obj.IsEmpty()) return;
5235 ENTER_V8(isolate); 5254 ENTER_V8(isolate);
5236 i::HandleScope scope(isolate); 5255 i::HandleScope scope(isolate);
5237 TryCatch try_catch; 5256 TryCatch try_catch;
5238 Handle<String> str = obj->ToString(); 5257 Handle<String> str = obj->ToString();
5239 if (str.IsEmpty()) return; 5258 if (str.IsEmpty()) return;
5240 length_ = str->Utf8Length(); 5259 i::Handle<i::String> i_str = Utils::OpenHandle(*str);
5260 length_ = i::Utf8Length(i_str);
5241 str_ = i::NewArray<char>(length_ + 1); 5261 str_ = i::NewArray<char>(length_ + 1);
5242 str->WriteUtf8(str_); 5262 str->WriteUtf8(str_);
5243 } 5263 }
5244 5264
5245 5265
5246 String::Utf8Value::~Utf8Value() { 5266 String::Utf8Value::~Utf8Value() {
5247 i::DeleteArray(str_); 5267 i::DeleteArray(str_);
5248 } 5268 }
5249 5269
5250 5270
(...skipping 943 matching lines...) Expand 10 before | Expand all | Expand 10 after
6194 6214
6195 6215
6196 char* HandleScopeImplementer::Iterate(ObjectVisitor* v, char* storage) { 6216 char* HandleScopeImplementer::Iterate(ObjectVisitor* v, char* storage) {
6197 HandleScopeImplementer* scope_implementer = 6217 HandleScopeImplementer* scope_implementer =
6198 reinterpret_cast<HandleScopeImplementer*>(storage); 6218 reinterpret_cast<HandleScopeImplementer*>(storage);
6199 scope_implementer->IterateThis(v); 6219 scope_implementer->IterateThis(v);
6200 return storage + ArchiveSpacePerThread(); 6220 return storage + ArchiveSpacePerThread();
6201 } 6221 }
6202 6222
6203 } } // namespace v8::internal 6223 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | src/arm/regexp-macro-assembler-arm.cc » ('j') | src/handles.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698