src/api.cc - Issue 9600009: Fix input and output to handle UTF16 surrogate pairs.

Side by Side Diff: src/api.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 1411 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1422 EnsureConstructor(this);	1422 EnsureConstructor(this);

1423 }	1423 }

1424 Utils::OpenHandle(this)->set_internal_field_count(i::Smi::FromInt(value));	1424 Utils::OpenHandle(this)->set_internal_field_count(i::Smi::FromInt(value));

1425 }	1425 }

1426	1426

1427	1427

1428 // --- S c r i p t D a t a ---	1428 // --- S c r i p t D a t a ---

1429	1429

1430	1430

1431 ScriptData* ScriptData::PreCompile(const char* input, int length) {	1431 ScriptData* ScriptData::PreCompile(const char* input, int length) {

1432 i::Utf8ToUC16CharacterStream stream(	1432 i::Utf8ToUtf16CharacterStream stream(

1433 reinterpret_cast<const unsigned char*>(input), length);	1433 reinterpret_cast<const unsigned char*>(input), length);

1434 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);	1434 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);

1435 }	1435 }

1436	1436

1437	1437

1438 ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {	1438 ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {

1439 i::Handle<i::String> str = Utils::OpenHandle(*source);	1439 i::Handle<i::String> str = Utils::OpenHandle(*source);

1440 if (str->IsExternalTwoByteString()) {	1440 if (str->IsExternalTwoByteString()) {

1441 i::ExternalTwoByteStringUC16CharacterStream stream(	1441 i::ExternalTwoByteStringUtf16CharacterStream stream(

1442 i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());	1442 i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());

1443 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);	1443 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);

1444 } else {	1444 } else {

1445 i::GenericStringUC16CharacterStream stream(str, 0, str->length());	1445 i::GenericStringUtf16CharacterStream stream(str, 0, str->length());

1446 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);	1446 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);

1447 }	1447 }

1448 }	1448 }

1449	1449

1450	1450

1451 ScriptData* ScriptData::New(const char* data, int length) {	1451 ScriptData* ScriptData::New(const char* data, int length) {

1452 // Return an empty ScriptData if the length is obviously invalid.	1452 // Return an empty ScriptData if the length is obviously invalid.

1453 if (length % sizeof(unsigned) != 0) {	1453 if (length % sizeof(unsigned) != 0) {

1454 return new i::ScriptDataImpl();	1454 return new i::ScriptDataImpl();

1455 }	1455 }

(...skipping 2226 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3682 int String::Length() const {	3682 int String::Length() const {

3683 i::Handle<i::String> str = Utils::OpenHandle(this);	3683 i::Handle<i::String> str = Utils::OpenHandle(this);

3684 if (IsDeadCheck(str->GetIsolate(), "v8::String::Length()")) return 0;	3684 if (IsDeadCheck(str->GetIsolate(), "v8::String::Length()")) return 0;

3685 return str->length();	3685 return str->length();

3686 }	3686 }

3687	3687

3688	3688

3689 int String::Utf8Length() const {	3689 int String::Utf8Length() const {

3690 i::Handle<i::String> str = Utils::OpenHandle(this);	3690 i::Handle<i::String> str = Utils::OpenHandle(this);

3691 if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;	3691 if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;

3692 return str->Utf8Length();	3692 return i::Utf8Length(str);

3693 }	3693 }

3694	3694

3695	3695

3696 int String::WriteUtf8(char* buffer,	3696 int String::WriteUtf8(char* buffer,

3697 int capacity,	3697 int capacity,

3698 int* nchars_ref,	3698 int* nchars_ref,

3699 int options) const {	3699 int options) const {

3700 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate();	3700 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate();

3701 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0;	3701 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0;

3702 LOG_API(isolate, "String::WriteUtf8");	3702 LOG_API(isolate, "String::WriteUtf8");

(...skipping 25 matching lines...) Expand all Loading...
3728 }	3728 }

3729 write_input_buffer.Reset(0, *str);	3729 write_input_buffer.Reset(0, *str);

3730 int len = str->length();	3730 int len = str->length();

3731 // Encode the first K - 3 bytes directly into the buffer since we	3731 // Encode the first K - 3 bytes directly into the buffer since we

3732 // know there's room for them. If no capacity is given we copy all	3732 // know there's room for them. If no capacity is given we copy all

3733 // of them here.	3733 // of them here.

3734 int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1);	3734 int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1);

3735 int i;	3735 int i;

3736 int pos = 0;	3736 int pos = 0;

3737 int nchars = 0;	3737 int nchars = 0;

	3738 int previous = unibrow::Utf16::kNoPreviousCharacter;

3738 for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {	3739 for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {

3739 i::uc32 c = write_input_buffer.GetNext();	3740 i::uc32 c = write_input_buffer.GetNext();

3740 int written = unibrow::Utf8::Encode(buffer + pos, c);	3741 int written = unibrow::Utf8::Encode(buffer + pos, c, previous);

3741 pos += written;	3742 pos += written;

3742 nchars++;	3743 nchars++;

	3744 previous = c;

3743 }	3745 }

3744 if (i < len) {	3746 if (i < len) {

3745 // For the last characters we need to check the length for each one	3747 // For the last characters we need to check the length for each one

3746 // because they may be longer than the remaining space in the	3748 // because they may be longer than the remaining space in the

3747 // buffer.	3749 // buffer.

3748 char intermediate[unibrow::Utf8::kMaxEncodedSize];	3750 char intermediate[unibrow::Utf8::kMaxEncodedSize];

3749 for (; i < len && pos < capacity; i++) {	3751 for (; i < len && pos < capacity; i++) {

3750 i::uc32 c = write_input_buffer.GetNext();	3752 i::uc32 c = write_input_buffer.GetNext();

3751 int written = unibrow::Utf8::Encode(intermediate, c);	3753 if (unibrow::Utf16::IsTrailSurrogate(c) &&

3752 if (pos + written <= capacity) {	3754 unibrow::Utf16::IsLeadSurrogate(previous)) {

3753 for (int j = 0; j < written; j++)	3755 // We can't use the intermediate buffer here because the encoding

3754 buffer[pos + j] = intermediate[j];	3756 // of surrogate pairs is done under assumption that you can step

	3757 // back and fix the UTF8 stream. Luckily we only need space for one

	3758 // more byte, so there is always space.

	3759 ASSERT(pos < capacity);

	3760 int written = unibrow::Utf8::Encode(buffer + pos, c, previous);

	3761 ASSERT(written == 1);

3755 pos += written;	3762 pos += written;

3756 nchars++;	3763 nchars++;

3757 } else {	3764 } else {

3758 // We've reached the end of the buffer	3765 int written =

3759 break;	3766 unibrow::Utf8::Encode(intermediate,

	3767 c,

	3768 unibrow::Utf16::kNoPreviousCharacter);

	3769 if (pos + written <= capacity) {

	3770 for (int j = 0; j < written; j++)

	3771 buffer[pos + j] = intermediate[j];

	3772 pos += written;

	3773 nchars++;

	3774 } else {

	3775 // We've reached the end of the buffer

	3776 break;

	3777 }

3760 }	3778 }

	3779 previous = c;

3761 }	3780 }

3762 }	3781 }

3763 if (nchars_ref != NULL) *nchars_ref = nchars;	3782 if (nchars_ref != NULL) *nchars_ref = nchars;

3764 if (!(options & NO_NULL_TERMINATION) &&	3783 if (!(options & NO_NULL_TERMINATION) &&

3765 (i == len && (capacity == -1 || pos < capacity)))	3784 (i == len && (capacity == -1 || pos < capacity)))

3766 buffer[pos++] = '\0';	3785 buffer[pos++] = '\0';

3767 return pos;	3786 return pos;

3768 }	3787 }

3769	3788

3770	3789

(...skipping 1459 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5230 String::Utf8Value::Utf8Value(v8::Handle<v8::Value> obj)	5249 String::Utf8Value::Utf8Value(v8::Handle<v8::Value> obj)

5231 : str_(NULL), length_(0) {	5250 : str_(NULL), length_(0) {

5232 i::Isolate* isolate = i::Isolate::Current();	5251 i::Isolate* isolate = i::Isolate::Current();

5233 if (IsDeadCheck(isolate, "v8::String::Utf8Value::Utf8Value()")) return;	5252 if (IsDeadCheck(isolate, "v8::String::Utf8Value::Utf8Value()")) return;

5234 if (obj.IsEmpty()) return;	5253 if (obj.IsEmpty()) return;

5235 ENTER_V8(isolate);	5254 ENTER_V8(isolate);

5236 i::HandleScope scope(isolate);	5255 i::HandleScope scope(isolate);

5237 TryCatch try_catch;	5256 TryCatch try_catch;

5238 Handle<String> str = obj->ToString();	5257 Handle<String> str = obj->ToString();

5239 if (str.IsEmpty()) return;	5258 if (str.IsEmpty()) return;

5240 length_ = str->Utf8Length();	5259 i::Handle<i::String> i_str = Utils::OpenHandle(*str);

	5260 length_ = i::Utf8Length(i_str);

5241 str_ = i::NewArray<char>(length_ + 1);	5261 str_ = i::NewArray<char>(length_ + 1);

5242 str->WriteUtf8(str_);	5262 str->WriteUtf8(str_);

5243 }	5263 }

5244	5264

5245	5265

5246 String::Utf8Value::~Utf8Value() {	5266 String::Utf8Value::~Utf8Value() {

5247 i::DeleteArray(str_);	5267 i::DeleteArray(str_);

5248 }	5268 }

5249	5269

5250	5270

(...skipping 943 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6194	6214

6195	6215

6196 char* HandleScopeImplementer::Iterate(ObjectVisitor* v, char* storage) {	6216 char* HandleScopeImplementer::Iterate(ObjectVisitor* v, char* storage) {

6197 HandleScopeImplementer* scope_implementer =	6217 HandleScopeImplementer* scope_implementer =

6198 reinterpret_cast<HandleScopeImplementer*>(storage);	6218 reinterpret_cast<HandleScopeImplementer*>(storage);

6199 scope_implementer->IterateThis(v);	6219 scope_implementer->IterateThis(v);

6200 return storage + ArchiveSpacePerThread();	6220 return storage + ArchiveSpacePerThread();

6201 }	6221 }

6202	6222

6203 } } // namespace v8::internal	6223 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « no previous file | src/arm/regexp-macro-assembler-arm.cc » ('j') | src/handles.h » ('J')