test/cctest/test-parsing.cc - Issue 9600009: Fix input and output to handle UTF16 surrogate pairs.

Unified Diff: test/cctest/test-parsing.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: test/cctest/test-parsing.cc

===================================================================

--- test/cctest/test-parsing.cc (revision 10944)

+++ test/cctest/test-parsing.cc (working copy)

@@ -63,7 +63,7 @@

int length = i::StrLength(key_token.keyword);

CHECK(static_cast<int>(sizeof(buffer)) >= length);

{

- i::Utf8ToUC16CharacterStream stream(keyword, length);

+ i::Utf8ToUtf16CharacterStream stream(keyword, length);

i::Scanner scanner(&unicode_cache);

// The scanner should parse Harmony keywords for this test.

scanner.SetHarmonyScoping(true);

@@ -74,7 +74,7 @@

}

// Removing characters will make keyword matching fail.

{

- i::Utf8ToUC16CharacterStream stream(keyword, length - 1);

+ i::Utf8ToUtf16CharacterStream stream(keyword, length - 1);

i::Scanner scanner(&unicode_cache);

scanner.Initialize(&stream);

CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());

@@ -85,7 +85,7 @@

for (int j = 0; j < static_cast<int>(ARRAY_SIZE(chars_to_append)); ++j) {

memmove(buffer, keyword, length);

buffer[length] = chars_to_append[j];

- i::Utf8ToUC16CharacterStream stream(buffer, length + 1);

+ i::Utf8ToUtf16CharacterStream stream(buffer, length + 1);

i::Scanner scanner(&unicode_cache);

scanner.Initialize(&stream);

CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());

@@ -95,7 +95,7 @@

{

memmove(buffer, keyword, length);

buffer[length - 1] = '_';

- i::Utf8ToUC16CharacterStream stream(buffer, length);

+ i::Utf8ToUtf16CharacterStream stream(buffer, length);

i::Scanner scanner(&unicode_cache);

scanner.Initialize(&stream);

CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());

@@ -255,7 +255,7 @@

uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();

for (int i = 0; programs[i]; i++) {

const char* program = programs[i];

- i::Utf8ToUC16CharacterStream stream(

+ i::Utf8ToUtf16CharacterStream stream(

reinterpret_cast<const i::byte*>(program),

static_cast<unsigned>(strlen(program)));

i::CompleteParserRecorder log;

@@ -291,7 +291,7 @@

uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();

for (int i = 0; programs[i]; i++) {

const char* program = programs[i];

- i::Utf8ToUC16CharacterStream stream(

+ i::Utf8ToUtf16CharacterStream stream(

reinterpret_cast<const i::byte*>(program),

static_cast<unsigned>(strlen(program)));

i::CompleteParserRecorder log;

@@ -326,8 +326,9 @@

// and then used the invalid currently scanned literal. This always

// failed in debug mode, and sometimes crashed in release mode.

- i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),

- static_cast<unsigned>(strlen(program)));

+ i::Utf8ToUtf16CharacterStream stream(

+ reinterpret_cast<const i::byte*>(program),

+ static_cast<unsigned>(strlen(program)));

i::ScriptDataImpl* data =

i::ParserApi::PreParse(&stream, NULL, false);

CHECK(data->HasError());

@@ -392,7 +393,7 @@

uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();

- i::Utf8ToUC16CharacterStream stream(

+ i::Utf8ToUtf16CharacterStream stream(

reinterpret_cast<const i::byte*>(*program),

static_cast<unsigned>(kProgramSize));

i::CompleteParserRecorder log;

@@ -449,10 +450,10 @@

i::Handle<i::String> uc16_string(

FACTORY->NewExternalStringFromTwoByte(&resource));

- i::ExternalTwoByteStringUC16CharacterStream uc16_stream(

+ i::ExternalTwoByteStringUtf16CharacterStream uc16_stream(

i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);

- i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);

- i::Utf8ToUC16CharacterStream utf8_stream(

+ i::GenericStringUtf16CharacterStream string_stream(ascii_string, start, end);

+ i::Utf8ToUtf16CharacterStream utf8_stream(

reinterpret_cast<const i::byte*>(ascii_source), end);

utf8_stream.SeekForward(start);

@@ -575,12 +576,14 @@

char buffer[kAllUtf8CharsSizeU];

unsigned cursor = 0;

for (int i = 0; i <= kMaxUC16Char; i++) {

- cursor += unibrow::Utf8::Encode(buffer + cursor, i);

+ cursor += unibrow::Utf8::Encode(buffer + cursor,

+ i,

+ unibrow::Utf16::kNoPreviousCharacter);

}

ASSERT(cursor == kAllUtf8CharsSizeU);

- i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),

- kAllUtf8CharsSizeU);

+ i::Utf8ToUtf16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),

+ kAllUtf8CharsSizeU);

for (int i = 0; i <= kMaxUC16Char; i++) {

CHECK_EQU(i, stream.pos());

int32_t c = stream.Advance();

@@ -610,7 +613,7 @@

#undef CHECK_EQU

-void TestStreamScanner(i::UC16CharacterStream* stream,

+void TestStreamScanner(i::Utf16CharacterStream* stream,

i::Token::Value* expected_tokens,

int skip_pos = 0, // Zero means not skipping.

int skip_to = 0) {

@@ -633,8 +636,8 @@

v8::V8::Initialize();

const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";

- i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),

- static_cast<unsigned>(strlen(str1)));

+ i::Utf8ToUtf16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),

+ static_cast<unsigned>(strlen(str1)));

i::Token::Value expectations1[] = {

i::Token::LBRACE,

i::Token::IDENTIFIER,

@@ -652,8 +655,8 @@

TestStreamScanner(&stream1, expectations1, 0, 0);

const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";

- i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),

- static_cast<unsigned>(strlen(str2)));

+ i::Utf8ToUtf16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),

+ static_cast<unsigned>(strlen(str2)));

i::Token::Value expectations2[] = {

i::Token::CASE,

i::Token::DEFAULT,

@@ -683,7 +686,7 @@

for (int i = 0; i <= 4; i++) {

expectations3[6 - i] = i::Token::ILLEGAL;

expectations3[5 - i] = i::Token::EOS;

- i::Utf8ToUC16CharacterStream stream3(

+ i::Utf8ToUtf16CharacterStream stream3(

reinterpret_cast<const i::byte*>(str3),

static_cast<unsigned>(strlen(str3)));

TestStreamScanner(&stream3, expectations3, 1, 1 + i);

@@ -692,7 +695,7 @@

void TestScanRegExp(const char* re_source, const char* expected) {

- i::Utf8ToUC16CharacterStream stream(

+ i::Utf8ToUtf16CharacterStream stream(

reinterpret_cast<const i::byte*>(re_source),

static_cast<unsigned>(strlen(re_source)));

i::Scanner scanner(i::Isolate::Current()->unicode_cache());

@@ -835,6 +838,20 @@

{ " for ", "(let x in {})\n"

" statement;", "\n"

" more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },

+ // Check that neither the 6-byte (surrogate pair) nor the 4-byte UTF-8

+ // encoding of a non-BMP character throws the preparser off in terms of
+ // byte offsets.

+ { " 'foo\355\240\201\355\260\211';\n"

+ " (function fun", "(a,b) { infunction; }", ")();",

+ i::FUNCTION_SCOPE, i::CLASSIC_MODE },

+ { " 'foo\360\220\220\212';\n"

+ " (function fun", "(a,b) { infunction; }", ")();",

+ i::FUNCTION_SCOPE, i::CLASSIC_MODE },

+ { " 'foo';\n"

+ " (function fun", "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();",

+ i::FUNCTION_SCOPE, i::CLASSIC_MODE },

+ { " 'foo';\n"

+ " (function fun", "(a,b) { 'bar\360\220\220\214'; }", ")();",

+ i::FUNCTION_SCOPE, i::CLASSIC_MODE },

{ NULL, NULL, NULL, i::EVAL_SCOPE, i::CLASSIC_MODE }

};

@@ -894,7 +911,7 @@

// Preparse the data.

i::CompleteParserRecorder log;

i::Scanner scanner(i::Isolate::Current()->unicode_cache());

- i::GenericStringUC16CharacterStream stream(source, 0, source->length());

+ i::GenericStringUtf16CharacterStream stream(source, 0, source->length());

scanner.SetHarmonyScoping(harmony_scoping);

scanner.Initialize(&stream);

v8::preparser::PreParser::PreParseResult result =

« src/unicode.h ('K') | « test/cctest/test-api.cc ('k') | no next file » | no next file with comments »