Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(61)

Side by Side Diff: test/cctest/test-api.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 5511 matching lines...) Expand 10 before | Expand all | Expand 10 after
5522 while (true) { 5522 while (true) {
5523 if (n-- == 0) return 0; 5523 if (n-- == 0) return 0;
5524 if (*a == 0 && *b == 0) return 0; 5524 if (*a == 0 && *b == 0) return 0;
5525 if (*a != *b) return 0 + *a - *b; 5525 if (*a != *b) return 0 + *a - *b;
5526 a++; 5526 a++;
5527 b++; 5527 b++;
5528 } 5528 }
5529 } 5529 }
5530 5530
5531 5531
5532 int GetUtf8Length(Handle<String> str) {
5533 int len = str->Utf8Length();
5534 if (len < 0) {
5535 i::Handle<i::String> istr(v8::Utils::OpenHandle(*str));
5536 i::FlattenString(istr);
5537 len = str->Utf8Length();
5538 }
5539 return len;
5540 }
5541
5542
5532 THREADED_TEST(StringWrite) { 5543 THREADED_TEST(StringWrite) {
5533 LocalContext context; 5544 LocalContext context;
5534 v8::HandleScope scope; 5545 v8::HandleScope scope;
5535 v8::Handle<String> str = v8_str("abcde"); 5546 v8::Handle<String> str = v8_str("abcde");
5536 // abc<Icelandic eth><Unicode snowman>. 5547 // abc<Icelandic eth><Unicode snowman>.
5537 v8::Handle<String> str2 = v8_str("abc\303\260\342\230\203"); 5548 v8::Handle<String> str2 = v8_str("abc\303\260\342\230\203");
5538 const int kStride = 4; // Must match stride in for loops in JS below. 5549 const int kStride = 4; // Must match stride in for loops in JS below.
5539 CompileRun( 5550 CompileRun(
5540 "var left = '';" 5551 "var left = '';"
5541 "for (var i = 0; i < 0xd800; i += 4) {" 5552 "for (var i = 0; i < 0xd800; i += 4) {"
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
5602 CHECK_EQ(3, charlen); 5613 CHECK_EQ(3, charlen);
5603 CHECK_EQ(0, strncmp(utf8buf, "abc\1", 4)); 5614 CHECK_EQ(0, strncmp(utf8buf, "abc\1", 4));
5604 5615
5605 memset(utf8buf, 0x1, 1000); 5616 memset(utf8buf, 0x1, 1000);
5606 len = str2->WriteUtf8(utf8buf, 2, &charlen); 5617 len = str2->WriteUtf8(utf8buf, 2, &charlen);
5607 CHECK_EQ(2, len); 5618 CHECK_EQ(2, len);
5608 CHECK_EQ(2, charlen); 5619 CHECK_EQ(2, charlen);
5609 CHECK_EQ(0, strncmp(utf8buf, "ab\1", 3)); 5620 CHECK_EQ(0, strncmp(utf8buf, "ab\1", 3));
5610 5621
5611 memset(utf8buf, 0x1, sizeof(utf8buf)); 5622 memset(utf8buf, 0x1, sizeof(utf8buf));
5612 len = left_tree->Utf8Length(); 5623 len = GetUtf8Length(left_tree);
5613 int utf8_expected = 5624 int utf8_expected =
5614 (0x80 + (0x800 - 0x80) * 2 + (0xd800 - 0x800) * 3) / kStride; 5625 (0x80 + (0x800 - 0x80) * 2 + (0xd800 - 0x800) * 3) / kStride;
5615 CHECK_EQ(utf8_expected, len); 5626 CHECK_EQ(utf8_expected, len);
5616 len = left_tree->WriteUtf8(utf8buf, utf8_expected, &charlen); 5627 len = left_tree->WriteUtf8(utf8buf, utf8_expected, &charlen);
5617 CHECK_EQ(utf8_expected, len); 5628 CHECK_EQ(utf8_expected, len);
5618 CHECK_EQ(0xd800 / kStride, charlen); 5629 CHECK_EQ(0xd800 / kStride, charlen);
5619 CHECK_EQ(0xed, static_cast<unsigned char>(utf8buf[utf8_expected - 3])); 5630 CHECK_EQ(0xed, static_cast<unsigned char>(utf8buf[utf8_expected - 3]));
5620 CHECK_EQ(0x9f, static_cast<unsigned char>(utf8buf[utf8_expected - 2])); 5631 CHECK_EQ(0x9f, static_cast<unsigned char>(utf8buf[utf8_expected - 2]));
5621 CHECK_EQ(0xc0 - kStride, 5632 CHECK_EQ(0xc0 - kStride,
5622 static_cast<unsigned char>(utf8buf[utf8_expected - 1])); 5633 static_cast<unsigned char>(utf8buf[utf8_expected - 1]));
5623 CHECK_EQ(1, utf8buf[utf8_expected]); 5634 CHECK_EQ(1, utf8buf[utf8_expected]);
5624 5635
5625 memset(utf8buf, 0x1, sizeof(utf8buf)); 5636 memset(utf8buf, 0x1, sizeof(utf8buf));
5626 len = right_tree->Utf8Length(); 5637 len = GetUtf8Length(right_tree);
5627 CHECK_EQ(utf8_expected, len); 5638 CHECK_EQ(utf8_expected, len);
5628 len = right_tree->WriteUtf8(utf8buf, utf8_expected, &charlen); 5639 len = right_tree->WriteUtf8(utf8buf, utf8_expected, &charlen);
5629 CHECK_EQ(utf8_expected, len); 5640 CHECK_EQ(utf8_expected, len);
5630 CHECK_EQ(0xd800 / kStride, charlen); 5641 CHECK_EQ(0xd800 / kStride, charlen);
5631 CHECK_EQ(0xed, static_cast<unsigned char>(utf8buf[0])); 5642 CHECK_EQ(0xed, static_cast<unsigned char>(utf8buf[0]));
5632 CHECK_EQ(0x9f, static_cast<unsigned char>(utf8buf[1])); 5643 CHECK_EQ(0x9f, static_cast<unsigned char>(utf8buf[1]));
5633 CHECK_EQ(0xc0 - kStride, static_cast<unsigned char>(utf8buf[2])); 5644 CHECK_EQ(0xc0 - kStride, static_cast<unsigned char>(utf8buf[2]));
5634 CHECK_EQ(1, utf8buf[utf8_expected]); 5645 CHECK_EQ(1, utf8buf[utf8_expected]);
5635 5646
5636 memset(buf, 0x1, sizeof(buf)); 5647 memset(buf, 0x1, sizeof(buf));
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
5741 CHECK_EQ(8, len); 5752 CHECK_EQ(8, len);
5742 CHECK_EQ('X', utf8buf[8]); 5753 CHECK_EQ('X', utf8buf[8]);
5743 CHECK_EQ(5, charlen); 5754 CHECK_EQ(5, charlen);
5744 CHECK_EQ(0, strncmp(utf8buf, "abc\303\260\342\230\203", 8)); 5755 CHECK_EQ(0, strncmp(utf8buf, "abc\303\260\342\230\203", 8));
5745 CHECK_NE(0, strcmp(utf8buf, "abc\303\260\342\230\203")); 5756 CHECK_NE(0, strcmp(utf8buf, "abc\303\260\342\230\203"));
5746 utf8buf[8] = '\0'; 5757 utf8buf[8] = '\0';
5747 CHECK_EQ(0, strcmp(utf8buf, "abc\303\260\342\230\203")); 5758 CHECK_EQ(0, strcmp(utf8buf, "abc\303\260\342\230\203"));
5748 } 5759 }
5749 5760
5750 5761
5762 static void Utf16Helper(
5763 LocalContext& context,
5764 const char* name,
5765 const char* lengths_name,
5766 int len) {
5767 Local<v8::Array> a =
5768 Local<v8::Array>::Cast(context->Global()->Get(v8_str(name)));
5769 Local<v8::Array> alens =
5770 Local<v8::Array>::Cast(context->Global()->Get(v8_str(lengths_name)));
5771 for (int i = 0; i < len; i++) {
5772 Local<v8::String> string =
5773 Local<v8::String>::Cast(a->Get(i));
5774 Local<v8::Number> expected_len =
5775 Local<v8::Number>::Cast(alens->Get(i));
5776 int length = GetUtf8Length(string);
5777 CHECK_EQ(static_cast<int>(expected_len->Value()), length);
5778 }
5779 }
5780
5781
5782 static uint16_t StringGet(Handle<String> str, int index) {
5783 i::Handle<i::String> istring =
5784 v8::Utils::OpenHandle(String::Cast(*str));
5785 return istring->Get(index);
5786 }
5787
5788
5789 static void WriteUtf8Helper(
5790 LocalContext& context,
5791 const char* name,
5792 const char* lengths_name,
5793 int len) {
5794 Local<v8::Array> b =
5795 Local<v8::Array>::Cast(context->Global()->Get(v8_str(name)));
5796 Local<v8::Array> alens =
5797 Local<v8::Array>::Cast(context->Global()->Get(v8_str(lengths_name)));
5798 char buffer[1000];
5799 char buffer2[1000];
5800 for (int i = 0; i < len; i++) {
5801 Local<v8::String> string =
5802 Local<v8::String>::Cast(b->Get(i));
5803 Local<v8::Number> expected_len =
5804 Local<v8::Number>::Cast(alens->Get(i));
5805 int utf8_length = static_cast<int>(expected_len->Value());
5806 for (int j = utf8_length + 1; j >= 0; j--) {
5807 memset(reinterpret_cast<void*>(&buffer), 42, sizeof(buffer));
5808 memset(reinterpret_cast<void*>(&buffer2), 42, sizeof(buffer2));
5809 int nchars;
5810 int utf8_written =
5811 string->WriteUtf8(buffer, j, &nchars, String::NO_OPTIONS);
5812 int utf8_written2 =
5813 string->WriteUtf8(buffer2, j, &nchars, String::NO_NULL_TERMINATION);
5814 CHECK_GE(utf8_length + 1, utf8_written);
5815 CHECK_GE(utf8_length, utf8_written2);
5816 for (int k = 0; k < utf8_written2; k++) {
5817 CHECK_EQ(buffer[k], buffer2[k]);
5818 }
5819 CHECK(nchars * 3 >= utf8_written - 1);
5820 CHECK(nchars <= utf8_written);
5821 if (j == utf8_length + 1) {
5822 CHECK_EQ(utf8_written2, utf8_length);
5823 CHECK_EQ(utf8_written2 + 1, utf8_written);
5824 }
5825 CHECK_EQ(buffer[utf8_written], 42);
5826 if (j > utf8_length) {
5827 if (utf8_written != 0) CHECK_EQ(buffer[utf8_written - 1], 0);
5828 if (utf8_written > 1) CHECK_NE(buffer[utf8_written - 2], 42);
5829 Handle<String> roundtrip = v8_str(buffer);
5830 CHECK(roundtrip->Equals(string));
5831 } else {
5832 if (utf8_written != 0) CHECK_NE(buffer[utf8_written - 1], 42);
5833 }
5834 if (utf8_written2 != 0) CHECK_NE(buffer[utf8_written - 1], 42);
5835 if (nchars >= 2) {
5836 uint16_t trail = StringGet(string, nchars - 1);
5837 uint16_t lead = StringGet(string, nchars - 2);
5838 if (((lead & 0xfc00) == 0xd800) &&
5839 ((trail & 0xfc00) == 0xdc00)) {
5840 unsigned char u1 = buffer2[utf8_written2 - 4];
5841 unsigned char u2 = buffer2[utf8_written2 - 3];
5842 unsigned char u3 = buffer2[utf8_written2 - 2];
5843 unsigned char u4 = buffer2[utf8_written2 - 1];
5844 CHECK_EQ((u1 & 0xf8), 0xf0);
5845 CHECK_EQ((u2 & 0xc0), 0x80);
5846 CHECK_EQ((u3 & 0xc0), 0x80);
5847 CHECK_EQ((u4 & 0xc0), 0x80);
5848 uint32_t c = 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff);
5849 CHECK_EQ((u4 & 0x3f), (c & 0x3f));
5850 CHECK_EQ((u3 & 0x3f), ((c >> 6) & 0x3f));
5851 CHECK_EQ((u2 & 0x3f), ((c >> 12) & 0x3f));
5852 CHECK_EQ((u1 & 0x3), c >> 18);
5853 }
5854 }
5855 }
5856 }
5857 }
5858
5859
5860 THREADED_TEST(Utf16) {
5861 LocalContext context;
5862 v8::HandleScope scope;
5863 CompileRun(
5864 "var pad = '01234567890123456789';"
5865 "var p = [];"
5866 "var plens = [20, 3, 3];"
5867 "p.push('01234567890123456789');"
5868 "var lead = 0xd800;"
5869 "var trail = 0xdc00;"
5870 "p.push(String.fromCharCode(0xd800));"
5871 "p.push(String.fromCharCode(0xdc00));"
5872 "var a = [];"
5873 "var b = [];"
5874 "var alens = [];"
5875 "for (var i = 0; i < 3; i++) {"
5876 " p[1] = String.fromCharCode(lead++);"
5877 " for (var j = 0; j < 3; j++) {"
5878 " p[2] = String.fromCharCode(trail++);"
5879 " a.push(p[i] + p[j]);"
5880 " b.push(p[i] + p[j]);"
5881 " alens.push(plens[i] + plens[j]);"
5882 " }"
5883 "}"
5884 "alens[5] -= 2;" // Here the surrogate pairs match up.
5885 "var a2 = [];"
5886 "var b2 = [];"
5887 "var a2lens = [];"
5888 "for (var m = 0; m < 9; m++) {"
5889 " for (var n = 0; n < 9; n++) {"
5890 " a2.push(a[m] + a[n]);"
5891 " b2.push(b[m] + b[n]);"
5892 " var utf = alens[m] + alens[n];" // And here.
5893 // The 'n's that start with 0xdc.. are 6-8
5894 // The 'm's that end with 0xd8.. are 1, 4 and 7
5895 " if ((m % 3) == 1 && n >= 6) utf -= 2;"
5896 " a2lens.push(utf);"
5897 " }"
5898 "}");
5899 Utf16Helper(context, "a", "alens", 9);
5900 Utf16Helper(context, "a2", "a2lens", 81);
5901 WriteUtf8Helper(context, "b", "alens", 9);
5902 WriteUtf8Helper(context, "b2", "a2lens", 81);
5903 }
5904
5905
5906 static bool SameSymbol(Handle<String> s1, Handle<String> s2) {
5907 i::Handle<i::String> is1(v8::Utils::OpenHandle(*s1));
5908 i::Handle<i::String> is2(v8::Utils::OpenHandle(*s2));
5909 return *is1 == *is2;
5910 }
5911
5912
5913 static void SameSymbolHelper(const char* a, const char* b) {
5914 Handle<String> symbol1 = v8::String::NewSymbol(a);
5915 Handle<String> symbol2 = v8::String::NewSymbol(b);
5916 CHECK(SameSymbol(symbol1, symbol2));
5917 }
5918
5919
5920 THREADED_TEST(Utf16Symbol) {
5921 LocalContext context;
5922 v8::HandleScope scope;
5923
5924 Handle<String> symbol1 = v8::String::NewSymbol("abc");
5925 Handle<String> symbol2 = v8::String::NewSymbol("abc");
5926 CHECK(SameSymbol(symbol1, symbol2));
5927
5928 SameSymbolHelper("\360\220\220\205", // 4 byte encoding.
5929 "\355\240\201\355\260\205"); // 2 3-byte surrogates.
5930 SameSymbolHelper("\355\240\201\355\260\206", // 2 3-byte surrogates.
5931 "\360\220\220\206"); // 4 byte encoding.
5932 SameSymbolHelper("x\360\220\220\205", // 4 byte encoding.
5933 "x\355\240\201\355\260\205"); // 2 3-byte surrogates.
5934 SameSymbolHelper("x\355\240\201\355\260\206", // 2 3-byte surrogates.
5935 "x\360\220\220\206"); // 4 byte encoding.
5936 CompileRun(
5937 "var sym0 = 'benedictus';"
5938 "var sym0b = 'S\303\270ren';"
5939 "var sym1 = '\355\240\201\355\260\207';"
5940 "var sym2 = '\360\220\220\210';"
5941 "var sym3 = 'x\355\240\201\355\260\207';"
5942 "var sym4 = 'x\360\220\220\210';"
5943 "if (sym1.length != 2) throw sym1;"
5944 "if (sym1.charCodeAt(1) != 0xdc07) throw sym1.charCodeAt(1);"
5945 "if (sym2.length != 2) throw sym2;"
5946 "if (sym2.charCodeAt(1) != 0xdc08) throw sym2.charCodeAt(2);"
5947 "if (sym3.length != 3) throw sym3;"
5948 "if (sym3.charCodeAt(2) != 0xdc07) throw sym1.charCodeAt(2);"
5949 "if (sym4.length != 3) throw sym4;"
5950 "if (sym4.charCodeAt(2) != 0xdc08) throw sym2.charCodeAt(2);");
5951 Handle<String> sym0 = v8::String::NewSymbol("benedictus");
5952 Handle<String> sym0b = v8::String::NewSymbol("S\303\270ren");
5953 Handle<String> sym1 = v8::String::NewSymbol("\355\240\201\355\260\207");
5954 Handle<String> sym2 = v8::String::NewSymbol("\360\220\220\210");
5955 Handle<String> sym3 = v8::String::NewSymbol("x\355\240\201\355\260\207");
5956 Handle<String> sym4 = v8::String::NewSymbol("x\360\220\220\210");
5957 v8::Local<v8::Object> global = context->Global();
5958 Local<Value> s0 = global->Get(v8_str("sym0"));
5959 Local<Value> s0b = global->Get(v8_str("sym0b"));
5960 Local<Value> s1 = global->Get(v8_str("sym1"));
5961 Local<Value> s2 = global->Get(v8_str("sym2"));
5962 Local<Value> s3 = global->Get(v8_str("sym3"));
5963 Local<Value> s4 = global->Get(v8_str("sym4"));
5964 CHECK(SameSymbol(sym0, Handle<String>(String::Cast(*s0))));
5965 CHECK(SameSymbol(sym0b, Handle<String>(String::Cast(*s0b))));
5966 CHECK(SameSymbol(sym1, Handle<String>(String::Cast(*s1))));
5967 CHECK(SameSymbol(sym2, Handle<String>(String::Cast(*s2))));
5968 CHECK(SameSymbol(sym3, Handle<String>(String::Cast(*s3))));
5969 CHECK(SameSymbol(sym4, Handle<String>(String::Cast(*s4))));
5970 }
5971
5972
5751 THREADED_TEST(ToArrayIndex) { 5973 THREADED_TEST(ToArrayIndex) {
5752 v8::HandleScope scope; 5974 v8::HandleScope scope;
5753 LocalContext context; 5975 LocalContext context;
5754 5976
5755 v8::Handle<String> str = v8_str("42"); 5977 v8::Handle<String> str = v8_str("42");
5756 v8::Handle<v8::Uint32> index = str->ToArrayIndex(); 5978 v8::Handle<v8::Uint32> index = str->ToArrayIndex();
5757 CHECK(!index.IsEmpty()); 5979 CHECK(!index.IsEmpty());
5758 CHECK_EQ(42.0, index->Uint32Value()); 5980 CHECK_EQ(42.0, index->Uint32Value());
5759 str = v8_str("42asdf"); 5981 str = v8_str("42asdf");
5760 index = str->ToArrayIndex(); 5982 index = str->ToArrayIndex();
(...skipping 10403 matching lines...) Expand 10 before | Expand all | Expand 10 after
16164 16386
16165 TEST(SecondaryStubCache) { 16387 TEST(SecondaryStubCache) {
16166 StubCacheHelper(true); 16388 StubCacheHelper(true);
16167 } 16389 }
16168 16390
16169 16391
16170 TEST(PrimaryStubCache) { 16392 TEST(PrimaryStubCache) {
16171 StubCacheHelper(false); 16393 StubCacheHelper(false);
16172 } 16394 }
16173 16395
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698