src/objects.cc - Issue 11649018: Remove Utf8InputBuffer

Side by Side Diff: src/objects.cc

Issue 11649018: Remove Utf8InputBuffer (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 7624 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
7635 } else if (map == heap->ascii_string_map()) {	7635 } else if (map == heap->ascii_string_map()) {

7636 this->set_map(heap->undetectable_ascii_string_map());	7636 this->set_map(heap->undetectable_ascii_string_map());

7637 return true;	7637 return true;

7638 }	7638 }

7639 // Rest cannot be marked as undetectable	7639 // Rest cannot be marked as undetectable

7640 return false;	7640 return false;

7641 }	7641 }

7642	7642

7643	7643

7644 bool String::IsEqualTo(Vector<const char> str) {	7644 bool String::IsEqualTo(Vector<const char> str) {

7645 Isolate* isolate = GetIsolate();

7646 int slen = length();	7645 int slen = length();

7647 Access<UnicodeCache::Utf8Decoder>	7646 // Can't check exact length equality, but we can check bounds.

7648 decoder(isolate->unicode_cache()->utf8_decoder());	7647 int str_len = str.length();

7649 decoder->Reset(str.start(), str.length());	7648 if (str_len < slen ||

	7649 str_len > slen*static_cast<int>(unibrow::Utf8::kMaxEncodedSize)) {

	7650 return false;

	7651 }

7650 int i;	7652 int i;

7651 for (i = 0; i < slen && decoder->has_more(); i++) {	7653 unsigned remaining_in_str = static_cast<unsigned>(str_len);

7652 uint32_t r = decoder->GetNext();	7654 const uint8_t* utf8_data = reinterpret_cast<const uint8_t*>(str.start());

	7655 for (i = 0; i < slen && remaining_in_str > 0; i++) {

	7656 unsigned cursor = 0;

	7657 uint32_t r = unibrow::Utf8::ValueOf(utf8_data, remaining_in_str, &cursor);

	7658 ASSERT(cursor > 0 && cursor <= remaining_in_str);

7653 if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {	7659 if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {

7654 if (i > slen - 1) return false;	7660 if (i > slen - 1) return false;

7655 if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false;	7661 if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false;

7656 if (Get(i) != unibrow::Utf16::TrailSurrogate(r)) return false;	7662 if (Get(i) != unibrow::Utf16::TrailSurrogate(r)) return false;

7657 } else {	7663 } else {

7658 if (Get(i) != r) return false;	7664 if (Get(i) != r) return false;

7659 }	7665 }

	7666 utf8_data += cursor;

	7667 remaining_in_str -= cursor;

7660 }	7668 }

7661 return i == slen && !decoder->has_more();	7669 return i == slen && remaining_in_str == 0;

7662 }	7670 }

7663	7671

7664	7672

7665 bool String::IsAsciiEqualTo(Vector<const char> str) {	7673 bool String::IsAsciiEqualTo(Vector<const char> str) {

7666 int slen = length();	7674 int slen = length();

7667 if (str.length() != slen) return false;	7675 if (str.length() != slen) return false;

7668 FlatContent content = GetFlatContent();	7676 FlatContent content = GetFlatContent();

7669 if (content.IsAscii()) {	7677 if (content.IsAscii()) {

7670 return CompareChars(content.ToAsciiVector().start(),	7678 return CompareChars(content.ToAsciiVector().start(),

7671 str.start(), slen) == 0;	7679 str.start(), slen) == 0;

(...skipping 184 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
7856 return MakeArrayIndexHash(array_index_, length_);	7864 return MakeArrayIndexHash(array_index_, length_);

7857 }	7865 }

7858 return (GetHashCore(raw_running_hash_) << String::kHashShift) |	7866 return (GetHashCore(raw_running_hash_) << String::kHashShift) |

7859 String::kIsNotArrayIndexMask;	7867 String::kIsNotArrayIndexMask;

7860 } else {	7868 } else {

7861 return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask;	7869 return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask;

7862 }	7870 }

7863 }	7871 }

7864	7872

7865	7873

7866 uint32_t StringHasher::ComputeHashField(unibrow::CharacterStream* buffer,	7874 uint32_t StringHasher::ComputeUtf8Hash(Vector<const char> chars,

7867 int length,	7875 uint32_t seed,

7868 uint32_t seed) {	7876 int* utf16_length_out) {

7869 typedef unibrow::Utf16 u;	7877 int vector_length = chars.length();

7870 StringHasher hasher(length, seed);	7878 // Handle some edge cases

7871 // Very long strings have a trivial hash that doesn't inspect the	7879 if (vector_length <= 1) {

7872 // string contents.	7880 ASSERT(vector_length == 0 ||

7873 if (hasher.has_trivial_hash()) {	7881 static_cast<uint8_t>(chars.start()[0]) <=

7874 return hasher.GetHashField();	7882 unibrow::Utf8::kMaxOneByteChar);

	7883 *utf16_length_out = vector_length;

	7884 return HashSequentialString(chars.start(), vector_length, seed);

7875 }	7885 }

7876 // Do the iterative array index computation as long as there is a	7886 // Start with a fake length which won't affect computation.

7877 // chance this is an array index.	7887 // It will be updated later.

7878 if (hasher.is_array_index_) {	7888 StringHasher hasher(String::kMaxArrayIndexSize, seed);

7879 while (buffer->has_more()) {	7889 unsigned remaining = static_cast<unsigned>(vector_length);

7880 uint32_t c = buffer->GetNext();	7890 const uint8_t* stream = reinterpret_cast<const uint8_t*>(chars.start());

7881 if (c > u::kMaxNonSurrogateCharCode) {	7891 int utf16_length = 0;

7882 uint16_t c1 = u::LeadSurrogate(c);	7892 bool is_index = true;

7883 uint16_t c2 = u::TrailSurrogate(c);	7893 ASSERT(hasher.is_array_index_);

7884 hasher.AddCharacter(c1);	7894 while (remaining > 0) {

7885 hasher.AddCharacter(c2);	7895 unsigned consumed = 0;

7886 if (!hasher.UpdateIndex(c1)) break;	7896 uint32_t c = unibrow::Utf8::ValueOf(stream, remaining, &consumed);

7887 if (!hasher.UpdateIndex(c2)) break;	7897 ASSERT(consumed > 0 && consumed <= remaining);

7888 } else {	7898 stream += consumed;

7889 hasher.AddCharacter(c);	7899 remaining -= consumed;

7890 if (!hasher.UpdateIndex(c)) break;	7900 bool is_two_byte = c > unibrow::Utf16::kMaxNonSurrogateCharCode;

7891 }	7901 utf16_length += is_two_byte ? 2 : 1;
	Yang 2012/12/20 09:20:27 is_two_byte is a misnomer?
	7902 // No need to keep hashing. But we do need to calculate utf16_length.

	7903 if (utf16_length > String::kMaxHashCalcLength) continue;

	7904 if (is_two_byte) {

	7905 uint16_t c1 = unibrow::Utf16::LeadSurrogate(c);

	7906 uint16_t c2 = unibrow::Utf16::TrailSurrogate(c);

	7907 hasher.AddCharacter(c1);

	7908 hasher.AddCharacter(c2);

	7909 if (is_index) is_index = hasher.UpdateIndex(c1);

	7910 if (is_index) is_index = hasher.UpdateIndex(c2);

	7911 } else {

	7912 hasher.AddCharacter(c);

	7913 if (is_index) is_index = hasher.UpdateIndex(c);

7892 }	7914 }

7893 }	7915 }

7894 // Process the remaining characters without updating the array	7916 *utf16_length_out = static_cast<int>(utf16_length);

7895 // index.	7917 // Must set length here so that hash computation is correct.

7896 while (buffer->has_more()) {	7918 hasher.length_ = utf16_length;

7897 ASSERT(!hasher.is_array_index_);

7898 uint32_t c = buffer->GetNext();

7899 if (c > u::kMaxNonSurrogateCharCode) {

7900 hasher.AddCharacter(u::LeadSurrogate(c));

7901 hasher.AddCharacter(u::TrailSurrogate(c));

7902 } else {

7903 hasher.AddCharacter(c);

7904 }

7905 }

7906 return hasher.GetHashField();	7919 return hasher.GetHashField();

7907 }	7920 }

7908	7921

7909	7922

7910 MaybeObject* String::SubString(int start, int end, PretenureFlag pretenure) {	7923 MaybeObject* String::SubString(int start, int end, PretenureFlag pretenure) {

7911 Heap* heap = GetHeap();	7924 Heap* heap = GetHeap();

7912 if (start == 0 && end == length()) return this;	7925 if (start == 0 && end == length()) return this;

7913 MaybeObject* result = heap->AllocateSubString(this, start, end, pretenure);	7926 MaybeObject* result = heap->AllocateSubString(this, start, end, pretenure);

7914 return result;	7927 return result;

7915 }	7928 }

(...skipping 3794 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
11710 public:	11723 public:

11711 explicit Utf8SymbolKey(Vector<const char> string, uint32_t seed)	11724 explicit Utf8SymbolKey(Vector<const char> string, uint32_t seed)

11712 : string_(string), hash_field_(0), seed_(seed) { }	11725 : string_(string), hash_field_(0), seed_(seed) { }

11713	11726

11714 bool IsMatch(Object* string) {	11727 bool IsMatch(Object* string) {

11715 return String::cast(string)->IsEqualTo(string_);	11728 return String::cast(string)->IsEqualTo(string_);

11716 }	11729 }

11717	11730

11718 uint32_t Hash() {	11731 uint32_t Hash() {

11719 if (hash_field_ != 0) return hash_field_ >> String::kHashShift;	11732 if (hash_field_ != 0) return hash_field_ >> String::kHashShift;

11720 unibrow::Utf8InputBuffer<> buffer(string_.start(),	11733 hash_field_ = StringHasher::ComputeUtf8Hash(string_, seed_, &chars_);

11721 static_cast<unsigned>(string_.length()));

11722 chars_ = buffer.Utf16Length();

11723 hash_field_ = StringHasher::ComputeHashField(&buffer, chars_, seed_);

11724 uint32_t result = hash_field_ >> String::kHashShift;	11734 uint32_t result = hash_field_ >> String::kHashShift;

11725 ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.	11735 ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.

11726 return result;	11736 return result;

11727 }	11737 }

11728	11738

11729 uint32_t HashForObject(Object* other) {	11739 uint32_t HashForObject(Object* other) {

11730 return String::cast(other)->Hash();	11740 return String::cast(other)->Hash();

11731 }	11741 }

11732	11742

11733 MaybeObject* AsObject() {	11743 MaybeObject* AsObject() {

(...skipping 2317 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
14051 set_year(Smi::FromInt(year), SKIP_WRITE_BARRIER);	14061 set_year(Smi::FromInt(year), SKIP_WRITE_BARRIER);

14052 set_month(Smi::FromInt(month), SKIP_WRITE_BARRIER);	14062 set_month(Smi::FromInt(month), SKIP_WRITE_BARRIER);

14053 set_day(Smi::FromInt(day), SKIP_WRITE_BARRIER);	14063 set_day(Smi::FromInt(day), SKIP_WRITE_BARRIER);

14054 set_weekday(Smi::FromInt(weekday), SKIP_WRITE_BARRIER);	14064 set_weekday(Smi::FromInt(weekday), SKIP_WRITE_BARRIER);

14055 set_hour(Smi::FromInt(hour), SKIP_WRITE_BARRIER);	14065 set_hour(Smi::FromInt(hour), SKIP_WRITE_BARRIER);

14056 set_min(Smi::FromInt(min), SKIP_WRITE_BARRIER);	14066 set_min(Smi::FromInt(min), SKIP_WRITE_BARRIER);

14057 set_sec(Smi::FromInt(sec), SKIP_WRITE_BARRIER);	14067 set_sec(Smi::FromInt(sec), SKIP_WRITE_BARRIER);

14058 }	14068 }

14059	14069

14060 } } // namespace v8::internal	14070 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/objects.h ('k') | src/scanner.h » ('j') | src/unicode.cc » ('J')