Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/objects.cc

Issue 11649018: Remove Utf8InputBuffer (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/objects.h ('k') | src/scanner.h » ('j') | src/unicode.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 7624 matching lines...) Expand 10 before | Expand all | Expand 10 after
7635 } else if (map == heap->ascii_string_map()) { 7635 } else if (map == heap->ascii_string_map()) {
7636 this->set_map(heap->undetectable_ascii_string_map()); 7636 this->set_map(heap->undetectable_ascii_string_map());
7637 return true; 7637 return true;
7638 } 7638 }
7639 // Rest cannot be marked as undetectable 7639 // Rest cannot be marked as undetectable
7640 return false; 7640 return false;
7641 } 7641 }
7642 7642
7643 7643
7644 bool String::IsEqualTo(Vector<const char> str) { 7644 bool String::IsEqualTo(Vector<const char> str) {
7645 Isolate* isolate = GetIsolate();
7646 int slen = length(); 7645 int slen = length();
7647 Access<UnicodeCache::Utf8Decoder> 7646 // Can't check exact length equality, but we can check bounds.
7648 decoder(isolate->unicode_cache()->utf8_decoder()); 7647 int str_len = str.length();
7649 decoder->Reset(str.start(), str.length()); 7648 if (str_len < slen ||
7649 str_len > slen*static_cast<int>(unibrow::Utf8::kMaxEncodedSize)) {
7650 return false;
7651 }
7650 int i; 7652 int i;
7651 for (i = 0; i < slen && decoder->has_more(); i++) { 7653 unsigned remaining_in_str = static_cast<unsigned>(str_len);
7652 uint32_t r = decoder->GetNext(); 7654 const uint8_t* utf8_data = reinterpret_cast<const uint8_t*>(str.start());
7655 for (i = 0; i < slen && remaining_in_str > 0; i++) {
7656 unsigned cursor = 0;
7657 uint32_t r = unibrow::Utf8::ValueOf(utf8_data, remaining_in_str, &cursor);
7658 ASSERT(cursor > 0 && cursor <= remaining_in_str);
7653 if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) { 7659 if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
7654 if (i > slen - 1) return false; 7660 if (i > slen - 1) return false;
7655 if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false; 7661 if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false;
7656 if (Get(i) != unibrow::Utf16::TrailSurrogate(r)) return false; 7662 if (Get(i) != unibrow::Utf16::TrailSurrogate(r)) return false;
7657 } else { 7663 } else {
7658 if (Get(i) != r) return false; 7664 if (Get(i) != r) return false;
7659 } 7665 }
7666 utf8_data += cursor;
7667 remaining_in_str -= cursor;
7660 } 7668 }
7661 return i == slen && !decoder->has_more(); 7669 return i == slen && remaining_in_str == 0;
7662 } 7670 }
7663 7671
7664 7672
7665 bool String::IsAsciiEqualTo(Vector<const char> str) { 7673 bool String::IsAsciiEqualTo(Vector<const char> str) {
7666 int slen = length(); 7674 int slen = length();
7667 if (str.length() != slen) return false; 7675 if (str.length() != slen) return false;
7668 FlatContent content = GetFlatContent(); 7676 FlatContent content = GetFlatContent();
7669 if (content.IsAscii()) { 7677 if (content.IsAscii()) {
7670 return CompareChars(content.ToAsciiVector().start(), 7678 return CompareChars(content.ToAsciiVector().start(),
7671 str.start(), slen) == 0; 7679 str.start(), slen) == 0;
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
7856 return MakeArrayIndexHash(array_index_, length_); 7864 return MakeArrayIndexHash(array_index_, length_);
7857 } 7865 }
7858 return (GetHashCore(raw_running_hash_) << String::kHashShift) | 7866 return (GetHashCore(raw_running_hash_) << String::kHashShift) |
7859 String::kIsNotArrayIndexMask; 7867 String::kIsNotArrayIndexMask;
7860 } else { 7868 } else {
7861 return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask; 7869 return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask;
7862 } 7870 }
7863 } 7871 }
7864 7872
7865 7873
7866 uint32_t StringHasher::ComputeHashField(unibrow::CharacterStream* buffer, 7874 uint32_t StringHasher::ComputeUtf8Hash(Vector<const char> chars,
7867 int length, 7875 uint32_t seed,
7868 uint32_t seed) { 7876 int* utf16_length_out) {
7869 typedef unibrow::Utf16 u; 7877 int vector_length = chars.length();
7870 StringHasher hasher(length, seed); 7878 // Handle some edge cases
7871 // Very long strings have a trivial hash that doesn't inspect the 7879 if (vector_length <= 1) {
7872 // string contents. 7880 ASSERT(vector_length == 0 ||
7873 if (hasher.has_trivial_hash()) { 7881 static_cast<uint8_t>(chars.start()[0]) <=
7874 return hasher.GetHashField(); 7882 unibrow::Utf8::kMaxOneByteChar);
7883 *utf16_length_out = vector_length;
7884 return HashSequentialString(chars.start(), vector_length, seed);
7875 } 7885 }
7876 // Do the iterative array index computation as long as there is a 7886 // Start with a fake length which won't affect computation.
7877 // chance this is an array index. 7887 // It will be updated later.
7878 if (hasher.is_array_index_) { 7888 StringHasher hasher(String::kMaxArrayIndexSize, seed);
7879 while (buffer->has_more()) { 7889 unsigned remaining = static_cast<unsigned>(vector_length);
7880 uint32_t c = buffer->GetNext(); 7890 const uint8_t* stream = reinterpret_cast<const uint8_t*>(chars.start());
7881 if (c > u::kMaxNonSurrogateCharCode) { 7891 int utf16_length = 0;
7882 uint16_t c1 = u::LeadSurrogate(c); 7892 bool is_index = true;
7883 uint16_t c2 = u::TrailSurrogate(c); 7893 ASSERT(hasher.is_array_index_);
7884 hasher.AddCharacter(c1); 7894 while (remaining > 0) {
7885 hasher.AddCharacter(c2); 7895 unsigned consumed = 0;
7886 if (!hasher.UpdateIndex(c1)) break; 7896 uint32_t c = unibrow::Utf8::ValueOf(stream, remaining, &consumed);
7887 if (!hasher.UpdateIndex(c2)) break; 7897 ASSERT(consumed > 0 && consumed <= remaining);
7888 } else { 7898 stream += consumed;
7889 hasher.AddCharacter(c); 7899 remaining -= consumed;
7890 if (!hasher.UpdateIndex(c)) break; 7900 bool is_two_byte = c > unibrow::Utf16::kMaxNonSurrogateCharCode;
7891 } 7901 utf16_length += is_two_byte ? 2 : 1;
Yang 2012/12/20 09:20:27 is_two_byte is a misnomer?
7902 // No need to keep hashing. But we do need to calculate utf16_length.
7903 if (utf16_length > String::kMaxHashCalcLength) continue;
7904 if (is_two_byte) {
7905 uint16_t c1 = unibrow::Utf16::LeadSurrogate(c);
7906 uint16_t c2 = unibrow::Utf16::TrailSurrogate(c);
7907 hasher.AddCharacter(c1);
7908 hasher.AddCharacter(c2);
7909 if (is_index) is_index = hasher.UpdateIndex(c1);
7910 if (is_index) is_index = hasher.UpdateIndex(c2);
7911 } else {
7912 hasher.AddCharacter(c);
7913 if (is_index) is_index = hasher.UpdateIndex(c);
7892 } 7914 }
7893 } 7915 }
7894 // Process the remaining characters without updating the array 7916 *utf16_length_out = static_cast<int>(utf16_length);
7895 // index. 7917 // Must set length here so that hash computation is correct.
7896 while (buffer->has_more()) { 7918 hasher.length_ = utf16_length;
7897 ASSERT(!hasher.is_array_index_);
7898 uint32_t c = buffer->GetNext();
7899 if (c > u::kMaxNonSurrogateCharCode) {
7900 hasher.AddCharacter(u::LeadSurrogate(c));
7901 hasher.AddCharacter(u::TrailSurrogate(c));
7902 } else {
7903 hasher.AddCharacter(c);
7904 }
7905 }
7906 return hasher.GetHashField(); 7919 return hasher.GetHashField();
7907 } 7920 }
7908 7921
7909 7922
7910 MaybeObject* String::SubString(int start, int end, PretenureFlag pretenure) { 7923 MaybeObject* String::SubString(int start, int end, PretenureFlag pretenure) {
7911 Heap* heap = GetHeap(); 7924 Heap* heap = GetHeap();
7912 if (start == 0 && end == length()) return this; 7925 if (start == 0 && end == length()) return this;
7913 MaybeObject* result = heap->AllocateSubString(this, start, end, pretenure); 7926 MaybeObject* result = heap->AllocateSubString(this, start, end, pretenure);
7914 return result; 7927 return result;
7915 } 7928 }
(...skipping 3794 matching lines...) Expand 10 before | Expand all | Expand 10 after
11710 public: 11723 public:
11711 explicit Utf8SymbolKey(Vector<const char> string, uint32_t seed) 11724 explicit Utf8SymbolKey(Vector<const char> string, uint32_t seed)
11712 : string_(string), hash_field_(0), seed_(seed) { } 11725 : string_(string), hash_field_(0), seed_(seed) { }
11713 11726
11714 bool IsMatch(Object* string) { 11727 bool IsMatch(Object* string) {
11715 return String::cast(string)->IsEqualTo(string_); 11728 return String::cast(string)->IsEqualTo(string_);
11716 } 11729 }
11717 11730
11718 uint32_t Hash() { 11731 uint32_t Hash() {
11719 if (hash_field_ != 0) return hash_field_ >> String::kHashShift; 11732 if (hash_field_ != 0) return hash_field_ >> String::kHashShift;
11720 unibrow::Utf8InputBuffer<> buffer(string_.start(), 11733 hash_field_ = StringHasher::ComputeUtf8Hash(string_, seed_, &chars_);
11721 static_cast<unsigned>(string_.length()));
11722 chars_ = buffer.Utf16Length();
11723 hash_field_ = StringHasher::ComputeHashField(&buffer, chars_, seed_);
11724 uint32_t result = hash_field_ >> String::kHashShift; 11734 uint32_t result = hash_field_ >> String::kHashShift;
11725 ASSERT(result != 0); // Ensure that the hash value of 0 is never computed. 11735 ASSERT(result != 0); // Ensure that the hash value of 0 is never computed.
11726 return result; 11736 return result;
11727 } 11737 }
11728 11738
11729 uint32_t HashForObject(Object* other) { 11739 uint32_t HashForObject(Object* other) {
11730 return String::cast(other)->Hash(); 11740 return String::cast(other)->Hash();
11731 } 11741 }
11732 11742
11733 MaybeObject* AsObject() { 11743 MaybeObject* AsObject() {
(...skipping 2317 matching lines...) Expand 10 before | Expand all | Expand 10 after
14051 set_year(Smi::FromInt(year), SKIP_WRITE_BARRIER); 14061 set_year(Smi::FromInt(year), SKIP_WRITE_BARRIER);
14052 set_month(Smi::FromInt(month), SKIP_WRITE_BARRIER); 14062 set_month(Smi::FromInt(month), SKIP_WRITE_BARRIER);
14053 set_day(Smi::FromInt(day), SKIP_WRITE_BARRIER); 14063 set_day(Smi::FromInt(day), SKIP_WRITE_BARRIER);
14054 set_weekday(Smi::FromInt(weekday), SKIP_WRITE_BARRIER); 14064 set_weekday(Smi::FromInt(weekday), SKIP_WRITE_BARRIER);
14055 set_hour(Smi::FromInt(hour), SKIP_WRITE_BARRIER); 14065 set_hour(Smi::FromInt(hour), SKIP_WRITE_BARRIER);
14056 set_min(Smi::FromInt(min), SKIP_WRITE_BARRIER); 14066 set_min(Smi::FromInt(min), SKIP_WRITE_BARRIER);
14057 set_sec(Smi::FromInt(sec), SKIP_WRITE_BARRIER); 14067 set_sec(Smi::FromInt(sec), SKIP_WRITE_BARRIER);
14058 } 14068 }
14059 14069
14060 } } // namespace v8::internal 14070 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/objects.h ('k') | src/scanner.h » ('j') | src/unicode.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698