OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 6025 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6036 | 6036 |
6037 // A negative length means "up to the end of the string". | 6037 // A negative length means "up to the end of the string". |
6038 if (length < 0) length = kMaxInt - offset; | 6038 if (length < 0) length = kMaxInt - offset; |
6039 | 6039 |
6040 // Compute the size of the UTF-8 string. Start at the specified offset. | 6040 // Compute the size of the UTF-8 string. Start at the specified offset. |
6041 Access<StringInputBuffer> buffer( | 6041 Access<StringInputBuffer> buffer( |
6042 heap->isolate()->objects_string_input_buffer()); | 6042 heap->isolate()->objects_string_input_buffer()); |
6043 buffer->Reset(offset, this); | 6043 buffer->Reset(offset, this); |
6044 int character_position = offset; | 6044 int character_position = offset; |
6045 int utf8_bytes = 0; | 6045 int utf8_bytes = 0; |
| 6046 int last = unibrow::Utf16::kNoPreviousCharacter; |
6046 while (buffer->has_more() && character_position++ < offset + length) { | 6047 while (buffer->has_more() && character_position++ < offset + length) { |
6047 uint16_t character = buffer->GetNext(); | 6048 uint16_t character = buffer->GetNext(); |
6048 utf8_bytes += unibrow::Utf8::Length(character); | 6049 utf8_bytes += unibrow::Utf8::Length(character, last); |
| 6050 last = character; |
6049 } | 6051 } |
6050 | 6052 |
6051 if (length_return) { | 6053 if (length_return) { |
6052 *length_return = utf8_bytes; | 6054 *length_return = utf8_bytes; |
6053 } | 6055 } |
6054 | 6056 |
6055 char* result = NewArray<char>(utf8_bytes + 1); | 6057 char* result = NewArray<char>(utf8_bytes + 1); |
6056 | 6058 |
6057 // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset. | 6059 // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset. |
6058 buffer->Rewind(); | 6060 buffer->Rewind(); |
6059 buffer->Seek(offset); | 6061 buffer->Seek(offset); |
6060 character_position = offset; | 6062 character_position = offset; |
6061 int utf8_byte_position = 0; | 6063 int utf8_byte_position = 0; |
| 6064 last = unibrow::Utf16::kNoPreviousCharacter; |
6062 while (buffer->has_more() && character_position++ < offset + length) { | 6065 while (buffer->has_more() && character_position++ < offset + length) { |
6063 uint16_t character = buffer->GetNext(); | 6066 uint16_t character = buffer->GetNext(); |
6064 if (allow_nulls == DISALLOW_NULLS && character == 0) { | 6067 if (allow_nulls == DISALLOW_NULLS && character == 0) { |
6065 character = ' '; | 6068 character = ' '; |
6066 } | 6069 } |
6067 utf8_byte_position += | 6070 utf8_byte_position += |
6068 unibrow::Utf8::Encode(result + utf8_byte_position, character); | 6071 unibrow::Utf8::Encode(result + utf8_byte_position, character, last); |
| 6072 last = character; |
6069 } | 6073 } |
6070 result[utf8_byte_position] = 0; | 6074 result[utf8_byte_position] = 0; |
6071 return SmartArrayPointer<char>(result); | 6075 return SmartArrayPointer<char>(result); |
6072 } | 6076 } |
6073 | 6077 |
6074 | 6078 |
6075 SmartArrayPointer<char> String::ToCString(AllowNullsFlag allow_nulls, | 6079 SmartArrayPointer<char> String::ToCString(AllowNullsFlag allow_nulls, |
6076 RobustnessFlag robust_flag, | 6080 RobustnessFlag robust_flag, |
6077 int* length_return) { | 6081 int* length_return) { |
6078 return ToCString(allow_nulls, robust_flag, 0, -1, length_return); | 6082 return ToCString(allow_nulls, robust_flag, 0, -1, length_return); |
(...skipping 293 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6372 max_chars); | 6376 max_chars); |
6373 default: | 6377 default: |
6374 break; | 6378 break; |
6375 } | 6379 } |
6376 | 6380 |
6377 UNREACHABLE(); | 6381 UNREACHABLE(); |
6378 return 0; | 6382 return 0; |
6379 } | 6383 } |
6380 | 6384 |
6381 | 6385 |
6382 // This method determines the type of string involved and then gets the UTF8 | |
6383 // length of the string. It doesn't flatten the string and has log(n) recursion | |
6384 // for a string of length n. | |
6385 int String::Utf8Length(String* input, int from, int to) { | |
6386 if (from == to) return 0; | |
6387 int total = 0; | |
6388 while (true) { | |
6389 if (input->IsAsciiRepresentation()) return total + to - from; | |
6390 switch (StringShape(input).representation_tag()) { | |
6391 case kConsStringTag: { | |
6392 ConsString* str = ConsString::cast(input); | |
6393 String* first = str->first(); | |
6394 String* second = str->second(); | |
6395 int first_length = first->length(); | |
6396 if (first_length - from < to - first_length) { | |
6397 if (first_length > from) { | |
6398 // Left hand side is shorter. | |
6399 total += Utf8Length(first, from, first_length); | |
6400 input = second; | |
6401 from = 0; | |
6402 to -= first_length; | |
6403 } else { | |
6404 // We only need the right hand side. | |
6405 input = second; | |
6406 from -= first_length; | |
6407 to -= first_length; | |
6408 } | |
6409 } else { | |
6410 if (first_length <= to) { | |
6411 // Right hand side is shorter. | |
6412 total += Utf8Length(second, 0, to - first_length); | |
6413 input = first; | |
6414 to = first_length; | |
6415 } else { | |
6416 // We only need the left hand side. | |
6417 input = first; | |
6418 } | |
6419 } | |
6420 continue; | |
6421 } | |
6422 case kExternalStringTag: | |
6423 case kSeqStringTag: { | |
6424 Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector(); | |
6425 const uc16* p = vector.start(); | |
6426 for (int i = from; i < to; i++) { | |
6427 total += unibrow::Utf8::Length(p[i]); | |
6428 } | |
6429 return total; | |
6430 } | |
6431 case kSlicedStringTag: { | |
6432 SlicedString* str = SlicedString::cast(input); | |
6433 int offset = str->offset(); | |
6434 input = str->parent(); | |
6435 from += offset; | |
6436 to += offset; | |
6437 continue; | |
6438 } | |
6439 default: | |
6440 break; | |
6441 } | |
6442 UNREACHABLE(); | |
6443 return 0; | |
6444 } | |
6445 return 0; | |
6446 } | |
6447 | |
6448 | |
6449 void Relocatable::PostGarbageCollectionProcessing() { | 6386 void Relocatable::PostGarbageCollectionProcessing() { |
6450 Isolate* isolate = Isolate::Current(); | 6387 Isolate* isolate = Isolate::Current(); |
6451 Relocatable* current = isolate->relocatable_top(); | 6388 Relocatable* current = isolate->relocatable_top(); |
6452 while (current != NULL) { | 6389 while (current != NULL) { |
6453 current->PostGarbageCollection(); | 6390 current->PostGarbageCollection(); |
6454 current = current->prev_; | 6391 current = current->prev_; |
6455 } | 6392 } |
6456 } | 6393 } |
6457 | 6394 |
6458 | 6395 |
(...skipping 373 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6832 } | 6769 } |
6833 } | 6770 } |
6834 } | 6771 } |
6835 | 6772 |
6836 | 6773 |
6837 template <typename IteratorA, typename IteratorB> | 6774 template <typename IteratorA, typename IteratorB> |
6838 static inline bool CompareStringContents(IteratorA* ia, IteratorB* ib) { | 6775 static inline bool CompareStringContents(IteratorA* ia, IteratorB* ib) { |
6839 // General slow case check. We know that the ia and ib iterators | 6776 // General slow case check. We know that the ia and ib iterators |
6840 // have the same length. | 6777 // have the same length. |
6841 while (ia->has_more()) { | 6778 while (ia->has_more()) { |
6842 uc32 ca = ia->GetNext(); | 6779 uint32_t ca = ia->GetNext(); |
6843 uc32 cb = ib->GetNext(); | 6780 uint32_t cb = ib->GetNext(); |
| 6781 ASSERT(ca <= unibrow::Utf16::kMaxNonSurrogateCharCode); |
| 6782 ASSERT(cb <= unibrow::Utf16::kMaxNonSurrogateCharCode); |
6844 if (ca != cb) | 6783 if (ca != cb) |
6845 return false; | 6784 return false; |
6846 } | 6785 } |
6847 return true; | 6786 return true; |
6848 } | 6787 } |
6849 | 6788 |
6850 | 6789 |
6851 // Compares the contents of two strings by reading and comparing | 6790 // Compares the contents of two strings by reading and comparing |
6852 // int-sized blocks of characters. | 6791 // int-sized blocks of characters. |
6853 template <typename Char> | 6792 template <typename Char> |
(...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7016 | 6955 |
7017 | 6956 |
7018 bool String::IsEqualTo(Vector<const char> str) { | 6957 bool String::IsEqualTo(Vector<const char> str) { |
7019 Isolate* isolate = GetIsolate(); | 6958 Isolate* isolate = GetIsolate(); |
7020 int slen = length(); | 6959 int slen = length(); |
7021 Access<UnicodeCache::Utf8Decoder> | 6960 Access<UnicodeCache::Utf8Decoder> |
7022 decoder(isolate->unicode_cache()->utf8_decoder()); | 6961 decoder(isolate->unicode_cache()->utf8_decoder()); |
7023 decoder->Reset(str.start(), str.length()); | 6962 decoder->Reset(str.start(), str.length()); |
7024 int i; | 6963 int i; |
7025 for (i = 0; i < slen && decoder->has_more(); i++) { | 6964 for (i = 0; i < slen && decoder->has_more(); i++) { |
7026 uc32 r = decoder->GetNext(); | 6965 uint32_t r = decoder->GetNext(); |
7027 if (Get(i) != r) return false; | 6966 if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| 6967 if (i > slen - 1) return false; |
| 6968 if (Get(i++) != unibrow::Utf16::LeadSurrogate(r)) return false; |
| 6969 if (Get(i) != unibrow::Utf16::TrailSurrogate(r)) return false; |
| 6970 } else { |
| 6971 if (Get(i) != r) return false; |
| 6972 } |
7028 } | 6973 } |
7029 return i == slen && !decoder->has_more(); | 6974 return i == slen && !decoder->has_more(); |
7030 } | 6975 } |
7031 | 6976 |
7032 | 6977 |
7033 bool String::IsAsciiEqualTo(Vector<const char> str) { | 6978 bool String::IsAsciiEqualTo(Vector<const char> str) { |
7034 int slen = length(); | 6979 int slen = length(); |
7035 if (str.length() != slen) return false; | 6980 if (str.length() != slen) return false; |
7036 FlatContent content = GetFlatContent(); | 6981 FlatContent content = GetFlatContent(); |
7037 if (content.IsAscii()) { | 6982 if (content.IsAscii()) { |
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7147 value <<= String::kHashShift; | 7092 value <<= String::kHashShift; |
7148 value |= length << String::kArrayIndexHashLengthShift; | 7093 value |= length << String::kArrayIndexHashLengthShift; |
7149 | 7094 |
7150 ASSERT((value & String::kIsNotArrayIndexMask) == 0); | 7095 ASSERT((value & String::kIsNotArrayIndexMask) == 0); |
7151 ASSERT((length > String::kMaxCachedArrayIndexLength) || | 7096 ASSERT((length > String::kMaxCachedArrayIndexLength) || |
7152 (value & String::kContainsCachedArrayIndexMask) == 0); | 7097 (value & String::kContainsCachedArrayIndexMask) == 0); |
7153 return value; | 7098 return value; |
7154 } | 7099 } |
7155 | 7100 |
7156 | 7101 |
| 7102 void StringHasher::AddSurrogatePair(uc32 c) { |
| 7103 uint16_t lead = unibrow::Utf16::LeadSurrogate(c); |
| 7104 AddCharacter(lead); |
| 7105 uint16_t trail = unibrow::Utf16::TrailSurrogate(c); |
| 7106 AddCharacter(trail); |
| 7107 } |
| 7108 |
| 7109 |
| 7110 void StringHasher::AddSurrogatePairNoIndex(uc32 c) { |
| 7111 uint16_t lead = unibrow::Utf16::LeadSurrogate(c); |
| 7112 AddCharacterNoIndex(lead); |
| 7113 uint16_t trail = unibrow::Utf16::TrailSurrogate(c); |
| 7114 AddCharacterNoIndex(trail); |
| 7115 } |
| 7116 |
| 7117 |
7157 uint32_t StringHasher::GetHashField() { | 7118 uint32_t StringHasher::GetHashField() { |
7158 ASSERT(is_valid()); | 7119 ASSERT(is_valid()); |
7159 if (length_ <= String::kMaxHashCalcLength) { | 7120 if (length_ <= String::kMaxHashCalcLength) { |
7160 if (is_array_index()) { | 7121 if (is_array_index()) { |
7161 return MakeArrayIndexHash(array_index(), length_); | 7122 return MakeArrayIndexHash(array_index(), length_); |
7162 } | 7123 } |
7163 return (GetHash() << String::kHashShift) | String::kIsNotArrayIndexMask; | 7124 return (GetHash() << String::kHashShift) | String::kIsNotArrayIndexMask; |
7164 } else { | 7125 } else { |
7165 return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask; | 7126 return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask; |
7166 } | 7127 } |
(...skipping 3572 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
10739 : string_(string), hash_field_(0), seed_(seed) { } | 10700 : string_(string), hash_field_(0), seed_(seed) { } |
10740 | 10701 |
10741 bool IsMatch(Object* string) { | 10702 bool IsMatch(Object* string) { |
10742 return String::cast(string)->IsEqualTo(string_); | 10703 return String::cast(string)->IsEqualTo(string_); |
10743 } | 10704 } |
10744 | 10705 |
10745 uint32_t Hash() { | 10706 uint32_t Hash() { |
10746 if (hash_field_ != 0) return hash_field_ >> String::kHashShift; | 10707 if (hash_field_ != 0) return hash_field_ >> String::kHashShift; |
10747 unibrow::Utf8InputBuffer<> buffer(string_.start(), | 10708 unibrow::Utf8InputBuffer<> buffer(string_.start(), |
10748 static_cast<unsigned>(string_.length())); | 10709 static_cast<unsigned>(string_.length())); |
10749 chars_ = buffer.Length(); | 10710 chars_ = buffer.Utf16Length(); |
10750 hash_field_ = String::ComputeHashField(&buffer, chars_, seed_); | 10711 hash_field_ = String::ComputeHashField(&buffer, chars_, seed_); |
10751 uint32_t result = hash_field_ >> String::kHashShift; | 10712 uint32_t result = hash_field_ >> String::kHashShift; |
10752 ASSERT(result != 0); // Ensure that the hash value of 0 is never computed. | 10713 ASSERT(result != 0); // Ensure that the hash value of 0 is never computed. |
10753 return result; | 10714 return result; |
10754 } | 10715 } |
10755 | 10716 |
10756 uint32_t HashForObject(Object* other) { | 10717 uint32_t HashForObject(Object* other) { |
10757 return String::cast(other)->Hash(); | 10718 return String::cast(other)->Hash(); |
10758 } | 10719 } |
10759 | 10720 |
(...skipping 2197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
12957 if (break_point_objects()->IsUndefined()) return 0; | 12918 if (break_point_objects()->IsUndefined()) return 0; |
12958 // Single break point. | 12919 // Single break point. |
12959 if (!break_point_objects()->IsFixedArray()) return 1; | 12920 if (!break_point_objects()->IsFixedArray()) return 1; |
12960 // Multiple break points. | 12921 // Multiple break points. |
12961 return FixedArray::cast(break_point_objects())->length(); | 12922 return FixedArray::cast(break_point_objects())->length(); |
12962 } | 12923 } |
12963 #endif // ENABLE_DEBUGGER_SUPPORT | 12924 #endif // ENABLE_DEBUGGER_SUPPORT |
12964 | 12925 |
12965 | 12926 |
12966 } } // namespace v8::internal | 12927 } } // namespace v8::internal |
OLD | NEW |