| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 865 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 876 | 876 |
| 877 Handle<ObjectHashTable> PutIntoObjectHashTable(Handle<ObjectHashTable> table, | 877 Handle<ObjectHashTable> PutIntoObjectHashTable(Handle<ObjectHashTable> table, |
| 878 Handle<Object> key, | 878 Handle<Object> key, |
| 879 Handle<Object> value) { | 879 Handle<Object> value) { |
| 880 CALL_HEAP_FUNCTION(table->GetIsolate(), | 880 CALL_HEAP_FUNCTION(table->GetIsolate(), |
| 881 table->Put(*key, *value), | 881 table->Put(*key, *value), |
| 882 ObjectHashTable); | 882 ObjectHashTable); |
| 883 } | 883 } |
| 884 | 884 |
| 885 | 885 |
| 886 // This method determines the type of string involved and then gets the UTF8 | |
| 887 // length of the string. It doesn't flatten the string and has log(n) recursion | |
| 888 // for a string of length n. If the failure flag gets set, then we have to | |
| 889 // flatten the string and retry. Failures are caused by surrogate pairs in deep | |
| 890 // cons strings. | |
| 891 | |
| 892 // Single surrogate characters that are encountered in the UTF-16 character | |
| 893 // sequence of the input string get counted as 3 UTF-8 bytes, because that | |
| 894 // is the way that WriteUtf8 will encode them. Surrogate pairs are counted and | |
| 895 // encoded as one 4-byte UTF-8 sequence. | |
| 896 | |
| 897 // This function conceptually uses recursion on the two halves of cons strings. | |
| 898 // However, in order to avoid the recursion going too deep it recurses on the | |
| 899 // second string of the cons, but iterates on the first substring (by manually | |
| 900 // eliminating it as a tail recursion). This means it counts the UTF-8 length | |
| 901 // from the end to the start, which makes no difference to the total. | |
| 902 | |
| 903 // Surrogate pairs are recognized even if they are split across two sides of a | |
| 904 // cons, which complicates the implementation somewhat. Therefore, too deep | |
| 905 // recursion cannot always be avoided. This case is detected, and the failure | |
| 906 // flag is set, a signal to the caller that the string should be flattened and | |
| 907 // the operation retried. | |
| 908 int Utf8LengthHelper(String* input, | |
| 909 int from, | |
| 910 int to, | |
| 911 bool followed_by_surrogate, | |
| 912 int max_recursion, | |
| 913 bool* failure, | |
| 914 bool* starts_with_surrogate) { | |
| 915 if (from == to) return 0; | |
| 916 int total = 0; | |
| 917 bool dummy; | |
| 918 while (true) { | |
| 919 if (input->IsOneByteRepresentation()) { | |
| 920 *starts_with_surrogate = false; | |
| 921 return total + to - from; | |
| 922 } | |
| 923 switch (StringShape(input).representation_tag()) { | |
| 924 case kConsStringTag: { | |
| 925 ConsString* str = ConsString::cast(input); | |
| 926 String* first = str->first(); | |
| 927 String* second = str->second(); | |
| 928 int first_length = first->length(); | |
| 929 if (first_length - from > to - first_length) { | |
| 930 if (first_length < to) { | |
| 931 // Right hand side is shorter. No need to check the recursion depth | |
| 932 // since this can only happen log(n) times. | |
| 933 bool right_starts_with_surrogate = false; | |
| 934 total += Utf8LengthHelper(second, | |
| 935 0, | |
| 936 to - first_length, | |
| 937 followed_by_surrogate, | |
| 938 max_recursion - 1, | |
| 939 failure, | |
| 940 &right_starts_with_surrogate); | |
| 941 if (*failure) return 0; | |
| 942 followed_by_surrogate = right_starts_with_surrogate; | |
| 943 input = first; | |
| 944 to = first_length; | |
| 945 } else { | |
| 946 // We only need the left hand side. | |
| 947 input = first; | |
| 948 } | |
| 949 } else { | |
| 950 if (first_length > from) { | |
| 951 // Left hand side is shorter. | |
| 952 if (first->IsOneByteRepresentation()) { | |
| 953 total += first_length - from; | |
| 954 *starts_with_surrogate = false; | |
| 955 starts_with_surrogate = &dummy; | |
| 956 input = second; | |
| 957 from = 0; | |
| 958 to -= first_length; | |
| 959 } else if (second->IsOneByteRepresentation()) { | |
| 960 followed_by_surrogate = false; | |
| 961 total += to - first_length; | |
| 962 input = first; | |
| 963 to = first_length; | |
| 964 } else if (max_recursion > 0) { | |
| 965 bool right_starts_with_surrogate = false; | |
| 966 // Recursing on the long one. This may fail. | |
| 967 total += Utf8LengthHelper(second, | |
| 968 0, | |
| 969 to - first_length, | |
| 970 followed_by_surrogate, | |
| 971 max_recursion - 1, | |
| 972 failure, | |
| 973 &right_starts_with_surrogate); | |
| 974 if (*failure) return 0; | |
| 975 input = first; | |
| 976 to = first_length; | |
| 977 followed_by_surrogate = right_starts_with_surrogate; | |
| 978 } else { | |
| 979 *failure = true; | |
| 980 return 0; | |
| 981 } | |
| 982 } else { | |
| 983 // We only need the right hand side. | |
| 984 input = second; | |
| 985 from = 0; | |
| 986 to -= first_length; | |
| 987 } | |
| 988 } | |
| 989 continue; | |
| 990 } | |
| 991 case kExternalStringTag: | |
| 992 case kSeqStringTag: { | |
| 993 Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector(); | |
| 994 const uc16* p = vector.start(); | |
| 995 int previous = unibrow::Utf16::kNoPreviousCharacter; | |
| 996 for (int i = from; i < to; i++) { | |
| 997 uc16 c = p[i]; | |
| 998 total += unibrow::Utf8::Length(c, previous); | |
| 999 previous = c; | |
| 1000 } | |
| 1001 if (to - from > 0) { | |
| 1002 if (unibrow::Utf16::IsLeadSurrogate(previous) && | |
| 1003 followed_by_surrogate) { | |
| 1004 total -= unibrow::Utf8::kBytesSavedByCombiningSurrogates; | |
| 1005 } | |
| 1006 if (unibrow::Utf16::IsTrailSurrogate(p[from])) { | |
| 1007 *starts_with_surrogate = true; | |
| 1008 } | |
| 1009 } | |
| 1010 return total; | |
| 1011 } | |
| 1012 case kSlicedStringTag: { | |
| 1013 SlicedString* str = SlicedString::cast(input); | |
| 1014 int offset = str->offset(); | |
| 1015 input = str->parent(); | |
| 1016 from += offset; | |
| 1017 to += offset; | |
| 1018 continue; | |
| 1019 } | |
| 1020 default: | |
| 1021 break; | |
| 1022 } | |
| 1023 UNREACHABLE(); | |
| 1024 return 0; | |
| 1025 } | |
| 1026 return 0; | |
| 1027 } | |
| 1028 | |
| 1029 | |
| 1030 int Utf8Length(Handle<String> str) { | |
| 1031 bool dummy; | |
| 1032 bool failure; | |
| 1033 int len; | |
| 1034 const int kRecursionBudget = 100; | |
| 1035 do { | |
| 1036 failure = false; | |
| 1037 len = Utf8LengthHelper( | |
| 1038 *str, 0, str->length(), false, kRecursionBudget, &failure, &dummy); | |
| 1039 if (failure) FlattenString(str); | |
| 1040 } while (failure); | |
| 1041 return len; | |
| 1042 } | |
| 1043 | |
| 1044 | |
| 1045 DeferredHandleScope::DeferredHandleScope(Isolate* isolate) | 886 DeferredHandleScope::DeferredHandleScope(Isolate* isolate) |
| 1046 : impl_(isolate->handle_scope_implementer()) { | 887 : impl_(isolate->handle_scope_implementer()) { |
| 1047 ASSERT(impl_->isolate() == Isolate::Current()); | 888 ASSERT(impl_->isolate() == Isolate::Current()); |
| 1048 impl_->BeginDeferredScope(); | 889 impl_->BeginDeferredScope(); |
| 1049 v8::ImplementationUtilities::HandleScopeData* data = | 890 v8::ImplementationUtilities::HandleScopeData* data = |
| 1050 impl_->isolate()->handle_scope_data(); | 891 impl_->isolate()->handle_scope_data(); |
| 1051 Object** new_next = impl_->GetSpareOrNewBlock(); | 892 Object** new_next = impl_->GetSpareOrNewBlock(); |
| 1052 Object** new_limit = &new_next[kHandleBlockSize]; | 893 Object** new_limit = &new_next[kHandleBlockSize]; |
| 1053 ASSERT(data->limit == &impl_->blocks()->last()[kHandleBlockSize]); | 894 ASSERT(data->limit == &impl_->blocks()->last()[kHandleBlockSize]); |
| 1054 impl_->blocks()->Add(new_next); | 895 impl_->blocks()->Add(new_next); |
| (...skipping 23 matching lines...) Expand all Loading... |
| 1078 data->next = prev_next_; | 919 data->next = prev_next_; |
| 1079 data->limit = prev_limit_; | 920 data->limit = prev_limit_; |
| 1080 #ifdef DEBUG | 921 #ifdef DEBUG |
| 1081 handles_detached_ = true; | 922 handles_detached_ = true; |
| 1082 #endif | 923 #endif |
| 1083 return deferred; | 924 return deferred; |
| 1084 } | 925 } |
| 1085 | 926 |
| 1086 | 927 |
| 1087 } } // namespace v8::internal | 928 } } // namespace v8::internal |
| OLD | NEW |