OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 865 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
876 | 876 |
877 Handle<ObjectHashTable> PutIntoObjectHashTable(Handle<ObjectHashTable> table, | 877 Handle<ObjectHashTable> PutIntoObjectHashTable(Handle<ObjectHashTable> table, |
878 Handle<Object> key, | 878 Handle<Object> key, |
879 Handle<Object> value) { | 879 Handle<Object> value) { |
880 CALL_HEAP_FUNCTION(table->GetIsolate(), | 880 CALL_HEAP_FUNCTION(table->GetIsolate(), |
881 table->Put(*key, *value), | 881 table->Put(*key, *value), |
882 ObjectHashTable); | 882 ObjectHashTable); |
883 } | 883 } |
884 | 884 |
885 | 885 |
886 // This method determines the type of string involved and then gets the UTF8 | |
887 // length of the string. It doesn't flatten the string and has log(n) recursion | |
888 // for a string of length n. If the failure flag gets set, then we have to | |
889 // flatten the string and retry. Failures are caused by surrogate pairs in deep | |
890 // cons strings. | |
891 | |
892 // Single surrogate characters that are encountered in the UTF-16 character | |
893 // sequence of the input string get counted as 3 UTF-8 bytes, because that | |
894 // is the way that WriteUtf8 will encode them. Surrogate pairs are counted and | |
895 // encoded as one 4-byte UTF-8 sequence. | |
896 | |
897 // This function conceptually uses recursion on the two halves of cons strings. | |
898 // However, in order to avoid the recursion going too deep it recurses on the | |
899 // second string of the cons, but iterates on the first substring (by manually | |
900 // eliminating it as a tail recursion). This means it counts the UTF-8 length | |
901 // from the end to the start, which makes no difference to the total. | |
902 | |
903 // Surrogate pairs are recognized even if they are split across two sides of a | |
904 // cons, which complicates the implementation somewhat. Therefore, too deep | |
905 // recursion cannot always be avoided. This case is detected, and the failure | |
906 // flag is set, a signal to the caller that the string should be flattened and | |
907 // the operation retried. | |
908 int Utf8LengthHelper(String* input, | |
909 int from, | |
910 int to, | |
911 bool followed_by_surrogate, | |
912 int max_recursion, | |
913 bool* failure, | |
914 bool* starts_with_surrogate) { | |
915 if (from == to) return 0; | |
916 int total = 0; | |
917 bool dummy; | |
918 while (true) { | |
919 if (input->IsOneByteRepresentation()) { | |
920 *starts_with_surrogate = false; | |
921 return total + to - from; | |
922 } | |
923 switch (StringShape(input).representation_tag()) { | |
924 case kConsStringTag: { | |
925 ConsString* str = ConsString::cast(input); | |
926 String* first = str->first(); | |
927 String* second = str->second(); | |
928 int first_length = first->length(); | |
929 if (first_length - from > to - first_length) { | |
930 if (first_length < to) { | |
931 // Right hand side is shorter. No need to check the recursion depth | |
932 // since this can only happen log(n) times. | |
933 bool right_starts_with_surrogate = false; | |
934 total += Utf8LengthHelper(second, | |
935 0, | |
936 to - first_length, | |
937 followed_by_surrogate, | |
938 max_recursion - 1, | |
939 failure, | |
940 &right_starts_with_surrogate); | |
941 if (*failure) return 0; | |
942 followed_by_surrogate = right_starts_with_surrogate; | |
943 input = first; | |
944 to = first_length; | |
945 } else { | |
946 // We only need the left hand side. | |
947 input = first; | |
948 } | |
949 } else { | |
950 if (first_length > from) { | |
951 // Left hand side is shorter. | |
952 if (first->IsOneByteRepresentation()) { | |
953 total += first_length - from; | |
954 *starts_with_surrogate = false; | |
955 starts_with_surrogate = &dummy; | |
956 input = second; | |
957 from = 0; | |
958 to -= first_length; | |
959 } else if (second->IsOneByteRepresentation()) { | |
960 followed_by_surrogate = false; | |
961 total += to - first_length; | |
962 input = first; | |
963 to = first_length; | |
964 } else if (max_recursion > 0) { | |
965 bool right_starts_with_surrogate = false; | |
966 // Recursing on the long one. This may fail. | |
967 total += Utf8LengthHelper(second, | |
968 0, | |
969 to - first_length, | |
970 followed_by_surrogate, | |
971 max_recursion - 1, | |
972 failure, | |
973 &right_starts_with_surrogate); | |
974 if (*failure) return 0; | |
975 input = first; | |
976 to = first_length; | |
977 followed_by_surrogate = right_starts_with_surrogate; | |
978 } else { | |
979 *failure = true; | |
980 return 0; | |
981 } | |
982 } else { | |
983 // We only need the right hand side. | |
984 input = second; | |
985 from = 0; | |
986 to -= first_length; | |
987 } | |
988 } | |
989 continue; | |
990 } | |
991 case kExternalStringTag: | |
992 case kSeqStringTag: { | |
993 Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector(); | |
994 const uc16* p = vector.start(); | |
995 int previous = unibrow::Utf16::kNoPreviousCharacter; | |
996 for (int i = from; i < to; i++) { | |
997 uc16 c = p[i]; | |
998 total += unibrow::Utf8::Length(c, previous); | |
999 previous = c; | |
1000 } | |
1001 if (to - from > 0) { | |
1002 if (unibrow::Utf16::IsLeadSurrogate(previous) && | |
1003 followed_by_surrogate) { | |
1004 total -= unibrow::Utf8::kBytesSavedByCombiningSurrogates; | |
1005 } | |
1006 if (unibrow::Utf16::IsTrailSurrogate(p[from])) { | |
1007 *starts_with_surrogate = true; | |
1008 } | |
1009 } | |
1010 return total; | |
1011 } | |
1012 case kSlicedStringTag: { | |
1013 SlicedString* str = SlicedString::cast(input); | |
1014 int offset = str->offset(); | |
1015 input = str->parent(); | |
1016 from += offset; | |
1017 to += offset; | |
1018 continue; | |
1019 } | |
1020 default: | |
1021 break; | |
1022 } | |
1023 UNREACHABLE(); | |
1024 return 0; | |
1025 } | |
1026 return 0; | |
1027 } | |
1028 | |
1029 | |
1030 int Utf8Length(Handle<String> str) { | |
1031 bool dummy; | |
1032 bool failure; | |
1033 int len; | |
1034 const int kRecursionBudget = 100; | |
1035 do { | |
1036 failure = false; | |
1037 len = Utf8LengthHelper( | |
1038 *str, 0, str->length(), false, kRecursionBudget, &failure, &dummy); | |
1039 if (failure) FlattenString(str); | |
1040 } while (failure); | |
1041 return len; | |
1042 } | |
1043 | |
1044 | |
1045 DeferredHandleScope::DeferredHandleScope(Isolate* isolate) | 886 DeferredHandleScope::DeferredHandleScope(Isolate* isolate) |
1046 : impl_(isolate->handle_scope_implementer()) { | 887 : impl_(isolate->handle_scope_implementer()) { |
1047 ASSERT(impl_->isolate() == Isolate::Current()); | 888 ASSERT(impl_->isolate() == Isolate::Current()); |
1048 impl_->BeginDeferredScope(); | 889 impl_->BeginDeferredScope(); |
1049 v8::ImplementationUtilities::HandleScopeData* data = | 890 v8::ImplementationUtilities::HandleScopeData* data = |
1050 impl_->isolate()->handle_scope_data(); | 891 impl_->isolate()->handle_scope_data(); |
1051 Object** new_next = impl_->GetSpareOrNewBlock(); | 892 Object** new_next = impl_->GetSpareOrNewBlock(); |
1052 Object** new_limit = &new_next[kHandleBlockSize]; | 893 Object** new_limit = &new_next[kHandleBlockSize]; |
1053 ASSERT(data->limit == &impl_->blocks()->last()[kHandleBlockSize]); | 894 ASSERT(data->limit == &impl_->blocks()->last()[kHandleBlockSize]); |
1054 impl_->blocks()->Add(new_next); | 895 impl_->blocks()->Add(new_next); |
(...skipping 23 matching lines...) Expand all Loading... |
1078 data->next = prev_next_; | 919 data->next = prev_next_; |
1079 data->limit = prev_limit_; | 920 data->limit = prev_limit_; |
1080 #ifdef DEBUG | 921 #ifdef DEBUG |
1081 handles_detached_ = true; | 922 handles_detached_ = true; |
1082 #endif | 923 #endif |
1083 return deferred; | 924 return deferred; |
1084 } | 925 } |
1085 | 926 |
1086 | 927 |
1087 } } // namespace v8::internal | 928 } } // namespace v8::internal |
OLD | NEW |