Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(123)

Side by Side Diff: src/handles.cc

Issue 11725006: Refactor out assumption that one byte strings are ascii in utf8 processing. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Fix array bounds issue Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/handles.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 865 matching lines...) Expand 10 before | Expand all | Expand 10 after
876 876
877 Handle<ObjectHashTable> PutIntoObjectHashTable(Handle<ObjectHashTable> table, 877 Handle<ObjectHashTable> PutIntoObjectHashTable(Handle<ObjectHashTable> table,
878 Handle<Object> key, 878 Handle<Object> key,
879 Handle<Object> value) { 879 Handle<Object> value) {
880 CALL_HEAP_FUNCTION(table->GetIsolate(), 880 CALL_HEAP_FUNCTION(table->GetIsolate(),
881 table->Put(*key, *value), 881 table->Put(*key, *value),
882 ObjectHashTable); 882 ObjectHashTable);
883 } 883 }
884 884
885 885
886 // This method determines the type of string involved and then gets the UTF8
887 // length of the string. It doesn't flatten the string and has log(n) recursion
888 // for a string of length n. If the failure flag gets set, then we have to
889 // flatten the string and retry. Failures are caused by surrogate pairs in deep
890 // cons strings.
891
892 // Single surrogate characters that are encountered in the UTF-16 character
893 // sequence of the input string get counted as 3 UTF-8 bytes, because that
894 // is the way that WriteUtf8 will encode them. Surrogate pairs are counted and
895 // encoded as one 4-byte UTF-8 sequence.
896
897 // This function conceptually uses recursion on the two halves of cons strings.
898 // However, in order to avoid the recursion going too deep it recurses on the
899 // second string of the cons, but iterates on the first substring (by manually
900 // eliminating it as a tail recursion). This means it counts the UTF-8 length
901 // from the end to the start, which makes no difference to the total.
902
903 // Surrogate pairs are recognized even if they are split across two sides of a
904 // cons, which complicates the implementation somewhat. Therefore, too deep
905 // recursion cannot always be avoided. This case is detected, and the failure
906 // flag is set, a signal to the caller that the string should be flattened and
907 // the operation retried.
908 int Utf8LengthHelper(String* input,
909 int from,
910 int to,
911 bool followed_by_surrogate,
912 int max_recursion,
913 bool* failure,
914 bool* starts_with_surrogate) {
915 if (from == to) return 0;
916 int total = 0;
917 bool dummy;
918 while (true) {
919 if (input->IsOneByteRepresentation()) {
920 *starts_with_surrogate = false;
921 return total + to - from;
922 }
923 switch (StringShape(input).representation_tag()) {
924 case kConsStringTag: {
925 ConsString* str = ConsString::cast(input);
926 String* first = str->first();
927 String* second = str->second();
928 int first_length = first->length();
929 if (first_length - from > to - first_length) {
930 if (first_length < to) {
931 // Right hand side is shorter. No need to check the recursion depth
932 // since this can only happen log(n) times.
933 bool right_starts_with_surrogate = false;
934 total += Utf8LengthHelper(second,
935 0,
936 to - first_length,
937 followed_by_surrogate,
938 max_recursion - 1,
939 failure,
940 &right_starts_with_surrogate);
941 if (*failure) return 0;
942 followed_by_surrogate = right_starts_with_surrogate;
943 input = first;
944 to = first_length;
945 } else {
946 // We only need the left hand side.
947 input = first;
948 }
949 } else {
950 if (first_length > from) {
951 // Left hand side is shorter.
952 if (first->IsOneByteRepresentation()) {
953 total += first_length - from;
954 *starts_with_surrogate = false;
955 starts_with_surrogate = &dummy;
956 input = second;
957 from = 0;
958 to -= first_length;
959 } else if (second->IsOneByteRepresentation()) {
960 followed_by_surrogate = false;
961 total += to - first_length;
962 input = first;
963 to = first_length;
964 } else if (max_recursion > 0) {
965 bool right_starts_with_surrogate = false;
966 // Recursing on the long one. This may fail.
967 total += Utf8LengthHelper(second,
968 0,
969 to - first_length,
970 followed_by_surrogate,
971 max_recursion - 1,
972 failure,
973 &right_starts_with_surrogate);
974 if (*failure) return 0;
975 input = first;
976 to = first_length;
977 followed_by_surrogate = right_starts_with_surrogate;
978 } else {
979 *failure = true;
980 return 0;
981 }
982 } else {
983 // We only need the right hand side.
984 input = second;
985 from = 0;
986 to -= first_length;
987 }
988 }
989 continue;
990 }
991 case kExternalStringTag:
992 case kSeqStringTag: {
993 Vector<const uc16> vector = input->GetFlatContent().ToUC16Vector();
994 const uc16* p = vector.start();
995 int previous = unibrow::Utf16::kNoPreviousCharacter;
996 for (int i = from; i < to; i++) {
997 uc16 c = p[i];
998 total += unibrow::Utf8::Length(c, previous);
999 previous = c;
1000 }
1001 if (to - from > 0) {
1002 if (unibrow::Utf16::IsLeadSurrogate(previous) &&
1003 followed_by_surrogate) {
1004 total -= unibrow::Utf8::kBytesSavedByCombiningSurrogates;
1005 }
1006 if (unibrow::Utf16::IsTrailSurrogate(p[from])) {
1007 *starts_with_surrogate = true;
1008 }
1009 }
1010 return total;
1011 }
1012 case kSlicedStringTag: {
1013 SlicedString* str = SlicedString::cast(input);
1014 int offset = str->offset();
1015 input = str->parent();
1016 from += offset;
1017 to += offset;
1018 continue;
1019 }
1020 default:
1021 break;
1022 }
1023 UNREACHABLE();
1024 return 0;
1025 }
1026 return 0;
1027 }
1028
1029
1030 int Utf8Length(Handle<String> str) {
1031 bool dummy;
1032 bool failure;
1033 int len;
1034 const int kRecursionBudget = 100;
1035 do {
1036 failure = false;
1037 len = Utf8LengthHelper(
1038 *str, 0, str->length(), false, kRecursionBudget, &failure, &dummy);
1039 if (failure) FlattenString(str);
1040 } while (failure);
1041 return len;
1042 }
1043
1044
1045 DeferredHandleScope::DeferredHandleScope(Isolate* isolate) 886 DeferredHandleScope::DeferredHandleScope(Isolate* isolate)
1046 : impl_(isolate->handle_scope_implementer()) { 887 : impl_(isolate->handle_scope_implementer()) {
1047 ASSERT(impl_->isolate() == Isolate::Current()); 888 ASSERT(impl_->isolate() == Isolate::Current());
1048 impl_->BeginDeferredScope(); 889 impl_->BeginDeferredScope();
1049 v8::ImplementationUtilities::HandleScopeData* data = 890 v8::ImplementationUtilities::HandleScopeData* data =
1050 impl_->isolate()->handle_scope_data(); 891 impl_->isolate()->handle_scope_data();
1051 Object** new_next = impl_->GetSpareOrNewBlock(); 892 Object** new_next = impl_->GetSpareOrNewBlock();
1052 Object** new_limit = &new_next[kHandleBlockSize]; 893 Object** new_limit = &new_next[kHandleBlockSize];
1053 ASSERT(data->limit == &impl_->blocks()->last()[kHandleBlockSize]); 894 ASSERT(data->limit == &impl_->blocks()->last()[kHandleBlockSize]);
1054 impl_->blocks()->Add(new_next); 895 impl_->blocks()->Add(new_next);
(...skipping 23 matching lines...) Expand all
1078 data->next = prev_next_; 919 data->next = prev_next_;
1079 data->limit = prev_limit_; 920 data->limit = prev_limit_;
1080 #ifdef DEBUG 921 #ifdef DEBUG
1081 handles_detached_ = true; 922 handles_detached_ = true;
1082 #endif 923 #endif
1083 return deferred; 924 return deferred;
1084 } 925 }
1085 926
1086 927
1087 } } // namespace v8::internal 928 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/handles.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698