Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(520)

Side by Side Diff: src/runtime.cc

Issue 11889007: Add back ascii fast path for toupper/tolower (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 5785 matching lines...) Expand 10 before | Expand all | Expand 10 after
5796 // become garbage; there is no reason to keep two identical strings 5796 // become garbage; there is no reason to keep two identical strings
5797 // alive. 5797 // alive.
5798 return s; 5798 return s;
5799 } 5799 }
5800 } 5800 }
5801 5801
5802 5802
5803 namespace { 5803 namespace {
5804 5804
5805 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; 5805 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;
5806 5806 #ifdef ENABLE_LATIN_1
5807 static const uintptr_t kAsciiMask = kOneInEveryByte << 7;
5808 #endif
5807 5809
5808 // Given a word and two range boundaries returns a word with high bit 5810 // Given a word and two range boundaries returns a word with high bit
5809 // set in every byte iff the corresponding input byte was strictly in 5811 // set in every byte iff the corresponding input byte was strictly in
5810 // the range (m, n). All the other bits in the result are cleared. 5812 // the range (m, n). All the other bits in the result are cleared.
5811 // This function is only useful when it can be inlined and the 5813 // This function is only useful when it can be inlined and the
5812 // boundaries are statically known. 5814 // boundaries are statically known.
5813 // Requires: all bytes in the input word and the boundaries must be 5815 // Requires: all bytes in the input word and the boundaries must be
5814 // ASCII (less than 0x7F). 5816 // ASCII (less than 0x7F).
5815 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { 5817 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {
5816 // Every byte in an ASCII string is less than or equal to 0x7F. 5818 // Every byte in an ASCII string is less than or equal to 0x7F.
5817 ASSERT((w & (kOneInEveryByte * 0x7F)) == w); 5819 ASSERT((w & (kOneInEveryByte * 0x7F)) == w);
5818 // Use strict inequalities since in edge cases the function could be 5820 // Use strict inequalities since in edge cases the function could be
5819 // further simplified. 5821 // further simplified.
5820 ASSERT(0 < m && m < n && n < 0x7F); 5822 ASSERT(0 < m && m < n);
5823 #ifndef ENABLE_LATIN_1
5824 ASSERT(n < 0x7F);
5825 #endif
5821 // Has high bit set in every w byte less than n. 5826 // Has high bit set in every w byte less than n.
5822 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; 5827 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;
5823 // Has high bit set in every w byte greater than m. 5828 // Has high bit set in every w byte greater than m.
5824 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); 5829 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m);
5825 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); 5830 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
5826 } 5831 }
5827 5832
5828 5833
5829 enum AsciiCaseConversion { 5834 enum AsciiCaseConversion {
5830 ASCII_TO_LOWER, 5835 ASCII_TO_LOWER,
5831 ASCII_TO_UPPER 5836 ASCII_TO_UPPER
5832 }; 5837 };
5833 5838
5834 5839
5835 template <AsciiCaseConversion dir> 5840 template <AsciiCaseConversion dir>
5836 struct FastAsciiConverter { 5841 struct FastAsciiConverter {
5842 #ifdef ENABLE_LATIN_1
5843 static bool Convert(char* dst, char* src, int length, bool* changed_out) {
5844 #else
5837 static bool Convert(char* dst, char* src, int length) { 5845 static bool Convert(char* dst, char* src, int length) {
5846 #endif
5838 #ifdef DEBUG 5847 #ifdef DEBUG
5839 char* saved_dst = dst; 5848 char* saved_dst = dst;
5840 char* saved_src = src; 5849 char* saved_src = src;
5841 #endif 5850 #endif
5842 // We rely on the distance between upper and lower case letters 5851 // We rely on the distance between upper and lower case letters
5843 // being a known power of 2. 5852 // being a known power of 2.
5844 ASSERT('a' - 'A' == (1 << 5)); 5853 ASSERT('a' - 'A' == (1 << 5));
5845 // Boundaries for the range of input characters that require conversion. 5854 // Boundaries for the range of input characters that require conversion.
5846 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; 5855 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1;
5847 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; 5856 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1;
5848 bool changed = false; 5857 bool changed = false;
5858 #ifdef ENABLE_LATIN_1
5859 uintptr_t or_acc = 0;
5860 #endif
5849 char* const limit = src + length; 5861 char* const limit = src + length;
5850 #ifdef V8_HOST_CAN_READ_UNALIGNED 5862 #ifdef V8_HOST_CAN_READ_UNALIGNED
5851 // Process the prefix of the input that requires no conversion one 5863 // Process the prefix of the input that requires no conversion one
5852 // (machine) word at a time. 5864 // (machine) word at a time.
5853 while (src <= limit - sizeof(uintptr_t)) { 5865 while (src <= limit - sizeof(uintptr_t)) {
5854 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); 5866 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
5867 #ifdef ENABLE_LATIN_1
5868 or_acc |= w;
5869 #endif
5855 if (AsciiRangeMask(w, lo, hi) != 0) { 5870 if (AsciiRangeMask(w, lo, hi) != 0) {
5856 changed = true; 5871 changed = true;
5857 break; 5872 break;
5858 } 5873 }
5859 *reinterpret_cast<uintptr_t*>(dst) = w; 5874 *reinterpret_cast<uintptr_t*>(dst) = w;
5860 src += sizeof(uintptr_t); 5875 src += sizeof(uintptr_t);
5861 dst += sizeof(uintptr_t); 5876 dst += sizeof(uintptr_t);
5862 } 5877 }
5863 // Process the remainder of the input performing conversion when 5878 // Process the remainder of the input performing conversion when
5864 // required one word at a time. 5879 // required one word at a time.
5865 while (src <= limit - sizeof(uintptr_t)) { 5880 while (src <= limit - sizeof(uintptr_t)) {
5866 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); 5881 uintptr_t w = *reinterpret_cast<uintptr_t*>(src);
5882 #ifdef ENABLE_LATIN_1
5883 or_acc |= w;
5884 #endif
5867 uintptr_t m = AsciiRangeMask(w, lo, hi); 5885 uintptr_t m = AsciiRangeMask(w, lo, hi);
5868 // The mask has high (7th) bit set in every byte that needs 5886 // The mask has high (7th) bit set in every byte that needs
5869 // conversion and we know that the distance between cases is 5887 // conversion and we know that the distance between cases is
5870 // 1 << 5. 5888 // 1 << 5.
5871 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); 5889 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2);
5872 src += sizeof(uintptr_t); 5890 src += sizeof(uintptr_t);
5873 dst += sizeof(uintptr_t); 5891 dst += sizeof(uintptr_t);
5874 } 5892 }
5875 #endif 5893 #endif
5876 // Process the last few bytes of the input (or the whole input if 5894 // Process the last few bytes of the input (or the whole input if
5877 // unaligned access is not supported). 5895 // unaligned access is not supported).
5878 while (src < limit) { 5896 while (src < limit) {
5879 char c = *src; 5897 char c = *src;
5898 #ifdef ENABLE_LATIN_1
5899 or_acc |= c;
5900 #endif
5880 if (lo < c && c < hi) { 5901 if (lo < c && c < hi) {
5881 c ^= (1 << 5); 5902 c ^= (1 << 5);
5882 changed = true; 5903 changed = true;
5883 } 5904 }
5884 *dst = c; 5905 *dst = c;
5885 ++src; 5906 ++src;
5886 ++dst; 5907 ++dst;
5887 } 5908 }
5909 #ifdef ENABLE_LATIN_1
5910 if ((or_acc & kAsciiMask) != 0) {
Yang 2013/01/14 15:56:51 Would it be noticeably more expensive to do this c
5911 return false;
5912 }
5913 #endif
5888 #ifdef DEBUG 5914 #ifdef DEBUG
5889 CheckConvert(saved_dst, saved_src, length, changed); 5915 CheckConvert(saved_dst, saved_src, length, changed);
5890 #endif 5916 #endif
5917 #ifdef ENABLE_LATIN_1
5918 *changed_out = changed;
5919 return true;
5920 #else
5891 return changed; 5921 return changed;
5922 #endif
5892 } 5923 }
5893 5924
5894 #ifdef DEBUG 5925 #ifdef DEBUG
5895 static void CheckConvert(char* dst, char* src, int length, bool changed) { 5926 static void CheckConvert(char* dst, char* src, int length, bool changed) {
5896 bool expected_changed = false; 5927 bool expected_changed = false;
5897 for (int i = 0; i < length; i++) { 5928 for (int i = 0; i < length; i++) {
5898 if (dst[i] == src[i]) continue; 5929 if (dst[i] == src[i]) continue;
5899 expected_changed = true; 5930 expected_changed = true;
5900 if (dir == ASCII_TO_LOWER) { 5931 if (dir == ASCII_TO_LOWER) {
5901 ASSERT('A' <= src[i] && src[i] <= 'Z'); 5932 ASSERT('A' <= src[i] && src[i] <= 'Z');
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
5934 Isolate* isolate, 5965 Isolate* isolate,
5935 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) { 5966 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) {
5936 NoHandleAllocation ha; 5967 NoHandleAllocation ha;
5937 CONVERT_ARG_CHECKED(String, s, 0); 5968 CONVERT_ARG_CHECKED(String, s, 0);
5938 s = s->TryFlattenGetString(); 5969 s = s->TryFlattenGetString();
5939 5970
5940 const int length = s->length(); 5971 const int length = s->length();
5941 // Assume that the string is not empty; we need this assumption later. 5972 // Assume that the string is not empty; we need this assumption later.
5942 if (length == 0) return s; 5973 if (length == 0) return s;
5943 5974
5944 #ifndef ENABLE_LATIN_1
5945 // Simpler handling of ASCII strings. 5975 // Simpler handling of ASCII strings.
5946 // 5976 //
5947 // NOTE: This assumes that the upper/lower case of an ASCII 5977 // NOTE: This assumes that the upper/lower case of an ASCII
5948 // character is also ASCII. This is currently the case, but it 5978 // character is also ASCII. This is currently the case, but it
5949 // might break in the future if we implement more context and locale 5979 // might break in the future if we implement more context and locale
5950 // dependent upper/lower conversions. 5980 // dependent upper/lower conversions.
5951 if (s->IsSeqOneByteString()) { 5981 if (s->IsSeqOneByteString()) {
5952 Object* o; 5982 Object* o;
5953 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); 5983 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length);
5954 if (!maybe_o->ToObject(&o)) return maybe_o; 5984 if (!maybe_o->ToObject(&o)) return maybe_o;
5955 } 5985 }
5956 SeqOneByteString* result = SeqOneByteString::cast(o); 5986 SeqOneByteString* result = SeqOneByteString::cast(o);
5987 #ifndef ENABLE_LATIN_1
5957 bool has_changed_character = ConvertTraits::AsciiConverter::Convert( 5988 bool has_changed_character = ConvertTraits::AsciiConverter::Convert(
5958 reinterpret_cast<char*>(result->GetChars()), 5989 reinterpret_cast<char*>(result->GetChars()),
5959 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), 5990 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),
5960 length); 5991 length);
5961 return has_changed_character ? result : s; 5992 return has_changed_character ? result : s;
5993 #else
5994 bool has_changed_character;
5995 bool is_ascii = ConvertTraits::AsciiConverter::Convert(
5996 reinterpret_cast<char*>(result->GetChars()),
5997 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),
5998 length,
5999 &has_changed_character);
6000 // If not ASCII, we discard the result and take the 2 byte path.
6001 if (is_ascii) {
Yang 2013/01/14 15:56:51 It looks like we could save time if Convert failed
6002 return has_changed_character ? result : s;
6003 }
6004 #endif
5962 } 6005 }
5963 #endif
5964 6006
5965 Object* answer; 6007 Object* answer;
5966 { MaybeObject* maybe_answer = 6008 { MaybeObject* maybe_answer =
5967 ConvertCaseHelper(isolate, s, length, length, mapping); 6009 ConvertCaseHelper(isolate, s, length, length, mapping);
5968 if (!maybe_answer->ToObject(&answer)) return maybe_answer; 6010 if (!maybe_answer->ToObject(&answer)) return maybe_answer;
5969 } 6011 }
5970 if (answer->IsSmi()) { 6012 if (answer->IsSmi()) {
5971 // Retry with correct length. 6013 // Retry with correct length.
5972 { MaybeObject* maybe_answer = 6014 { MaybeObject* maybe_answer =
5973 ConvertCaseHelper(isolate, 6015 ConvertCaseHelper(isolate,
(...skipping 7669 matching lines...) Expand 10 before | Expand all | Expand 10 after
13643 // Handle last resort GC and make sure to allow future allocations 13685 // Handle last resort GC and make sure to allow future allocations
13644 // to grow the heap without causing GCs (if possible). 13686 // to grow the heap without causing GCs (if possible).
13645 isolate->counters()->gc_last_resort_from_js()->Increment(); 13687 isolate->counters()->gc_last_resort_from_js()->Increment();
13646 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, 13688 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,
13647 "Runtime::PerformGC"); 13689 "Runtime::PerformGC");
13648 } 13690 }
13649 } 13691 }
13650 13692
13651 13693
13652 } } // namespace v8::internal 13694 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698