OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 5785 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5796 // become garbage; there is no reason to keep two identical strings | 5796 // become garbage; there is no reason to keep two identical strings |
5797 // alive. | 5797 // alive. |
5798 return s; | 5798 return s; |
5799 } | 5799 } |
5800 } | 5800 } |
5801 | 5801 |
5802 | 5802 |
5803 namespace { | 5803 namespace { |
5804 | 5804 |
5805 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; | 5805 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; |
5806 | 5806 #ifdef ENABLE_LATIN_1 |
5807 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; | |
5808 #endif | |
5807 | 5809 |
5808 // Given a word and two range boundaries returns a word with high bit | 5810 // Given a word and two range boundaries returns a word with high bit |
5809 // set in every byte iff the corresponding input byte was strictly in | 5811 // set in every byte iff the corresponding input byte was strictly in |
5810 // the range (m, n). All the other bits in the result are cleared. | 5812 // the range (m, n). All the other bits in the result are cleared. |
5811 // This function is only useful when it can be inlined and the | 5813 // This function is only useful when it can be inlined and the |
5812 // boundaries are statically known. | 5814 // boundaries are statically known. |
5813 // Requires: all bytes in the input word and the boundaries must be | 5815 // Requires: all bytes in the input word and the boundaries must be |
5814 // ASCII (less than or equal to 0x7F). | 5816 // ASCII (less than or equal to 0x7F). |
5815 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { | 5817 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { |
5816 // Every byte in an ASCII string is less than or equal to 0x7F. | 5818 // Every byte in an ASCII string is less than or equal to 0x7F. |
5817 ASSERT((w & (kOneInEveryByte * 0x7F)) == w); | 5819 ASSERT((w & (kOneInEveryByte * 0x7F)) == w); |
5818 // Use strict inequalities since in edge cases the function could be | 5820 // Use strict inequalities since in edge cases the function could be |
5819 // further simplified. | 5821 // further simplified. |
5820 ASSERT(0 < m && m < n && n < 0x7F); | 5822 ASSERT(0 < m && m < n); |
5823 #ifndef ENABLE_LATIN_1 | |
5824 ASSERT(n < 0x7F); | |
5825 #endif | |
5821 // Has high bit set in every w byte less than n. | 5826 // Has high bit set in every w byte less than n. |
5822 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; | 5827 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; |
5823 // Has high bit set in every w byte greater than m. | 5828 // Has high bit set in every w byte greater than m. |
5824 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); | 5829 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); |
5825 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); | 5830 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); |
5826 } | 5831 } |
5827 | 5832 |
5828 | 5833 |
5829 enum AsciiCaseConversion { | 5834 enum AsciiCaseConversion { |
5830 ASCII_TO_LOWER, | 5835 ASCII_TO_LOWER, |
5831 ASCII_TO_UPPER | 5836 ASCII_TO_UPPER |
5832 }; | 5837 }; |
5833 | 5838 |
5834 | 5839 |
5835 template <AsciiCaseConversion dir> | 5840 template <AsciiCaseConversion dir> |
5836 struct FastAsciiConverter { | 5841 struct FastAsciiConverter { |
5842 #ifdef ENABLE_LATIN_1 | |
5843 static bool Convert(char* dst, char* src, int length, bool* changed_out) { | |
5844 #else | |
5837 static bool Convert(char* dst, char* src, int length) { | 5845 static bool Convert(char* dst, char* src, int length) { |
5846 #endif | |
5838 #ifdef DEBUG | 5847 #ifdef DEBUG |
5839 char* saved_dst = dst; | 5848 char* saved_dst = dst; |
5840 char* saved_src = src; | 5849 char* saved_src = src; |
5841 #endif | 5850 #endif |
5842 // We rely on the distance between upper and lower case letters | 5851 // We rely on the distance between upper and lower case letters |
5843 // being a known power of 2. | 5852 // being a known power of 2. |
5844 ASSERT('a' - 'A' == (1 << 5)); | 5853 ASSERT('a' - 'A' == (1 << 5)); |
5845 // Boundaries for the range of input characters that require conversion. | 5854 // Boundaries for the range of input characters that require conversion. |
5846 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; | 5855 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; |
5847 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; | 5856 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; |
5848 bool changed = false; | 5857 bool changed = false; |
5858 #ifdef ENABLE_LATIN_1 | |
5859 uintptr_t or_acc = 0; | |
5860 #endif | |
5849 char* const limit = src + length; | 5861 char* const limit = src + length; |
5850 #ifdef V8_HOST_CAN_READ_UNALIGNED | 5862 #ifdef V8_HOST_CAN_READ_UNALIGNED |
5851 // Process the prefix of the input that requires no conversion one | 5863 // Process the prefix of the input that requires no conversion one |
5852 // (machine) word at a time. | 5864 // (machine) word at a time. |
5853 while (src <= limit - sizeof(uintptr_t)) { | 5865 while (src <= limit - sizeof(uintptr_t)) { |
5854 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 5866 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); |
5867 #ifdef ENABLE_LATIN_1 | |
5868 or_acc |= w; | |
5869 #endif | |
5855 if (AsciiRangeMask(w, lo, hi) != 0) { | 5870 if (AsciiRangeMask(w, lo, hi) != 0) { |
5856 changed = true; | 5871 changed = true; |
5857 break; | 5872 break; |
5858 } | 5873 } |
5859 *reinterpret_cast<uintptr_t*>(dst) = w; | 5874 *reinterpret_cast<uintptr_t*>(dst) = w; |
5860 src += sizeof(uintptr_t); | 5875 src += sizeof(uintptr_t); |
5861 dst += sizeof(uintptr_t); | 5876 dst += sizeof(uintptr_t); |
5862 } | 5877 } |
5863 // Process the remainder of the input performing conversion when | 5878 // Process the remainder of the input performing conversion when |
5864 // required one word at a time. | 5879 // required one word at a time. |
5865 while (src <= limit - sizeof(uintptr_t)) { | 5880 while (src <= limit - sizeof(uintptr_t)) { |
5866 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 5881 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); |
5882 #ifdef ENABLE_LATIN_1 | |
5883 or_acc |= w; | |
5884 #endif | |
5867 uintptr_t m = AsciiRangeMask(w, lo, hi); | 5885 uintptr_t m = AsciiRangeMask(w, lo, hi); |
5868 // The mask has high (7th) bit set in every byte that needs | 5886 // The mask has high (7th) bit set in every byte that needs |
5869 // conversion and we know that the distance between cases is | 5887 // conversion and we know that the distance between cases is |
5870 // 1 << 5. | 5888 // 1 << 5. |
5871 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); | 5889 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); |
5872 src += sizeof(uintptr_t); | 5890 src += sizeof(uintptr_t); |
5873 dst += sizeof(uintptr_t); | 5891 dst += sizeof(uintptr_t); |
5874 } | 5892 } |
5875 #endif | 5893 #endif |
5876 // Process the last few bytes of the input (or the whole input if | 5894 // Process the last few bytes of the input (or the whole input if |
5877 // unaligned access is not supported). | 5895 // unaligned access is not supported). |
5878 while (src < limit) { | 5896 while (src < limit) { |
5879 char c = *src; | 5897 char c = *src; |
5898 #ifdef ENABLE_LATIN_1 | |
5899 or_acc |= c; | |
5900 #endif | |
5880 if (lo < c && c < hi) { | 5901 if (lo < c && c < hi) { |
5881 c ^= (1 << 5); | 5902 c ^= (1 << 5); |
5882 changed = true; | 5903 changed = true; |
5883 } | 5904 } |
5884 *dst = c; | 5905 *dst = c; |
5885 ++src; | 5906 ++src; |
5886 ++dst; | 5907 ++dst; |
5887 } | 5908 } |
5909 #ifdef ENABLE_LATIN_1 | |
5910 if ((or_acc & kAsciiMask) != 0) { | |
Yang
2013/01/14 15:56:51
Would it be noticeably more expensive to do this c
| |
5911 return false; | |
5912 } | |
5913 #endif | |
5888 #ifdef DEBUG | 5914 #ifdef DEBUG |
5889 CheckConvert(saved_dst, saved_src, length, changed); | 5915 CheckConvert(saved_dst, saved_src, length, changed); |
5890 #endif | 5916 #endif |
5917 #ifdef ENABLE_LATIN_1 | |
5918 *changed_out = changed; | |
5919 return true; | |
5920 #else | |
5891 return changed; | 5921 return changed; |
5922 #endif | |
5892 } | 5923 } |
5893 | 5924 |
5894 #ifdef DEBUG | 5925 #ifdef DEBUG |
5895 static void CheckConvert(char* dst, char* src, int length, bool changed) { | 5926 static void CheckConvert(char* dst, char* src, int length, bool changed) { |
5896 bool expected_changed = false; | 5927 bool expected_changed = false; |
5897 for (int i = 0; i < length; i++) { | 5928 for (int i = 0; i < length; i++) { |
5898 if (dst[i] == src[i]) continue; | 5929 if (dst[i] == src[i]) continue; |
5899 expected_changed = true; | 5930 expected_changed = true; |
5900 if (dir == ASCII_TO_LOWER) { | 5931 if (dir == ASCII_TO_LOWER) { |
5901 ASSERT('A' <= src[i] && src[i] <= 'Z'); | 5932 ASSERT('A' <= src[i] && src[i] <= 'Z'); |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5934 Isolate* isolate, | 5965 Isolate* isolate, |
5935 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) { | 5966 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) { |
5936 NoHandleAllocation ha; | 5967 NoHandleAllocation ha; |
5937 CONVERT_ARG_CHECKED(String, s, 0); | 5968 CONVERT_ARG_CHECKED(String, s, 0); |
5938 s = s->TryFlattenGetString(); | 5969 s = s->TryFlattenGetString(); |
5939 | 5970 |
5940 const int length = s->length(); | 5971 const int length = s->length(); |
5941 // Assume that the string is not empty; we need this assumption later | 5972 // Assume that the string is not empty; we need this assumption later |
5942 if (length == 0) return s; | 5973 if (length == 0) return s; |
5943 | 5974 |
5944 #ifndef ENABLE_LATIN_1 | |
5945 // Simpler handling of ASCII strings. | 5975 // Simpler handling of ASCII strings. |
5946 // | 5976 // |
5947 // NOTE: This assumes that the upper/lower case of an ASCII | 5977 // NOTE: This assumes that the upper/lower case of an ASCII |
5948 // character is also ASCII. This is currently the case, but it | 5978 // character is also ASCII. This is currently the case, but it |
5949 // might break in the future if we implement more context and locale | 5979 // might break in the future if we implement more context and locale |
5950 // dependent upper/lower conversions. | 5980 // dependent upper/lower conversions. |
5951 if (s->IsSeqOneByteString()) { | 5981 if (s->IsSeqOneByteString()) { |
5952 Object* o; | 5982 Object* o; |
5953 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); | 5983 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); |
5954 if (!maybe_o->ToObject(&o)) return maybe_o; | 5984 if (!maybe_o->ToObject(&o)) return maybe_o; |
5955 } | 5985 } |
5956 SeqOneByteString* result = SeqOneByteString::cast(o); | 5986 SeqOneByteString* result = SeqOneByteString::cast(o); |
5987 #ifndef ENABLE_LATIN_1 | |
5957 bool has_changed_character = ConvertTraits::AsciiConverter::Convert( | 5988 bool has_changed_character = ConvertTraits::AsciiConverter::Convert( |
5958 reinterpret_cast<char*>(result->GetChars()), | 5989 reinterpret_cast<char*>(result->GetChars()), |
5959 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), | 5990 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), |
5960 length); | 5991 length); |
5961 return has_changed_character ? result : s; | 5992 return has_changed_character ? result : s; |
5993 #else | |
5994 bool has_changed_character; | |
5995 bool is_ascii = ConvertTraits::AsciiConverter::Convert( | |
5996 reinterpret_cast<char*>(result->GetChars()), | |
5997 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), | |
5998 length, | |
5999 &has_changed_character); | |
6000 // If not ASCII, we discard the result and take the 2 byte path. | |
6001 if (is_ascii) { | |
Yang
2013/01/14 15:56:51
It looks like we could save time if Convert failed
| |
6002 return has_changed_character ? result : s; | |
6003 } | |
6004 #endif | |
5962 } | 6005 } |
5963 #endif | |
5964 | 6006 |
5965 Object* answer; | 6007 Object* answer; |
5966 { MaybeObject* maybe_answer = | 6008 { MaybeObject* maybe_answer = |
5967 ConvertCaseHelper(isolate, s, length, length, mapping); | 6009 ConvertCaseHelper(isolate, s, length, length, mapping); |
5968 if (!maybe_answer->ToObject(&answer)) return maybe_answer; | 6010 if (!maybe_answer->ToObject(&answer)) return maybe_answer; |
5969 } | 6011 } |
5970 if (answer->IsSmi()) { | 6012 if (answer->IsSmi()) { |
5971 // Retry with correct length. | 6013 // Retry with correct length. |
5972 { MaybeObject* maybe_answer = | 6014 { MaybeObject* maybe_answer = |
5973 ConvertCaseHelper(isolate, | 6015 ConvertCaseHelper(isolate, |
(...skipping 7669 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
13643 // Handle last resort GC and make sure to allow future allocations | 13685 // Handle last resort GC and make sure to allow future allocations |
13644 // to grow the heap without causing GCs (if possible). | 13686 // to grow the heap without causing GCs (if possible). |
13645 isolate->counters()->gc_last_resort_from_js()->Increment(); | 13687 isolate->counters()->gc_last_resort_from_js()->Increment(); |
13646 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, | 13688 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, |
13647 "Runtime::PerformGC"); | 13689 "Runtime::PerformGC"); |
13648 } | 13690 } |
13649 } | 13691 } |
13650 | 13692 |
13651 | 13693 |
13652 } } // namespace v8::internal | 13694 } } // namespace v8::internal |
OLD | NEW |