Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 5785 matching lines...) | (...skipping 5785 matching lines...) |
| 5796 // become garbage; there is no reason to keep two identical strings | 5796 // become garbage; there is no reason to keep two identical strings |
| 5797 // alive. | 5797 // alive. |
| 5798 return s; | 5798 return s; |
| 5799 } | 5799 } |
| 5800 } | 5800 } |
| 5801 | 5801 |
| 5802 | 5802 |
| 5803 namespace { | 5803 namespace { |
| 5804 | 5804 |
| 5805 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; | 5805 static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF; |
| 5806 | 5806 #ifdef ENABLE_LATIN_1 |
| | 5807 static const uintptr_t kAsciiMask = kOneInEveryByte << 7; |
| | 5808 #endif |
| 5807 | 5809 |
| 5808 // Given a word and two range boundaries returns a word with high bit | 5810 // Given a word and two range boundaries returns a word with high bit |
| 5809 // set in every byte iff the corresponding input byte was strictly in | 5811 // set in every byte iff the corresponding input byte was strictly in |
| 5810 // the range (m, n). All the other bits in the result are cleared. | 5812 // the range (m, n). All the other bits in the result are cleared. |
| 5811 // This function is only useful when it can be inlined and the | 5813 // This function is only useful when it can be inlined and the |
| 5812 // boundaries are statically known. | 5814 // boundaries are statically known. |
| 5813 // Requires: all bytes in the input word and the boundaries must be | 5815 // Requires: all bytes in the input word and the boundaries must be |
| 5814 // ASCII (less than 0x7F). | 5816 // ASCII (less than 0x7F). |
| 5815 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { | 5817 static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) { |
| 5816 // Every byte in an ASCII string is less than or equal to 0x7F. | 5818 // Every byte in an ASCII string is less than or equal to 0x7F. |
| 5817 ASSERT((w & (kOneInEveryByte * 0x7F)) == w); | 5819 ASSERT((w & (kOneInEveryByte * 0x7F)) == w); |
| 5818 // Use strict inequalities since in edge cases the function could be | 5820 // Use strict inequalities since in edge cases the function could be |
| 5819 // further simplified. | 5821 // further simplified. |
| 5820 ASSERT(0 < m && m < n && n < 0x7F); | 5822 ASSERT(0 < m && m < n); |
| | 5823 #ifndef ENABLE_LATIN_1 |
| | 5824 ASSERT(n < 0x7F); |
| | 5825 #endif |
| 5821 // Has high bit set in every w byte less than n. | 5826 // Has high bit set in every w byte less than n. |
| 5822 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; | 5827 uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w; |
| 5823 // Has high bit set in every w byte greater than m. | 5828 // Has high bit set in every w byte greater than m. |
| 5824 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); | 5829 uintptr_t tmp2 = w + kOneInEveryByte * (0x7F - m); |
| 5825 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); | 5830 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); |
| 5826 } | 5831 } |
| 5827 | 5832 |
| 5828 | 5833 |
| 5829 enum AsciiCaseConversion { | 5834 enum AsciiCaseConversion { |
| 5830 ASCII_TO_LOWER, | 5835 ASCII_TO_LOWER, |
| 5831 ASCII_TO_UPPER | 5836 ASCII_TO_UPPER |
| 5832 }; | 5837 }; |
| 5833 | 5838 |
| 5834 | 5839 |
| 5835 template <AsciiCaseConversion dir> | 5840 template <AsciiCaseConversion dir> |
| 5836 struct FastAsciiConverter { | 5841 struct FastAsciiConverter { |
| | 5842 #ifdef ENABLE_LATIN_1 |
| | 5843 static bool Convert(char* dst, char* src, int length, bool* changed_out) { |
| | 5844 #else |
| 5837 static bool Convert(char* dst, char* src, int length) { | 5845 static bool Convert(char* dst, char* src, int length) { |
| | 5846 #endif |
| 5838 #ifdef DEBUG | 5847 #ifdef DEBUG |
| 5839 char* saved_dst = dst; | 5848 char* saved_dst = dst; |
| 5840 char* saved_src = src; | 5849 char* saved_src = src; |
| 5841 #endif | 5850 #endif |
| 5842 // We rely on the distance between upper and lower case letters | 5851 // We rely on the distance between upper and lower case letters |
| 5843 // being a known power of 2. | 5852 // being a known power of 2. |
| 5844 ASSERT('a' - 'A' == (1 << 5)); | 5853 ASSERT('a' - 'A' == (1 << 5)); |
| 5845 // Boundaries for the range of input characters that require conversion. | 5854 // Boundaries for the range of input characters that require conversion. |
| 5846 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; | 5855 const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1; |
| 5847 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; | 5856 const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1; |
| 5848 bool changed = false; | 5857 bool changed = false; |
| | 5858 #ifdef ENABLE_LATIN_1 |
| | 5859 uintptr_t or_acc = 0; |
| | 5860 #endif |
| 5849 char* const limit = src + length; | 5861 char* const limit = src + length; |
| 5850 #ifdef V8_HOST_CAN_READ_UNALIGNED | 5862 #ifdef V8_HOST_CAN_READ_UNALIGNED |
| 5851 // Process the prefix of the input that requires no conversion one | 5863 // Process the prefix of the input that requires no conversion one |
| 5852 // (machine) word at a time. | 5864 // (machine) word at a time. |
| 5853 while (src <= limit - sizeof(uintptr_t)) { | 5865 while (src <= limit - sizeof(uintptr_t)) { |
| 5854 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 5866 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); |
| | 5867 #ifdef ENABLE_LATIN_1 |
| | 5868 or_acc \|= w; |
| | 5869 #endif |
| 5855 if (AsciiRangeMask(w, lo, hi) != 0) { | 5870 if (AsciiRangeMask(w, lo, hi) != 0) { |
| 5856 changed = true; | 5871 changed = true; |
| 5857 break; | 5872 break; |
| 5858 } | 5873 } |
| 5859 *reinterpret_cast<uintptr_t*>(dst) = w; | 5874 *reinterpret_cast<uintptr_t*>(dst) = w; |
| 5860 src += sizeof(uintptr_t); | 5875 src += sizeof(uintptr_t); |
| 5861 dst += sizeof(uintptr_t); | 5876 dst += sizeof(uintptr_t); |
| 5862 } | 5877 } |
| 5863 // Process the remainder of the input performing conversion when | 5878 // Process the remainder of the input performing conversion when |
| 5864 // required one word at a time. | 5879 // required one word at a time. |
| 5865 while (src <= limit - sizeof(uintptr_t)) { | 5880 while (src <= limit - sizeof(uintptr_t)) { |
| 5866 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); | 5881 uintptr_t w = *reinterpret_cast<uintptr_t*>(src); |
| | 5882 #ifdef ENABLE_LATIN_1 |
| | 5883 or_acc \|= w; |
| | 5884 #endif |
| 5867 uintptr_t m = AsciiRangeMask(w, lo, hi); | 5885 uintptr_t m = AsciiRangeMask(w, lo, hi); |
| 5868 // The mask has high (7th) bit set in every byte that needs | 5886 // The mask has high (7th) bit set in every byte that needs |
| 5869 // conversion and we know that the distance between cases is | 5887 // conversion and we know that the distance between cases is |
| 5870 // 1 << 5. | 5888 // 1 << 5. |
| 5871 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); | 5889 *reinterpret_cast<uintptr_t*>(dst) = w ^ (m >> 2); |
| 5872 src += sizeof(uintptr_t); | 5890 src += sizeof(uintptr_t); |
| 5873 dst += sizeof(uintptr_t); | 5891 dst += sizeof(uintptr_t); |
| 5874 } | 5892 } |
| 5875 #endif | 5893 #endif |
| 5876 // Process the last few bytes of the input (or the whole input if | 5894 // Process the last few bytes of the input (or the whole input if |
| 5877 // unaligned access is not supported). | 5895 // unaligned access is not supported). |
| 5878 while (src < limit) { | 5896 while (src < limit) { |
| 5879 char c = *src; | 5897 char c = *src; |
| | 5898 #ifdef ENABLE_LATIN_1 |
| | 5899 or_acc \|= c; |
| | 5900 #endif |
| 5880 if (lo < c && c < hi) { | 5901 if (lo < c && c < hi) { |
| 5881 c ^= (1 << 5); | 5902 c ^= (1 << 5); |
| 5882 changed = true; | 5903 changed = true; |
| 5883 } | 5904 } |
| 5884 *dst = c; | 5905 *dst = c; |
| 5885 ++src; | 5906 ++src; |
| 5886 ++dst; | 5907 ++dst; |
| 5887 } | 5908 } |
| | 5909 #ifdef ENABLE_LATIN_1 |
| | 5910 if ((or_acc & kAsciiMask) != 0) { |
| | [Review comment] Yang (2013/01/14 15:56:51): Would it be noticeably more expensive to do this c… |
| | 5911 return false; |
| | 5912 } |
| | 5913 #endif |
| 5888 #ifdef DEBUG | 5914 #ifdef DEBUG |
| 5889 CheckConvert(saved_dst, saved_src, length, changed); | 5915 CheckConvert(saved_dst, saved_src, length, changed); |
| 5890 #endif | 5916 #endif |
| | 5917 #ifdef ENABLE_LATIN_1 |
| | 5918 *changed_out = changed; |
| | 5919 return true; |
| | 5920 #else |
| 5891 return changed; | 5921 return changed; |
| | 5922 #endif |
| 5892 } | 5923 } |
| 5893 | 5924 |
| 5894 #ifdef DEBUG | 5925 #ifdef DEBUG |
| 5895 static void CheckConvert(char* dst, char* src, int length, bool changed) { | 5926 static void CheckConvert(char* dst, char* src, int length, bool changed) { |
| 5896 bool expected_changed = false; | 5927 bool expected_changed = false; |
| 5897 for (int i = 0; i < length; i++) { | 5928 for (int i = 0; i < length; i++) { |
| 5898 if (dst[i] == src[i]) continue; | 5929 if (dst[i] == src[i]) continue; |
| 5899 expected_changed = true; | 5930 expected_changed = true; |
| 5900 if (dir == ASCII_TO_LOWER) { | 5931 if (dir == ASCII_TO_LOWER) { |
| 5901 ASSERT('A' <= src[i] && src[i] <= 'Z'); | 5932 ASSERT('A' <= src[i] && src[i] <= 'Z'); |
| (...skipping 32 matching lines...) | (...skipping 32 matching lines...) |
| 5934 Isolate* isolate, | 5965 Isolate* isolate, |
| 5935 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) { | 5966 unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>* mapping) { |
| 5936 NoHandleAllocation ha; | 5967 NoHandleAllocation ha; |
| 5937 CONVERT_ARG_CHECKED(String, s, 0); | 5968 CONVERT_ARG_CHECKED(String, s, 0); |
| 5938 s = s->TryFlattenGetString(); | 5969 s = s->TryFlattenGetString(); |
| 5939 | 5970 |
| 5940 const int length = s->length(); | 5971 const int length = s->length(); |
| 5941 // Assume that the string is not empty; we need this assumption later | 5972 // Assume that the string is not empty; we need this assumption later |
| 5942 if (length == 0) return s; | 5973 if (length == 0) return s; |
| 5943 | 5974 |
| 5944 #ifndef ENABLE_LATIN_1 | |
| 5945 // Simpler handling of ASCII strings. | 5975 // Simpler handling of ASCII strings. |
| 5946 // | 5976 // |
| 5947 // NOTE: This assumes that the upper/lower case of an ASCII | 5977 // NOTE: This assumes that the upper/lower case of an ASCII |
| 5948 // character is also ASCII. This is currently the case, but it | 5978 // character is also ASCII. This is currently the case, but it |
| 5949 // might break in the future if we implement more context and locale | 5979 // might break in the future if we implement more context and locale |
| 5950 // dependent upper/lower conversions. | 5980 // dependent upper/lower conversions. |
| 5951 if (s->IsSeqOneByteString()) { | 5981 if (s->IsSeqOneByteString()) { |
| 5952 Object* o; | 5982 Object* o; |
| 5953 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); | 5983 { MaybeObject* maybe_o = isolate->heap()->AllocateRawOneByteString(length); |
| 5954 if (!maybe_o->ToObject(&o)) return maybe_o; | 5984 if (!maybe_o->ToObject(&o)) return maybe_o; |
| 5955 } | 5985 } |
| 5956 SeqOneByteString* result = SeqOneByteString::cast(o); | 5986 SeqOneByteString* result = SeqOneByteString::cast(o); |
| | 5987 #ifndef ENABLE_LATIN_1 |
| 5957 bool has_changed_character = ConvertTraits::AsciiConverter::Convert( | 5988 bool has_changed_character = ConvertTraits::AsciiConverter::Convert( |
| 5958 reinterpret_cast<char*>(result->GetChars()), | 5989 reinterpret_cast<char*>(result->GetChars()), |
| 5959 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), | 5990 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), |
| 5960 length); | 5991 length); |
| 5961 return has_changed_character ? result : s; | 5992 return has_changed_character ? result : s; |
| | 5993 #else |
| | 5994 bool has_changed_character; |
| | 5995 bool is_ascii = ConvertTraits::AsciiConverter::Convert( |
| | 5996 reinterpret_cast<char*>(result->GetChars()), |
| | 5997 reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()), |
| | 5998 length, |
| | 5999 &has_changed_character); |
| | 6000 // If not ASCII, we discard the result and take the 2 byte path. |
| | 6001 if (is_ascii) { |
| | [Review comment] Yang (2013/01/14 15:56:51): It looks like we could save time if Convert failed… |
| | 6002 return has_changed_character ? result : s; |
| | 6003 } |
| | 6004 #endif |
| 5962 } | 6005 } |
| 5963 #endif | |
| 5964 | 6006 |
| 5965 Object* answer; | 6007 Object* answer; |
| 5966 { MaybeObject* maybe_answer = | 6008 { MaybeObject* maybe_answer = |
| 5967 ConvertCaseHelper(isolate, s, length, length, mapping); | 6009 ConvertCaseHelper(isolate, s, length, length, mapping); |
| 5968 if (!maybe_answer->ToObject(&answer)) return maybe_answer; | 6010 if (!maybe_answer->ToObject(&answer)) return maybe_answer; |
| 5969 } | 6011 } |
| 5970 if (answer->IsSmi()) { | 6012 if (answer->IsSmi()) { |
| 5971 // Retry with correct length. | 6013 // Retry with correct length. |
| 5972 { MaybeObject* maybe_answer = | 6014 { MaybeObject* maybe_answer = |
| 5973 ConvertCaseHelper(isolate, | 6015 ConvertCaseHelper(isolate, |
| (...skipping 7669 matching lines...) | (...skipping 7669 matching lines...) |
| 13643 // Handle last resort GC and make sure to allow future allocations | 13685 // Handle last resort GC and make sure to allow future allocations |
| 13644 // to grow the heap without causing GCs (if possible). | 13686 // to grow the heap without causing GCs (if possible). |
| 13645 isolate->counters()->gc_last_resort_from_js()->Increment(); | 13687 isolate->counters()->gc_last_resort_from_js()->Increment(); |
| 13646 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, | 13688 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, |
| 13647 "Runtime::PerformGC"); | 13689 "Runtime::PerformGC"); |
| 13648 } | 13690 } |
| 13649 } | 13691 } |
| 13650 | 13692 |
| 13651 | 13693 |
| 13652 } } // namespace v8::internal | 13694 } } // namespace v8::internal |
| OLD | NEW |