| OLD | NEW | 
|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 //     * Redistributions of source code must retain the above copyright | 6 //     * Redistributions of source code must retain the above copyright | 
| 7 //       notice, this list of conditions and the following disclaimer. | 7 //       notice, this list of conditions and the following disclaimer. | 
| 8 //     * Redistributions in binary form must reproduce the above | 8 //     * Redistributions in binary form must reproduce the above | 
| 9 //       copyright notice, this list of conditions and the following | 9 //       copyright notice, this list of conditions and the following | 
| 10 //       disclaimer in the documentation and/or other materials provided | 10 //       disclaimer in the documentation and/or other materials provided | 
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 54   // For needles using only characters in the same Unicode 256-code point page, | 54   // For needles using only characters in the same Unicode 256-code point page, | 
| 55   // there is no search speed degradation. | 55   // there is no search speed degradation. | 
| 56   static const int kAsciiAlphabetSize = 128; | 56   static const int kAsciiAlphabetSize = 128; | 
| 57   static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; | 57   static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; | 
| 58 | 58 | 
| 59   // Bad-char shift table stored in the state. It's length is the alphabet size. | 59   // Bad-char shift table stored in the state. It's length is the alphabet size. | 
| 60   // For patterns below this length, the skip length of Boyer-Moore is too short | 60   // For patterns below this length, the skip length of Boyer-Moore is too short | 
| 61   // to compensate for the algorithmic overhead compared to simple brute force. | 61   // to compensate for the algorithmic overhead compared to simple brute force. | 
| 62   static const int kBMMinPatternLength = 7; | 62   static const int kBMMinPatternLength = 7; | 
| 63 | 63 | 
| 64   static inline bool IsAsciiString(Vector<const char>) { | 64   static inline bool IsOneByteString(Vector<const char> string) { | 
| 65     return true; | 65     return true; | 
| 66   } | 66   } | 
| 67 | 67 | 
| 68   static inline bool IsAsciiString(Vector<const uc16> string) { | 68   static inline bool IsOneByteString(Vector<const uc16> string) { | 
| 69     return String::IsAscii(string.start(), string.length()); | 69     return String::IsOneByte(string.start(), string.length()); | 
| 70   } | 70   } | 
| 71 | 71 | 
| 72   friend class Isolate; | 72   friend class Isolate; | 
| 73 }; | 73 }; | 
| 74 | 74 | 
| 75 | 75 | 
| 76 template <typename PatternChar, typename SubjectChar> | 76 template <typename PatternChar, typename SubjectChar> | 
| 77 class StringSearch : private StringSearchBase { | 77 class StringSearch : private StringSearchBase { | 
| 78  public: | 78  public: | 
| 79   StringSearch(Isolate* isolate, Vector<const PatternChar> pattern) | 79   StringSearch(Isolate* isolate, Vector<const PatternChar> pattern) | 
| 80       : isolate_(isolate), | 80       : isolate_(isolate), | 
| 81         pattern_(pattern), | 81         pattern_(pattern), | 
| 82         start_(Max(0, pattern.length() - kBMMaxShift)) { | 82         start_(Max(0, pattern.length() - kBMMaxShift)) { | 
| 83     if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 83     if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 
| 84       if (!IsAsciiString(pattern_)) { | 84       if (!IsOneByteString(pattern_)) { | 
| 85         strategy_ = &FailSearch; | 85         strategy_ = &FailSearch; | 
| 86         return; | 86         return; | 
| 87       } | 87       } | 
| 88     } | 88     } | 
| 89     int pattern_length = pattern_.length(); | 89     int pattern_length = pattern_.length(); | 
| 90     if (pattern_length < kBMMinPatternLength) { | 90     if (pattern_length < kBMMinPatternLength) { | 
| 91       if (pattern_length == 1) { | 91       if (pattern_length == 1) { | 
| 92         strategy_ = &SingleCharSearch; | 92         strategy_ = &SingleCharSearch; | 
| 93         return; | 93         return; | 
| 94       } | 94       } | 
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 149   void PopulateBoyerMooreHorspoolTable(); | 149   void PopulateBoyerMooreHorspoolTable(); | 
| 150 | 150 | 
| 151   void PopulateBoyerMooreTable(); | 151   void PopulateBoyerMooreTable(); | 
| 152 | 152 | 
| 153   static inline int CharOccurrence(int* bad_char_occurrence, | 153   static inline int CharOccurrence(int* bad_char_occurrence, | 
| 154                                    SubjectChar char_code) { | 154                                    SubjectChar char_code) { | 
| 155     if (sizeof(SubjectChar) == 1) { | 155     if (sizeof(SubjectChar) == 1) { | 
| 156       return bad_char_occurrence[static_cast<int>(char_code)]; | 156       return bad_char_occurrence[static_cast<int>(char_code)]; | 
| 157     } | 157     } | 
| 158     if (sizeof(PatternChar) == 1) { | 158     if (sizeof(PatternChar) == 1) { | 
| 159       if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) { | 159       if (static_cast<unsigned int>(char_code) > String::kMaxOneByteCharCodeU) { | 
| 160         return -1; | 160         return -1; | 
| 161       } | 161       } | 
| 162       return bad_char_occurrence[static_cast<unsigned int>(char_code)]; | 162       return bad_char_occurrence[static_cast<unsigned int>(char_code)]; | 
| 163     } | 163     } | 
| 164     // Both pattern and subject are UC16. Reduce character to equivalence class. | 164     // Both pattern and subject are UC16. Reduce character to equivalence class. | 
| 165     int equiv_class = char_code % kUC16AlphabetSize; | 165     int equiv_class = char_code % kUC16AlphabetSize; | 
| 166     return bad_char_occurrence[equiv_class]; | 166     return bad_char_occurrence[equiv_class]; | 
| 167   } | 167   } | 
| 168 | 168 | 
| 169   // The following tables are shared by all searches. | 169   // The following tables are shared by all searches. | 
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 216   int i = index; | 216   int i = index; | 
| 217   if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 217   if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 
| 218     const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 218     const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 
| 219         memchr(subject.start() + i, | 219         memchr(subject.start() + i, | 
| 220                pattern_first_char, | 220                pattern_first_char, | 
| 221                subject.length() - i)); | 221                subject.length() - i)); | 
| 222     if (pos == NULL) return -1; | 222     if (pos == NULL) return -1; | 
| 223     return static_cast<int>(pos - subject.start()); | 223     return static_cast<int>(pos - subject.start()); | 
| 224   } else { | 224   } else { | 
| 225     if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 225     if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 
| 226       if (static_cast<uc16>(pattern_first_char) > String::kMaxAsciiCharCodeU) { | 226       if (static_cast<uc16>(pattern_first_char) > | 
|  | 227             String::kMaxOneByteCharCodeU) { | 
| 227         return -1; | 228         return -1; | 
| 228       } | 229       } | 
| 229     } | 230     } | 
| 230     SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); | 231     SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); | 
| 231     int n = subject.length(); | 232     int n = subject.length(); | 
| 232     while (i < n) { | 233     while (i < n) { | 
| 233       if (subject[i++] == search_char) return i - 1; | 234       if (subject[i++] == search_char) return i - 1; | 
| 234     } | 235     } | 
| 235     return -1; | 236     return -1; | 
| 236   } | 237   } | 
| (...skipping 326 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 563                  Vector<const SubjectChar> subject, | 564                  Vector<const SubjectChar> subject, | 
| 564                  Vector<const PatternChar> pattern, | 565                  Vector<const PatternChar> pattern, | 
| 565                  int start_index) { | 566                  int start_index) { | 
| 566   StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | 567   StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | 
| 567   return search.Search(subject, start_index); | 568   return search.Search(subject, start_index); | 
| 568 } | 569 } | 
| 569 | 570 | 
| 570 }}  // namespace v8::internal | 571 }}  // namespace v8::internal | 
| 571 | 572 | 
| 572 #endif  // V8_STRING_SEARCH_H_ | 573 #endif  // V8_STRING_SEARCH_H_ | 
| OLD | NEW | 
|---|