Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 46 // of the string, but it is a safe approximation. | 46 // of the string, but it is a safe approximation. |
| 47 static const int kBMMaxShift = Isolate::kBMMaxShift; | 47 static const int kBMMaxShift = Isolate::kBMMaxShift; |
| 48 | 48 |
| 49 // Reduce alphabet to this size. | 49 // Reduce alphabet to this size. |
| 50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size | 50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size |
| 51 // proportional to the input alphabet. We reduce the alphabet size by | 51 // proportional to the input alphabet. We reduce the alphabet size by |
| 52 // equating input characters modulo a smaller alphabet size. This gives | 52 // equating input characters modulo a smaller alphabet size. This gives |
| 53 // a potentially less efficient searching, but is a safe approximation. | 53 // a potentially less efficient searching, but is a safe approximation. |
| 54 // For needles using only characters in the same Unicode 256-code point page, | 54 // For needles using only characters in the same Unicode 256-code point page, |
| 55 // there is no search speed degradation. | 55 // there is no search speed degradation. |
| 56 #ifndef ENABLE_LATIN_1 | |
| 56 static const int kAsciiAlphabetSize = 128; | 57 static const int kAsciiAlphabetSize = 128; |
| 58 #else | |
| 59 static const int kAsciiAlphabetSize = 256; | |
| 60 #endif | |
| 57 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; | 61 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; |
| 58 | 62 |
| 59 // Bad-char shift table stored in the state. Its length is the alphabet size. | 63 // Bad-char shift table stored in the state. Its length is the alphabet size. |
| 60 // For patterns below this length, the skip length of Boyer-Moore is too short | 64 // For patterns below this length, the skip length of Boyer-Moore is too short |
| 61 // to compensate for the algorithmic overhead compared to simple brute force. | 65 // to compensate for the algorithmic overhead compared to simple brute force. |
| 62 static const int kBMMinPatternLength = 7; | 66 static const int kBMMinPatternLength = 7; |
| 63 | 67 |
| 64 static inline bool IsOneByteString(Vector<const char> string) { | 68 static inline bool IsOneByteString(Vector<const uint8_t> string) { |
| 65 return true; | 69 return true; |
| 66 } | 70 } |
| 67 | 71 |
| 68 static inline bool IsOneByteString(Vector<const uc16> string) { | 72 static inline bool IsOneByteString(Vector<const uc16> string) { |
| 69 return String::IsOneByte(string.start(), string.length()); | 73 return String::IsOneByte(string.start(), string.length()); |
| 70 } | 74 } |
| 71 | 75 |
| 72 friend class Isolate; | 76 friend class Isolate; |
| 73 }; | 77 }; |
| 74 | 78 |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 143 int start_index); | 147 int start_index); |
| 144 | 148 |
| 145 static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search, | 149 static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search, |
| 146 Vector<const SubjectChar> subject, | 150 Vector<const SubjectChar> subject, |
| 147 int start_index); | 151 int start_index); |
| 148 | 152 |
| 149 void PopulateBoyerMooreHorspoolTable(); | 153 void PopulateBoyerMooreHorspoolTable(); |
| 150 | 154 |
| 151 void PopulateBoyerMooreTable(); | 155 void PopulateBoyerMooreTable(); |
| 152 | 156 |
| 157 static inline bool exceedsOneByte(uint8_t c) { | |
| 158 return false; | |
|
Yang
2013/01/09 15:39:30
Don't we still need to gate this with ENABLE_LATIN_1?
| |
| 159 } | |
| 160 | |
| 161 static inline bool exceedsOneByte(uint16_t c) { | |
| 162 return c > String::kMaxOneByteCharCodeU; | |
| 163 } | |
| 164 | |
| 153 static inline int CharOccurrence(int* bad_char_occurrence, | 165 static inline int CharOccurrence(int* bad_char_occurrence, |
| 154 SubjectChar char_code) { | 166 SubjectChar char_code) { |
| 155 if (sizeof(SubjectChar) == 1) { | 167 if (sizeof(SubjectChar) == 1) { |
| 156 return bad_char_occurrence[static_cast<int>(char_code)]; | 168 return bad_char_occurrence[static_cast<int>(char_code)]; |
| 157 } | 169 } |
| 158 if (sizeof(PatternChar) == 1) { | 170 if (sizeof(PatternChar) == 1) { |
| 159 if (static_cast<unsigned int>(char_code) > String::kMaxOneByteCharCodeU) { | 171 if (exceedsOneByte(char_code)) { |
| 160 return -1; | 172 return -1; |
| 161 } | 173 } |
| 162 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; | 174 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; |
| 163 } | 175 } |
| 164 // Both pattern and subject are UC16. Reduce character to equivalence class. | 176 // Both pattern and subject are UC16. Reduce character to equivalence class. |
| 165 int equiv_class = char_code % kUC16AlphabetSize; | 177 int equiv_class = char_code % kUC16AlphabetSize; |
| 166 return bad_char_occurrence[equiv_class]; | 178 return bad_char_occurrence[equiv_class]; |
| 167 } | 179 } |
| 168 | 180 |
| 169 // The following tables are shared by all searches. | 181 // The following tables are shared by all searches. |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 216 int i = index; | 228 int i = index; |
| 217 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 229 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
| 218 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 230 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
| 219 memchr(subject.start() + i, | 231 memchr(subject.start() + i, |
| 220 pattern_first_char, | 232 pattern_first_char, |
| 221 subject.length() - i)); | 233 subject.length() - i)); |
| 222 if (pos == NULL) return -1; | 234 if (pos == NULL) return -1; |
| 223 return static_cast<int>(pos - subject.start()); | 235 return static_cast<int>(pos - subject.start()); |
| 224 } else { | 236 } else { |
| 225 if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 237 if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
| 226 if (static_cast<uc16>(pattern_first_char) > | 238 if (exceedsOneByte(pattern_first_char)) { |
| 227 String::kMaxOneByteCharCodeU) { | |
| 228 return -1; | 239 return -1; |
| 229 } | 240 } |
| 230 } | 241 } |
| 231 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); | 242 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); |
| 232 int n = subject.length(); | 243 int n = subject.length(); |
| 233 while (i < n) { | 244 while (i < n) { |
| 234 if (subject[i++] == search_char) return i - 1; | 245 if (subject[i++] == search_char) return i - 1; |
| 235 } | 246 } |
| 236 return -1; | 247 return -1; |
| 237 } | 248 } |
| (...skipping 326 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 564 Vector<const SubjectChar> subject, | 575 Vector<const SubjectChar> subject, |
| 565 Vector<const PatternChar> pattern, | 576 Vector<const PatternChar> pattern, |
| 566 int start_index) { | 577 int start_index) { |
| 567 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | 578 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
| 568 return search.Search(subject, start_index); | 579 return search.Search(subject, start_index); |
| 569 } | 580 } |
| 570 | 581 |
| 571 }} // namespace v8::internal | 582 }} // namespace v8::internal |
| 572 | 583 |
| 573 #endif // V8_STRING_SEARCH_H_ | 584 #endif // V8_STRING_SEARCH_H_ |
| OLD | NEW |