OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
46 // of the string, but it is a safe approximation. | 46 // of the string, but it is a safe approximation. |
47 static const int kBMMaxShift = Isolate::kBMMaxShift; | 47 static const int kBMMaxShift = Isolate::kBMMaxShift; |
48 | 48 |
49 // Reduce alphabet to this size. | 49 // Reduce alphabet to this size. |
50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size | 50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size |
51 // proportional to the input alphabet. We reduce the alphabet size by | 51 // proportional to the input alphabet. We reduce the alphabet size by |
52 // equating input characters modulo a smaller alphabet size. This gives | 52 // equating input characters modulo a smaller alphabet size. This gives |
53 // a potentially less efficient search, but is a safe approximation. | 53 // a potentially less efficient search, but is a safe approximation. |
54 // For needles using only characters in the same Unicode 256-code point page, | 54 // For needles using only characters in the same Unicode 256-code point page, |
55 // there is no search speed degradation. | 55 // there is no search speed degradation. |
56 #ifndef ENABLE_LATIN_1 | |
56 static const int kAsciiAlphabetSize = 128; | 57 static const int kAsciiAlphabetSize = 128; |
58 #else | |
59 static const int kAsciiAlphabetSize = 256; | |
60 #endif | |
57 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; | 61 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; |
58 | 62 |
59 // Bad-char shift table stored in the state. Its length is the alphabet size. | 63 // Bad-char shift table stored in the state. Its length is the alphabet size. |
60 // For patterns below this length, the skip length of Boyer-Moore is too short | 64 // For patterns below this length, the skip length of Boyer-Moore is too short |
61 // to compensate for the algorithmic overhead compared to simple brute force. | 65 // to compensate for the algorithmic overhead compared to simple brute force. |
62 static const int kBMMinPatternLength = 7; | 66 static const int kBMMinPatternLength = 7; |
63 | 67 |
64 static inline bool IsOneByteString(Vector<const char> string) { | 68 static inline bool IsOneByteString(Vector<const uint8_t> string) { |
65 return true; | 69 return true; |
66 } | 70 } |
67 | 71 |
68 static inline bool IsOneByteString(Vector<const uc16> string) { | 72 static inline bool IsOneByteString(Vector<const uc16> string) { |
69 return String::IsOneByte(string.start(), string.length()); | 73 return String::IsOneByte(string.start(), string.length()); |
70 } | 74 } |
71 | 75 |
72 friend class Isolate; | 76 friend class Isolate; |
73 }; | 77 }; |
74 | 78 |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
143 int start_index); | 147 int start_index); |
144 | 148 |
145 static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search, | 149 static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search, |
146 Vector<const SubjectChar> subject, | 150 Vector<const SubjectChar> subject, |
147 int start_index); | 151 int start_index); |
148 | 152 |
149 void PopulateBoyerMooreHorspoolTable(); | 153 void PopulateBoyerMooreHorspoolTable(); |
150 | 154 |
151 void PopulateBoyerMooreTable(); | 155 void PopulateBoyerMooreTable(); |
152 | 156 |
157 static inline bool exceedsOneByte(uint8_t c) { | |
158 return false; | |
Yang
2013/01/09 15:39:30
Don't we still need to gate this with ENABLE_LATIN
| |
159 } | |
160 | |
161 static inline bool exceedsOneByte(uint16_t c) { | |
162 return c > String::kMaxOneByteCharCodeU; | |
163 } | |
164 | |
153 static inline int CharOccurrence(int* bad_char_occurrence, | 165 static inline int CharOccurrence(int* bad_char_occurrence, |
154 SubjectChar char_code) { | 166 SubjectChar char_code) { |
155 if (sizeof(SubjectChar) == 1) { | 167 if (sizeof(SubjectChar) == 1) { |
156 return bad_char_occurrence[static_cast<int>(char_code)]; | 168 return bad_char_occurrence[static_cast<int>(char_code)]; |
157 } | 169 } |
158 if (sizeof(PatternChar) == 1) { | 170 if (sizeof(PatternChar) == 1) { |
159 if (static_cast<unsigned int>(char_code) > String::kMaxOneByteCharCodeU) { | 171 if (exceedsOneByte(char_code)) { |
160 return -1; | 172 return -1; |
161 } | 173 } |
162 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; | 174 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; |
163 } | 175 } |
164 // Both pattern and subject are UC16. Reduce character to equivalence class. | 176 // Both pattern and subject are UC16. Reduce character to equivalence class. |
165 int equiv_class = char_code % kUC16AlphabetSize; | 177 int equiv_class = char_code % kUC16AlphabetSize; |
166 return bad_char_occurrence[equiv_class]; | 178 return bad_char_occurrence[equiv_class]; |
167 } | 179 } |
168 | 180 |
169 // The following tables are shared by all searches. | 181 // The following tables are shared by all searches. |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
216 int i = index; | 228 int i = index; |
217 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 229 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
218 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 230 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
219 memchr(subject.start() + i, | 231 memchr(subject.start() + i, |
220 pattern_first_char, | 232 pattern_first_char, |
221 subject.length() - i)); | 233 subject.length() - i)); |
222 if (pos == NULL) return -1; | 234 if (pos == NULL) return -1; |
223 return static_cast<int>(pos - subject.start()); | 235 return static_cast<int>(pos - subject.start()); |
224 } else { | 236 } else { |
225 if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 237 if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
226 if (static_cast<uc16>(pattern_first_char) > | 238 if (exceedsOneByte(pattern_first_char)) { |
227 String::kMaxOneByteCharCodeU) { | |
228 return -1; | 239 return -1; |
229 } | 240 } |
230 } | 241 } |
231 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); | 242 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); |
232 int n = subject.length(); | 243 int n = subject.length(); |
233 while (i < n) { | 244 while (i < n) { |
234 if (subject[i++] == search_char) return i - 1; | 245 if (subject[i++] == search_char) return i - 1; |
235 } | 246 } |
236 return -1; | 247 return -1; |
237 } | 248 } |
(...skipping 326 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
564 Vector<const SubjectChar> subject, | 575 Vector<const SubjectChar> subject, |
565 Vector<const PatternChar> pattern, | 576 Vector<const PatternChar> pattern, |
566 int start_index) { | 577 int start_index) { |
567 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | 578 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
568 return search.Search(subject, start_index); | 579 return search.Search(subject, start_index); |
569 } | 580 } |
570 | 581 |
571 }} // namespace v8::internal | 582 }} // namespace v8::internal |
572 | 583 |
573 #endif // V8_STRING_SEARCH_H_ | 584 #endif // V8_STRING_SEARCH_H_ |
OLD | NEW |