OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
54 // For needles using only characters in the same Unicode 256-code point page, | 54 // For needles using only characters in the same Unicode 256-code point page, |
55 // there is no search speed degradation. | 55 // there is no search speed degradation. |
56 static const int kAsciiAlphabetSize = 128; | 56 static const int kAsciiAlphabetSize = 128; |
57 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; | 57 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; |
58 | 58 |
59 // Bad-char shift table stored in the state. It's length is the alphabet size. | 59 // Bad-char shift table stored in the state. It's length is the alphabet size. |
60 // For patterns below this length, the skip length of Boyer-Moore is too short | 60 // For patterns below this length, the skip length of Boyer-Moore is too short |
61 // to compensate for the algorithmic overhead compared to simple brute force. | 61 // to compensate for the algorithmic overhead compared to simple brute force. |
62 static const int kBMMinPatternLength = 7; | 62 static const int kBMMinPatternLength = 7; |
63 | 63 |
64 static inline bool IsAsciiString(Vector<const char>) { | 64 static inline bool IsOneByteString(Vector<const char> string) { |
65 return true; | 65 return true; |
66 } | 66 } |
67 | 67 |
68 static inline bool IsAsciiString(Vector<const uc16> string) { | 68 static inline bool IsOneByteString(Vector<const uc16> string) { |
69 return String::IsAscii(string.start(), string.length()); | 69 return String::IsOneByte(string.start(), string.length()); |
70 } | 70 } |
71 | 71 |
72 friend class Isolate; | 72 friend class Isolate; |
73 }; | 73 }; |
74 | 74 |
75 | 75 |
76 template <typename PatternChar, typename SubjectChar> | 76 template <typename PatternChar, typename SubjectChar> |
77 class StringSearch : private StringSearchBase { | 77 class StringSearch : private StringSearchBase { |
78 public: | 78 public: |
79 StringSearch(Isolate* isolate, Vector<const PatternChar> pattern) | 79 StringSearch(Isolate* isolate, Vector<const PatternChar> pattern) |
80 : isolate_(isolate), | 80 : isolate_(isolate), |
81 pattern_(pattern), | 81 pattern_(pattern), |
82 start_(Max(0, pattern.length() - kBMMaxShift)) { | 82 start_(Max(0, pattern.length() - kBMMaxShift)) { |
83 if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 83 if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
84 if (!IsAsciiString(pattern_)) { | 84 if (!IsOneByteString(pattern_)) { |
85 strategy_ = &FailSearch; | 85 strategy_ = &FailSearch; |
86 return; | 86 return; |
87 } | 87 } |
88 } | 88 } |
89 int pattern_length = pattern_.length(); | 89 int pattern_length = pattern_.length(); |
90 if (pattern_length < kBMMinPatternLength) { | 90 if (pattern_length < kBMMinPatternLength) { |
91 if (pattern_length == 1) { | 91 if (pattern_length == 1) { |
92 strategy_ = &SingleCharSearch; | 92 strategy_ = &SingleCharSearch; |
93 return; | 93 return; |
94 } | 94 } |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
149 void PopulateBoyerMooreHorspoolTable(); | 149 void PopulateBoyerMooreHorspoolTable(); |
150 | 150 |
151 void PopulateBoyerMooreTable(); | 151 void PopulateBoyerMooreTable(); |
152 | 152 |
153 static inline int CharOccurrence(int* bad_char_occurrence, | 153 static inline int CharOccurrence(int* bad_char_occurrence, |
154 SubjectChar char_code) { | 154 SubjectChar char_code) { |
155 if (sizeof(SubjectChar) == 1) { | 155 if (sizeof(SubjectChar) == 1) { |
156 return bad_char_occurrence[static_cast<int>(char_code)]; | 156 return bad_char_occurrence[static_cast<int>(char_code)]; |
157 } | 157 } |
158 if (sizeof(PatternChar) == 1) { | 158 if (sizeof(PatternChar) == 1) { |
159 if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) { | 159 if (static_cast<unsigned int>(char_code) > String::kMaxOneByteCharCodeU) { |
160 return -1; | 160 return -1; |
161 } | 161 } |
162 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; | 162 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; |
163 } | 163 } |
164 // Both pattern and subject are UC16. Reduce character to equivalence class. | 164 // Both pattern and subject are UC16. Reduce character to equivalence class. |
165 int equiv_class = char_code % kUC16AlphabetSize; | 165 int equiv_class = char_code % kUC16AlphabetSize; |
166 return bad_char_occurrence[equiv_class]; | 166 return bad_char_occurrence[equiv_class]; |
167 } | 167 } |
168 | 168 |
169 // The following tables are shared by all searches. | 169 // The following tables are shared by all searches. |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
216 int i = index; | 216 int i = index; |
217 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 217 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
218 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 218 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
219 memchr(subject.start() + i, | 219 memchr(subject.start() + i, |
220 pattern_first_char, | 220 pattern_first_char, |
221 subject.length() - i)); | 221 subject.length() - i)); |
222 if (pos == NULL) return -1; | 222 if (pos == NULL) return -1; |
223 return static_cast<int>(pos - subject.start()); | 223 return static_cast<int>(pos - subject.start()); |
224 } else { | 224 } else { |
225 if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 225 if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
226 if (static_cast<uc16>(pattern_first_char) > String::kMaxAsciiCharCodeU) { | 226 if (static_cast<uc16>(pattern_first_char) > |
| 227 String::kMaxOneByteCharCodeU) { |
227 return -1; | 228 return -1; |
228 } | 229 } |
229 } | 230 } |
230 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); | 231 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); |
231 int n = subject.length(); | 232 int n = subject.length(); |
232 while (i < n) { | 233 while (i < n) { |
233 if (subject[i++] == search_char) return i - 1; | 234 if (subject[i++] == search_char) return i - 1; |
234 } | 235 } |
235 return -1; | 236 return -1; |
236 } | 237 } |
(...skipping 326 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
563 Vector<const SubjectChar> subject, | 564 Vector<const SubjectChar> subject, |
564 Vector<const PatternChar> pattern, | 565 Vector<const PatternChar> pattern, |
565 int start_index) { | 566 int start_index) { |
566 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | 567 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
567 return search.Search(subject, start_index); | 568 return search.Search(subject, start_index); |
568 } | 569 } |
569 | 570 |
570 }} // namespace v8::internal | 571 }} // namespace v8::internal |
571 | 572 |
572 #endif // V8_STRING_SEARCH_H_ | 573 #endif // V8_STRING_SEARCH_H_ |
OLD | NEW |