OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
86 Handle<String> pattern, | 86 Handle<String> pattern, |
87 JSRegExp::Flags flags, | 87 JSRegExp::Flags flags, |
88 int capture_register_count); | 88 int capture_register_count); |
89 | 89 |
90 | 90 |
91 static void AtomCompile(Handle<JSRegExp> re, | 91 static void AtomCompile(Handle<JSRegExp> re, |
92 Handle<String> pattern, | 92 Handle<String> pattern, |
93 JSRegExp::Flags flags, | 93 JSRegExp::Flags flags, |
94 Handle<String> match_pattern); | 94 Handle<String> match_pattern); |
95 | 95 |
96 | |
97 static int AtomExecRaw(Handle<JSRegExp> regexp, | |
98 Handle<String> subject, | |
99 int index, | |
100 int32_t* output, | |
101 int output_size); | |
102 | |
103 | |
104 static Handle<Object> AtomExec(Handle<JSRegExp> regexp, | 96 static Handle<Object> AtomExec(Handle<JSRegExp> regexp, |
105 Handle<String> subject, | 97 Handle<String> subject, |
106 int index, | 98 int index, |
107 Handle<JSArray> lastMatchInfo); | 99 Handle<JSArray> lastMatchInfo); |
108 | 100 |
109 enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 }; | 101 enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 }; |
110 | 102 |
111 // Prepare a RegExp for being executed one or more times (using | 103 // Prepare a RegExp for being executed one or more times (using |
112 // IrregexpExecOnce) on the subject. | 104 // IrregexpExecOnce) on the subject. |
113 // This ensures that the regexp is compiled for the subject, and that | 105 // This ensures that the regexp is compiled for the subject, and that |
114 // the subject is flat. | 106 // the subject is flat. |
115 // Returns the number of integer spaces required by IrregexpExecOnce | 107 // Returns the number of integer spaces required by IrregexpExecOnce |
116 // as its "registers" argument. If the regexp cannot be compiled, | 108 // as its "registers" argument. If the regexp cannot be compiled, |
117 // an exception is set as pending, and this function returns negative. | 109 // an exception is set as pending, and this function returns negative. |
118 static int IrregexpPrepare(Handle<JSRegExp> regexp, | 110 static int IrregexpPrepare(Handle<JSRegExp> regexp, |
119 Handle<String> subject); | 111 Handle<String> subject); |
120 | 112 |
| 113 // Calculate the size of the offsets vector for the case of a global regexp |
| 114 // and the number of matches this vector is able to store. |
| 115 static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp, |
| 116 int registers_per_match, |
| 117 int* max_matches); |
| 118 |
121 // Execute a regular expression on the subject, starting from index. | 119 // Execute a regular expression on the subject, starting from index. |
122 // If matching succeeds, return the number of matches. This can be larger | 120 // If matching succeeds, return the number of matches. This can be larger |
123 // than one in the case of global regular expressions. | 121 // than one in the case of global regular expressions. |
124 // The captures and subcaptures are stored into the registers vector. | 122 // The captures and subcaptures are stored into the registers vector. |
125 // If matching fails, returns RE_FAILURE. | 123 // If matching fails, returns RE_FAILURE. |
126 // If execution fails, sets a pending exception and returns RE_EXCEPTION. | 124 // If execution fails, sets a pending exception and returns RE_EXCEPTION. |
127 static int IrregexpExecRaw(Handle<JSRegExp> regexp, | 125 static int IrregexpExecRaw(Handle<JSRegExp> regexp, |
128 Handle<String> subject, | 126 Handle<String> subject, |
129 int index, | 127 int index, |
130 int32_t* output, | 128 Vector<int> registers); |
131 int output_size); | |
132 | 129 |
133 // Execute an Irregexp bytecode pattern. | 130 // Execute an Irregexp bytecode pattern. |
134 // On a successful match, the result is a JSArray containing | 131 // On a successful match, the result is a JSArray containing |
135 // captured positions. On a failure, the result is the null value. | 132 // captured positions. On a failure, the result is the null value. |
136 // Returns an empty handle in case of an exception. | 133 // Returns an empty handle in case of an exception. |
137 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, | 134 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, |
138 Handle<String> subject, | 135 Handle<String> subject, |
139 int index, | 136 int index, |
140 Handle<JSArray> lastMatchInfo); | 137 Handle<JSArray> lastMatchInfo); |
141 | 138 |
142 // Set last match info. If match is NULL, then setting captures is omitted. | |
143 static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info, | |
144 Handle<String> subject, | |
145 int capture_count, | |
146 int32_t* match); | |
147 | |
148 | |
149 class GlobalCache { | |
150 public: | |
151 GlobalCache(Handle<JSRegExp> regexp, | |
152 Handle<String> subject, | |
153 bool is_global, | |
154 Isolate* isolate); | |
155 | |
156 ~GlobalCache(); | |
157 | |
158 // Fetch the next entry in the cache for global regexp match results. | |
159 // This does not set the last match info. Upon failure, NULL is returned. | |
160 // The cause can be checked with Result(). The previous | |
161 // result is still available in memory when a failure happens. | |
162 int32_t* FetchNext(); | |
163 | |
164 int32_t* LastSuccessfulMatch(); | |
165 | |
166 inline bool HasException() { return num_matches_ < 0; } | |
167 | |
168 private: | |
169 int num_matches_; | |
170 int max_matches_; | |
171 int current_match_index_; | |
172 int registers_per_match_; | |
173 // Pointer to the last set of captures. | |
174 int32_t* register_array_; | |
175 int register_array_size_; | |
176 Handle<JSRegExp> regexp_; | |
177 Handle<String> subject_; | |
178 }; | |
179 | |
180 | |
181 // Array index in the lastMatchInfo array. | 139 // Array index in the lastMatchInfo array. |
182 static const int kLastCaptureCount = 0; | 140 static const int kLastCaptureCount = 0; |
183 static const int kLastSubject = 1; | 141 static const int kLastSubject = 1; |
184 static const int kLastInput = 2; | 142 static const int kLastInput = 2; |
185 static const int kFirstCapture = 3; | 143 static const int kFirstCapture = 3; |
186 static const int kLastMatchOverhead = 3; | 144 static const int kLastMatchOverhead = 3; |
187 | 145 |
188 // Direct offset into the lastMatchInfo array. | 146 // Direct offset into the lastMatchInfo array. |
189 static const int kLastCaptureCountOffset = | 147 static const int kLastCaptureCountOffset = |
190 FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize; | 148 FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize; |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
230 | 188 |
231 // Limit the space regexps take up on the heap. In order to limit this we | 189 // Limit the space regexps take up on the heap. In order to limit this we |
232 // would like to keep track of the amount of regexp code on the heap. This | 190 // would like to keep track of the amount of regexp code on the heap. This |
233 // is not tracked, however. As a conservative approximation we track the | 191 // is not tracked, however. As a conservative approximation we track the |
234 // total regexp code compiled including code that has subsequently been freed | 192 // total regexp code compiled including code that has subsequently been freed |
235 // and the total executable memory at any point. | 193 // and the total executable memory at any point. |
236 static const int kRegExpExecutableMemoryLimit = 16 * MB; | 194 static const int kRegExpExecutableMemoryLimit = 16 * MB; |
237 static const int kRegWxpCompiledLimit = 1 * MB; | 195 static const int kRegWxpCompiledLimit = 1 * MB; |
238 | 196 |
239 private: | 197 private: |
| 198 static String* last_ascii_string_; |
| 199 static String* two_byte_cached_string_; |
| 200 |
240 static bool CompileIrregexp( | 201 static bool CompileIrregexp( |
241 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | 202 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
242 static inline bool EnsureCompiledIrregexp( | 203 static inline bool EnsureCompiledIrregexp( |
243 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); | 204 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
| 205 |
| 206 |
| 207 // Set the subject cache. The previous string buffer is not deleted, so the |
| 208 // caller should ensure that it doesn't leak. |
| 209 static void SetSubjectCache(String* subject, |
| 210 char* utf8_subject, |
| 211 int utf8_length, |
| 212 int character_position, |
| 213 int utf8_position); |
| 214 |
| 215 // A one element cache of the last utf8_subject string and its length. The |
| 216 // subject JS String object is cached in the heap. We also cache a |
| 217 // translation between position and utf8 position. |
| 218 static char* utf8_subject_cache_; |
| 219 static int utf8_length_cache_; |
| 220 static int utf8_position_; |
| 221 static int character_position_; |
244 }; | 222 }; |
245 | 223 |
246 | 224 |
247 // Represents the location of one element relative to the intersection of | 225 // Represents the location of one element relative to the intersection of |
248 // two sets. Corresponds to the four areas of a Venn diagram. | 226 // two sets. Corresponds to the four areas of a Venn diagram. |
249 enum ElementInSetsRelation { | 227 enum ElementInSetsRelation { |
250 kInsideNone = 0, | 228 kInsideNone = 0, |
251 kInsideFirst = 1, | 229 kInsideFirst = 1, |
252 kInsideSecond = 2, | 230 kInsideSecond = 2, |
253 kInsideBoth = 3 | 231 kInsideBoth = 3 |
(...skipping 1383 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1637 bool global, | 1615 bool global, |
1638 bool multiline, | 1616 bool multiline, |
1639 Handle<String> pattern, | 1617 Handle<String> pattern, |
1640 Handle<String> sample_subject, | 1618 Handle<String> sample_subject, |
1641 bool is_ascii, Zone* zone); | 1619 bool is_ascii, Zone* zone); |
1642 | 1620 |
1643 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1621 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1644 }; | 1622 }; |
1645 | 1623 |
1646 | 1624 |
| 1625 class OffsetsVector { |
| 1626 public: |
| 1627 inline OffsetsVector(int num_registers, Isolate* isolate) |
| 1628 : offsets_vector_length_(num_registers) { |
| 1629 if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { |
| 1630 vector_ = NewArray<int>(offsets_vector_length_); |
| 1631 } else { |
| 1632 vector_ = isolate->jsregexp_static_offsets_vector(); |
| 1633 } |
| 1634 } |
| 1635 inline ~OffsetsVector() { |
| 1636 if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { |
| 1637 DeleteArray(vector_); |
| 1638 vector_ = NULL; |
| 1639 } |
| 1640 } |
| 1641 inline int* vector() { return vector_; } |
| 1642 inline int length() { return offsets_vector_length_; } |
| 1643 |
| 1644 static const int kStaticOffsetsVectorSize = |
| 1645 Isolate::kJSRegexpStaticOffsetsVectorSize; |
| 1646 |
| 1647 private: |
| 1648 static Address static_offsets_vector_address(Isolate* isolate) { |
| 1649 return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector()); |
| 1650 } |
| 1651 |
| 1652 int* vector_; |
| 1653 int offsets_vector_length_; |
| 1654 |
| 1655 friend class ExternalReference; |
| 1656 }; |
| 1657 |
| 1658 |
1647 } } // namespace v8::internal | 1659 } } // namespace v8::internal |
1648 | 1660 |
1649 #endif // V8_JSREGEXP_H_ | 1661 #endif // V8_JSREGEXP_H_ |
OLD | NEW |