| OLD | NEW |
| 1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | 1 // Copyright 2008 The RE2 Authors. All Rights Reserved. |
| 2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
| 3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 // A DFA (deterministic finite automaton)-based regular expression search. | 5 // A DFA (deterministic finite automaton)-based regular expression search. |
| 6 // | 6 // |
| 7 // The DFA search has two main parts: the construction of the automaton, | 7 // The DFA search has two main parts: the construction of the automaton, |
| 8 // which is represented by a graph of State structures, and the execution | 8 // which is represented by a graph of State structures, and the execution |
| 9 // of the automaton over a given input string. | 9 // of the automaton over a given input string. |
| 10 // | 10 // |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 110 | 110 |
| 111 enum { | 111 enum { |
| 112 kByteEndText = 256, // imaginary byte at end of text | 112 kByteEndText = 256, // imaginary byte at end of text |
| 113 | 113 |
| 114 kFlagEmptyMask = 0xFFF, // State.flag_: bits holding kEmptyXXX flags | 114 kFlagEmptyMask = 0xFFF, // State.flag_: bits holding kEmptyXXX flags |
| 115 kFlagMatch = 0x1000, // State.flag_: this is a matching state | 115 kFlagMatch = 0x1000, // State.flag_: this is a matching state |
| 116 kFlagLastWord = 0x2000, // State.flag_: last byte was a word char | 116 kFlagLastWord = 0x2000, // State.flag_: last byte was a word char |
| 117 kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left | 117 kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left |
| 118 }; | 118 }; |
| 119 | 119 |
| 120 #ifndef STL_MSVC |
| 120 // STL function structures for use with unordered_set. | 121 // STL function structures for use with unordered_set. |
| 121 struct StateEqual { | 122 struct StateEqual { |
| 122 bool operator()(const State* a, const State* b) const { | 123 bool operator()(const State* a, const State* b) const { |
| 123 if (a == b) | 124 if (a == b) |
| 124 return true; | 125 return true; |
| 125 if (a == NULL || b == NULL) | 126 if (a == NULL || b == NULL) |
| 126 return false; | 127 return false; |
| 127 if (a->ninst_ != b->ninst_) | 128 if (a->ninst_ != b->ninst_) |
| 128 return false; | 129 return false; |
| 129 if (a->flag_ != b->flag_) | 130 if (a->flag_ != b->flag_) |
| 130 return false; | 131 return false; |
| 131 for (int i = 0; i < a->ninst_; i++) | 132 for (int i = 0; i < a->ninst_; i++) |
| 132 if (a->inst_[i] != b->inst_[i]) | 133 if (a->inst_[i] != b->inst_[i]) |
| 133 return false; | 134 return false; |
| 134 return true; // they're equal | 135 return true; // they're equal |
| 135 } | 136 } |
| 136 }; | 137 }; |
| 138 #endif // STL_MSVC |
| 137 struct StateHash { | 139 struct StateHash { |
| 138 size_t operator()(const State* a) const { | 140 size_t operator()(const State* a) const { |
| 139 if (a == NULL) | 141 if (a == NULL) |
| 140 return 0; | 142 return 0; |
| 141 const char* s = reinterpret_cast<const char*>(a->inst_); | 143 const char* s = reinterpret_cast<const char*>(a->inst_); |
| 142 int len = a->ninst_ * sizeof a->inst_[0]; | 144 int len = a->ninst_ * sizeof a->inst_[0]; |
| 143 if (sizeof(size_t) == sizeof(uint32)) | 145 if (sizeof(size_t) == sizeof(uint32)) |
| 144 return Hash32StringWithSeed(s, len, a->flag_); | 146 return Hash32StringWithSeed(s, len, a->flag_); |
| 145 else | 147 else |
| 146 return Hash64StringWithSeed(s, len, a->flag_); | 148 return Hash64StringWithSeed(s, len, a->flag_); |
| 147 } | 149 } |
| 150 #ifdef STL_MSVC |
| 151 // Less than operator. |
| 152 bool operator()(const State* a, const State* b) const { |
| 153 if (a == b) |
| 154 return false; |
| 155 if (a == NULL || b == NULL) |
| 156 return a == NULL; |
| 157 if (a->ninst_ != b->ninst_) |
| 158 return a->ninst_ < b->ninst_; |
| 159 if (a->flag_ != b->flag_) |
| 160 return a->flag_ < b->flag_; |
| 161 for (int i = 0; i < a->ninst_; ++i) |
| 162 if (a->inst_[i] != b->inst_[i]) |
| 163 return a->inst_[i] < b->inst_[i]; |
| 164 return false; // they're equal |
| 165 } |
| 166 // The two public members are required by msvc. 4 and 8 are default values. |
| 167 // Reference: http://msdn.microsoft.com/en-us/library/1s1byw77.aspx |
| 168 static const size_t bucket_size = 4; |
| 169 static const size_t min_buckets = 8; |
| 170 #endif // STL_MSVC |
| 148 }; | 171 }; |
| 149 | 172 |
| 173 #ifdef STL_MSVC |
| 174 typedef unordered_set<State*, StateHash> StateSet; |
| 175 #else // !STL_MSVC |
| 150 typedef unordered_set<State*, StateHash, StateEqual> StateSet; | 176 typedef unordered_set<State*, StateHash, StateEqual> StateSet; |
| 177 #endif // STL_MSVC |
| 151 | 178 |
| 152 | 179 |
| 153 private: | 180 private: |
| 154 // Special "firstbyte" values for a state. (Values >= 0 denote actual bytes.) | 181 // Special "firstbyte" values for a state. (Values >= 0 denote actual bytes.) |
| 155 enum { | 182 enum { |
| 156 kFbUnknown = -1, // No analysis has been performed. | 183 kFbUnknown = -1, // No analysis has been performed. |
| 157 kFbMany = -2, // Many bytes will lead out of this state. | 184 kFbMany = -2, // Many bytes will lead out of this state. |
| 158 kFbNone = -3, // No bytes lead out of this state. | 185 kFbNone = -3, // No bytes lead out of this state. |
| 159 }; | 186 }; |
| 160 | 187 |
| (...skipping 796 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 957 if (state == NULL) { | 984 if (state == NULL) { |
| 958 LOG(DFATAL) << "NULL state in RunStateOnByte"; | 985 LOG(DFATAL) << "NULL state in RunStateOnByte"; |
| 959 return NULL; | 986 return NULL; |
| 960 } | 987 } |
| 961 LOG(DFATAL) << "Unexpected special state in RunStateOnByte"; | 988 LOG(DFATAL) << "Unexpected special state in RunStateOnByte"; |
| 962 return NULL; | 989 return NULL; |
| 963 } | 990 } |
| 964 | 991 |
| 965 // If someone else already computed this, return it. | 992 // If someone else already computed this, return it. |
| 966 MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering | 993 MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering |
| 967 if (state->next_[ByteMap(c)]) | 994 State* ns = state->next_[ByteMap(c)]; |
| 968 return state->next_[ByteMap(c)]; | 995 ANNOTATE_HAPPENS_AFTER(ns); |
| 996 if (ns != NULL) |
| 997 return ns; |
| 969 | 998 |
| 970 // Convert state into Workq. | 999 // Convert state into Workq. |
| 971 StateToWorkq(state, q0_); | 1000 StateToWorkq(state, q0_); |
| 972 | 1001 |
| 973 // Flags marking the kinds of empty-width things (^ $ etc) | 1002 // Flags marking the kinds of empty-width things (^ $ etc) |
| 974 // around this byte. Before the byte we have the flags recorded | 1003 // around this byte. Before the byte we have the flags recorded |
| 975 // in the State structure itself. After the byte we have | 1004 // in the State structure itself. After the byte we have |
| 976 // nothing yet (but that will change: read on). | 1005 // nothing yet (but that will change: read on). |
| 977 uint needflag = state->flag_ >> kFlagNeedShift; | 1006 uint needflag = state->flag_ >> kFlagNeedShift; |
| 978 uint beforeflag = state->flag_ & kFlagEmptyMask; | 1007 uint beforeflag = state->flag_ & kFlagEmptyMask; |
| (...skipping 22 matching lines...) Expand all Loading... |
| 1001 beforeflag |= kEmptyWordBoundary; | 1030 beforeflag |= kEmptyWordBoundary; |
| 1002 | 1031 |
| 1003 // Okay, finally ready to run. | 1032 // Okay, finally ready to run. |
| 1004 // Only useful to rerun on empty string if there are new, useful flags. | 1033 // Only useful to rerun on empty string if there are new, useful flags. |
| 1005 if (beforeflag & ~oldbeforeflag & needflag) { | 1034 if (beforeflag & ~oldbeforeflag & needflag) { |
| 1006 RunWorkqOnEmptyString(q0_, q1_, beforeflag); | 1035 RunWorkqOnEmptyString(q0_, q1_, beforeflag); |
| 1007 swap(q0_, q1_); | 1036 swap(q0_, q1_); |
| 1008 } | 1037 } |
| 1009 bool ismatch = false; | 1038 bool ismatch = false; |
| 1010 RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_, start_unanchored_); | 1039 RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_, start_unanchored_); |
| 1011 swap(q0_, q1_); | 1040 |
| 1041 // Most of the time, we build the state from the output of |
| 1042 // RunWorkqOnByte, so swap q0_ and q1_ here. However, so that |
| 1043 // RE2::Set can tell exactly which match instructions |
| 1044 // contributed to the match, don't swap if c is kByteEndText. |
| 1045 // The resulting state wouldn't be correct for further processing |
| 1046 // of the string, but we're at the end of the text so that's okay. |
| 1047 // Leaving q0_ alone preseves the match instructions that led to |
| 1048 // the current setting of ismatch. |
| 1049 if (c != kByteEndText || kind_ != Prog::kManyMatch) |
| 1050 swap(q0_, q1_); |
| 1012 | 1051 |
| 1013 // Save afterflag along with ismatch and isword in new state. | 1052 // Save afterflag along with ismatch and isword in new state. |
| 1014 uint flag = afterflag; | 1053 uint flag = afterflag; |
| 1015 if (ismatch) | 1054 if (ismatch) |
| 1016 flag |= kFlagMatch; | 1055 flag |= kFlagMatch; |
| 1017 if (isword) | 1056 if (isword) |
| 1018 flag |= kFlagLastWord; | 1057 flag |= kFlagLastWord; |
| 1019 | 1058 |
| 1020 State* ns = WorkqToCachedState(q0_, flag); | 1059 ns = WorkqToCachedState(q0_, flag); |
| 1021 | 1060 |
| 1022 // Write barrier before updating state->next_ so that the | 1061 // Write barrier before updating state->next_ so that the |
| 1023 // main search loop can proceed without any locking, for speed. | 1062 // main search loop can proceed without any locking, for speed. |
| 1024 // (Otherwise it would need one mutex operation per input byte.) | 1063 // (Otherwise it would need one mutex operation per input byte.) |
| 1025 // The annotations below tell race detectors that: | 1064 // The annotations below tell race detectors that: |
| 1026 // a) the access to next_ should be ignored, | 1065 // a) the access to next_ should be ignored, |
| 1027 // b) 'ns' is properly published. | 1066 // b) 'ns' is properly published. |
| 1028 WriteMemoryBarrier(); // Flush ns before linking to it. | 1067 WriteMemoryBarrier(); // Flush ns before linking to it. |
| 1029 ANNOTATE_PUBLISH_MEMORY_RANGE(ns, sizeof(*ns)); | |
| 1030 | 1068 |
| 1031 ANNOTATE_IGNORE_WRITES_BEGIN(); | 1069 ANNOTATE_IGNORE_WRITES_BEGIN(); |
| 1070 ANNOTATE_HAPPENS_BEFORE(ns); |
| 1032 state->next_[ByteMap(c)] = ns; | 1071 state->next_[ByteMap(c)] = ns; |
| 1033 ANNOTATE_IGNORE_WRITES_END(); | 1072 ANNOTATE_IGNORE_WRITES_END(); |
| 1034 return ns; | 1073 return ns; |
| 1035 } | 1074 } |
| 1036 | 1075 |
| 1037 | 1076 |
| 1038 ////////////////////////////////////////////////////////////////////// | 1077 ////////////////////////////////////////////////////////////////////// |
| 1039 // DFA cache reset. | 1078 // DFA cache reset. |
| 1040 | 1079 |
| 1041 // Reader-writer lock helper. | 1080 // Reader-writer lock helper. |
| (...skipping 304 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1346 // ns == NULL means the state has not yet been computed | 1385 // ns == NULL means the state has not yet been computed |
| 1347 // (need to call RunStateOnByteUnlocked). | 1386 // (need to call RunStateOnByteUnlocked). |
| 1348 // RunStateOnByte returns ns == NULL if it is out of memory. | 1387 // RunStateOnByte returns ns == NULL if it is out of memory. |
| 1349 // ns == FullMatchState means the rest of the string matches. | 1388 // ns == FullMatchState means the rest of the string matches. |
| 1350 // | 1389 // |
| 1351 // Okay to use bytemap[] not ByteMap() here, because | 1390 // Okay to use bytemap[] not ByteMap() here, because |
| 1352 // c is known to be an actual byte and not kByteEndText. | 1391 // c is known to be an actual byte and not kByteEndText. |
| 1353 | 1392 |
| 1354 MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering | 1393 MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering |
| 1355 State* ns = s->next_[bytemap[c]]; | 1394 State* ns = s->next_[bytemap[c]]; |
| 1395 ANNOTATE_HAPPENS_AFTER(ns); |
| 1356 if (ns == NULL) { | 1396 if (ns == NULL) { |
| 1357 ns = RunStateOnByteUnlocked(s, c); | 1397 ns = RunStateOnByteUnlocked(s, c); |
| 1358 if (ns == NULL) { | 1398 if (ns == NULL) { |
| 1359 // After we reset the cache, we hold cache_mutex exclusively, | 1399 // After we reset the cache, we hold cache_mutex exclusively, |
| 1360 // so if resetp != NULL, it means we filled the DFA state | 1400 // so if resetp != NULL, it means we filled the DFA state |
| 1361 // cache with this search alone (without any other threads). | 1401 // cache with this search alone (without any other threads). |
| 1362 // Benchmarks show that doing a state computation on every | 1402 // Benchmarks show that doing a state computation on every |
| 1363 // byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the | 1403 // byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the |
| 1364 // same at about 2 MB/s. Unless we're processing an average | 1404 // same at about 2 MB/s. Unless we're processing an average |
| 1365 // of 10 bytes per state computation, fail so that RE2 can | 1405 // of 10 bytes per state computation, fail so that RE2 can |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1417 static_cast<int>(lastmatch - bp), | 1457 static_cast<int>(lastmatch - bp), |
| 1418 DumpState(s).c_str()); | 1458 DumpState(s).c_str()); |
| 1419 | 1459 |
| 1420 if (want_earliest_match) { | 1460 if (want_earliest_match) { |
| 1421 params->ep = reinterpret_cast<const char*>(lastmatch); | 1461 params->ep = reinterpret_cast<const char*>(lastmatch); |
| 1422 return true; | 1462 return true; |
| 1423 } | 1463 } |
| 1424 } | 1464 } |
| 1425 } | 1465 } |
| 1426 | 1466 |
| 1427 // Peek in state to see if a match is coming up. | |
| 1428 if (params->matches && kind_ == Prog::kManyMatch) { | |
| 1429 vector<int>* v = params->matches; | |
| 1430 v->clear(); | |
| 1431 if (s > SpecialStateMax) { | |
| 1432 for (int i = 0; i < s->ninst_; i++) { | |
| 1433 Prog::Inst* ip = prog_->inst(s->inst_[i]); | |
| 1434 if (ip->opcode() == kInstMatch) | |
| 1435 v->push_back(ip->match_id()); | |
| 1436 } | |
| 1437 } | |
| 1438 } | |
| 1439 | |
| 1440 | |
| 1441 // Process one more byte to see if it triggers a match. | 1467 // Process one more byte to see if it triggers a match. |
| 1442 // (Remember, matches are delayed one byte.) | 1468 // (Remember, matches are delayed one byte.) |
| 1443 int lastbyte; | 1469 int lastbyte; |
| 1444 if (run_forward) { | 1470 if (run_forward) { |
| 1445 if (params->text.end() == params->context.end()) | 1471 if (params->text.end() == params->context.end()) |
| 1446 lastbyte = kByteEndText; | 1472 lastbyte = kByteEndText; |
| 1447 else | 1473 else |
| 1448 lastbyte = params->text.end()[0] & 0xFF; | 1474 lastbyte = params->text.end()[0] & 0xFF; |
| 1449 } else { | 1475 } else { |
| 1450 if (params->text.begin() == params->context.begin()) | 1476 if (params->text.begin() == params->context.begin()) |
| 1451 lastbyte = kByteEndText; | 1477 lastbyte = kByteEndText; |
| 1452 else | 1478 else |
| 1453 lastbyte = params->text.begin()[-1] & 0xFF; | 1479 lastbyte = params->text.begin()[-1] & 0xFF; |
| 1454 } | 1480 } |
| 1455 | 1481 |
| 1456 MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering | 1482 MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering |
| 1457 State* ns = s->next_[ByteMap(lastbyte)]; | 1483 State* ns = s->next_[ByteMap(lastbyte)]; |
| 1484 ANNOTATE_HAPPENS_AFTER(ns); |
| 1458 if (ns == NULL) { | 1485 if (ns == NULL) { |
| 1459 ns = RunStateOnByteUnlocked(s, lastbyte); | 1486 ns = RunStateOnByteUnlocked(s, lastbyte); |
| 1460 if (ns == NULL) { | 1487 if (ns == NULL) { |
| 1461 StateSaver save_s(this, s); | 1488 StateSaver save_s(this, s); |
| 1462 ResetCache(params->cache_lock); | 1489 ResetCache(params->cache_lock); |
| 1463 if ((s = save_s.Restore()) == NULL) { | 1490 if ((s = save_s.Restore()) == NULL) { |
| 1464 params->failed = true; | 1491 params->failed = true; |
| 1465 return false; | 1492 return false; |
| 1466 } | 1493 } |
| 1467 ns = RunStateOnByteUnlocked(s, lastbyte); | 1494 ns = RunStateOnByteUnlocked(s, lastbyte); |
| 1468 if (ns == NULL) { | 1495 if (ns == NULL) { |
| 1469 LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset"; | 1496 LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset"; |
| 1470 params->failed = true; | 1497 params->failed = true; |
| 1471 return false; | 1498 return false; |
| 1472 } | 1499 } |
| 1473 } | 1500 } |
| 1474 } | 1501 } |
| 1475 s = ns; | 1502 s = ns; |
| 1476 if (DebugDFA) | 1503 if (DebugDFA) |
| 1477 fprintf(stderr, "@_: %s\n", DumpState(s).c_str()); | 1504 fprintf(stderr, "@_: %s\n", DumpState(s).c_str()); |
| 1478 if (s == FullMatchState) { | 1505 if (s == FullMatchState) { |
| 1479 params->ep = reinterpret_cast<const char*>(ep); | 1506 params->ep = reinterpret_cast<const char*>(ep); |
| 1480 return true; | 1507 return true; |
| 1481 } | 1508 } |
| 1482 if (s > SpecialStateMax && s->IsMatch()) { | 1509 if (s > SpecialStateMax && s->IsMatch()) { |
| 1483 matched = true; | 1510 matched = true; |
| 1484 lastmatch = p; | 1511 lastmatch = p; |
| 1512 if (params->matches && kind_ == Prog::kManyMatch) { |
| 1513 vector<int>* v = params->matches; |
| 1514 v->clear(); |
| 1515 for (int i = 0; i < s->ninst_; i++) { |
| 1516 Prog::Inst* ip = prog_->inst(s->inst_[i]); |
| 1517 if (ip->opcode() == kInstMatch) |
| 1518 v->push_back(ip->match_id()); |
| 1519 } |
| 1520 } |
| 1485 if (DebugDFA) | 1521 if (DebugDFA) |
| 1486 fprintf(stderr, "match @%d! [%s]\n", static_cast<int>(lastmatch - bp), | 1522 fprintf(stderr, "match @%d! [%s]\n", static_cast<int>(lastmatch - bp), |
| 1487 DumpState(s).c_str()); | 1523 DumpState(s).c_str()); |
| 1488 } | 1524 } |
| 1489 params->ep = reinterpret_cast<const char*>(lastmatch); | 1525 params->ep = reinterpret_cast<const char*>(lastmatch); |
| 1490 return matched; | 1526 return matched; |
| 1491 } | 1527 } |
| 1492 | 1528 |
| 1493 // Inline specializations of the general loop. | 1529 // Inline specializations of the general loop. |
| 1494 bool DFA::SearchFFF(SearchParams* params) { | 1530 bool DFA::SearchFFF(SearchParams* params) { |
| (...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1632 return false; | 1668 return false; |
| 1633 } | 1669 } |
| 1634 } | 1670 } |
| 1635 | 1671 |
| 1636 if (DebugDFA) | 1672 if (DebugDFA) |
| 1637 fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n", | 1673 fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n", |
| 1638 params->anchored, params->run_forward, flags, | 1674 params->anchored, params->run_forward, flags, |
| 1639 DumpState(info->start).c_str(), info->firstbyte); | 1675 DumpState(info->start).c_str(), info->firstbyte); |
| 1640 | 1676 |
| 1641 params->start = info->start; | 1677 params->start = info->start; |
| 1642 params->firstbyte = info->firstbyte; | 1678 params->firstbyte = ANNOTATE_UNPROTECTED_READ(info->firstbyte); |
| 1643 | 1679 |
| 1644 return true; | 1680 return true; |
| 1645 } | 1681 } |
| 1646 | 1682 |
| 1647 // Fills in info if needed. Returns true on success, false on failure. | 1683 // Fills in info if needed. Returns true on success, false on failure. |
| 1648 bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, | 1684 bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, |
| 1649 uint flags) { | 1685 uint flags) { |
| 1650 // Quick check; okay because of memory barriers below. | 1686 // Quick check; okay because of memory barriers below. |
| 1651 if (info->firstbyte != kFbUnknown) | 1687 if (ANNOTATE_UNPROTECTED_READ(info->firstbyte) != kFbUnknown) { |
| 1688 ANNOTATE_HAPPENS_AFTER(&info->firstbyte); |
| 1652 return true; | 1689 return true; |
| 1690 } |
| 1653 | 1691 |
| 1654 MutexLock l(&mutex_); | 1692 MutexLock l(&mutex_); |
| 1655 if (info->firstbyte != kFbUnknown) | 1693 if (info->firstbyte != kFbUnknown) { |
| 1694 ANNOTATE_HAPPENS_AFTER(&info->firstbyte); |
| 1656 return true; | 1695 return true; |
| 1696 } |
| 1657 | 1697 |
| 1658 q0_->clear(); | 1698 q0_->clear(); |
| 1659 AddToQueue(q0_, | 1699 AddToQueue(q0_, |
| 1660 params->anchored ? prog_->start() : prog_->start_unanchored(), | 1700 params->anchored ? prog_->start() : prog_->start_unanchored(), |
| 1661 flags); | 1701 flags); |
| 1662 info->start = WorkqToCachedState(q0_, flags); | 1702 info->start = WorkqToCachedState(q0_, flags); |
| 1663 if (info->start == NULL) | 1703 if (info->start == NULL) |
| 1664 return false; | 1704 return false; |
| 1665 | 1705 |
| 1666 if (info->start == DeadState) { | 1706 if (info->start == DeadState) { |
| 1707 ANNOTATE_HAPPENS_BEFORE(&info->firstbyte); |
| 1667 WriteMemoryBarrier(); // Synchronize with "quick check" above. | 1708 WriteMemoryBarrier(); // Synchronize with "quick check" above. |
| 1668 info->firstbyte = kFbNone; | 1709 info->firstbyte = kFbNone; |
| 1669 return true; | 1710 return true; |
| 1670 } | 1711 } |
| 1671 | 1712 |
| 1672 if (info->start == FullMatchState) { | 1713 if (info->start == FullMatchState) { |
| 1714 ANNOTATE_HAPPENS_BEFORE(&info->firstbyte); |
| 1673 WriteMemoryBarrier(); // Synchronize with "quick check" above. | 1715 WriteMemoryBarrier(); // Synchronize with "quick check" above. |
| 1674 info->firstbyte = kFbNone; // will be ignored | 1716 info->firstbyte = kFbNone; // will be ignored |
| 1675 return true; | 1717 return true; |
| 1676 } | 1718 } |
| 1677 | 1719 |
| 1678 // Compute info->firstbyte by running state on all | 1720 // Compute info->firstbyte by running state on all |
| 1679 // possible byte values, looking for a single one that | 1721 // possible byte values, looking for a single one that |
| 1680 // leads to a different state. | 1722 // leads to a different state. |
| 1681 int firstbyte = kFbNone; | 1723 int firstbyte = kFbNone; |
| 1682 for (int i = 0; i < 256; i++) { | 1724 for (int i = 0; i < 256; i++) { |
| 1683 State* s = RunStateOnByte(info->start, i); | 1725 State* s = RunStateOnByte(info->start, i); |
| 1684 if (s == NULL) { | 1726 if (s == NULL) { |
| 1727 ANNOTATE_HAPPENS_BEFORE(&info->firstbyte); |
| 1685 WriteMemoryBarrier(); // Synchronize with "quick check" above. | 1728 WriteMemoryBarrier(); // Synchronize with "quick check" above. |
| 1686 info->firstbyte = firstbyte; | 1729 info->firstbyte = firstbyte; |
| 1687 return false; | 1730 return false; |
| 1688 } | 1731 } |
| 1689 if (s == info->start) | 1732 if (s == info->start) |
| 1690 continue; | 1733 continue; |
| 1691 // Goes to new state... | 1734 // Goes to new state... |
| 1692 if (firstbyte == kFbNone) { | 1735 if (firstbyte == kFbNone) { |
| 1693 firstbyte = i; // ... first one | 1736 firstbyte = i; // ... first one |
| 1694 } else { | 1737 } else { |
| 1695 firstbyte = kFbMany; // ... too many | 1738 firstbyte = kFbMany; // ... too many |
| 1696 break; | 1739 break; |
| 1697 } | 1740 } |
| 1698 } | 1741 } |
| 1742 ANNOTATE_HAPPENS_BEFORE(&info->firstbyte); |
| 1699 WriteMemoryBarrier(); // Synchronize with "quick check" above. | 1743 WriteMemoryBarrier(); // Synchronize with "quick check" above. |
| 1700 info->firstbyte = firstbyte; | 1744 info->firstbyte = firstbyte; |
| 1701 return true; | 1745 return true; |
| 1702 } | 1746 } |
| 1703 | 1747 |
| 1704 // The actual DFA search: calls AnalyzeSearch and then FastSearchLoop. | 1748 // The actual DFA search: calls AnalyzeSearch and then FastSearchLoop. |
| 1705 bool DFA::Search(const StringPiece& text, | 1749 bool DFA::Search(const StringPiece& text, |
| 1706 const StringPiece& context, | 1750 const StringPiece& context, |
| 1707 bool anchored, | 1751 bool anchored, |
| 1708 bool want_earliest_match, | 1752 bool want_earliest_match, |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1771 DFA* Prog::GetDFA(MatchKind kind) { | 1815 DFA* Prog::GetDFA(MatchKind kind) { |
| 1772 DFA*volatile* pdfa; | 1816 DFA*volatile* pdfa; |
| 1773 if (kind == kFirstMatch || kind == kManyMatch) { | 1817 if (kind == kFirstMatch || kind == kManyMatch) { |
| 1774 pdfa = &dfa_first_; | 1818 pdfa = &dfa_first_; |
| 1775 } else { | 1819 } else { |
| 1776 kind = kLongestMatch; | 1820 kind = kLongestMatch; |
| 1777 pdfa = &dfa_longest_; | 1821 pdfa = &dfa_longest_; |
| 1778 } | 1822 } |
| 1779 | 1823 |
| 1780 // Quick check; okay because of memory barrier below. | 1824 // Quick check; okay because of memory barrier below. |
| 1781 DFA *dfa = *pdfa; | 1825 DFA *dfa = ANNOTATE_UNPROTECTED_READ(*pdfa); |
| 1782 if (dfa != NULL) { | 1826 if (dfa != NULL) { |
| 1783 ANNOTATE_HAPPENS_AFTER(dfa); | 1827 ANNOTATE_HAPPENS_AFTER(dfa); |
| 1784 return dfa; | 1828 return dfa; |
| 1785 } | 1829 } |
| 1786 | 1830 |
| 1787 MutexLock l(&dfa_mutex_); | 1831 MutexLock l(&dfa_mutex_); |
| 1788 dfa = *pdfa; | 1832 dfa = *pdfa; |
| 1789 if (dfa != NULL) | 1833 if (dfa != NULL) { |
| 1834 ANNOTATE_HAPPENS_AFTER(dfa); |
| 1790 return dfa; | 1835 return dfa; |
| 1836 } |
| 1791 | 1837 |
| 1792 // For a forward DFA, half the memory goes to each DFA. | 1838 // For a forward DFA, half the memory goes to each DFA. |
| 1793 // For a reverse DFA, all the memory goes to the | 1839 // For a reverse DFA, all the memory goes to the |
| 1794 // "longest match" DFA, because RE2 never does reverse | 1840 // "longest match" DFA, because RE2 never does reverse |
| 1795 // "first match" searches. | 1841 // "first match" searches. |
| 1796 int64 m = dfa_mem_/2; | 1842 int64 m = dfa_mem_/2; |
| 1797 if (reversed_) { | 1843 if (reversed_) { |
| 1798 if (kind == kLongestMatch || kind == kManyMatch) | 1844 if (kind == kLongestMatch || kind == kManyMatch) |
| 1799 m = dfa_mem_; | 1845 m = dfa_mem_; |
| 1800 else | 1846 else |
| (...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2079 if (dfa_longest_ == NULL) { | 2125 if (dfa_longest_ == NULL) { |
| 2080 dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2); | 2126 dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2); |
| 2081 delete_dfa_ = DeleteDFA; | 2127 delete_dfa_ = DeleteDFA; |
| 2082 } | 2128 } |
| 2083 dfa = dfa_longest_; | 2129 dfa = dfa_longest_; |
| 2084 } | 2130 } |
| 2085 return dfa->PossibleMatchRange(min, max, maxlen); | 2131 return dfa->PossibleMatchRange(min, max, maxlen); |
| 2086 } | 2132 } |
| 2087 | 2133 |
| 2088 } // namespace re2 | 2134 } // namespace re2 |
| OLD | NEW |