Index: chrome/browser/safe_browsing/safe_browsing_database.cc |
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc |
index ea958d3d24445a944cfad9c9e6f5bb7acd896dda..784904eb6a2bed16750367f9808aa24272856fbc 100644 |
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc |
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc |
@@ -258,134 +258,13 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { |
return a.full_hash.prefix < b.full_hash.prefix; |
} |
-// As compared to the bloom filter, PrefixSet should have these |
-// properties: |
-// - Any bloom filter miss should be a prefix set miss. |
-// - Any prefix set hit should be a bloom filter hit. |
-// - Bloom filter false positives are prefix set misses. |
-// The following is to log actual performance to verify this. |
-enum PrefixSetEvent { |
- // Hits to prefix set and bloom filter. |
- PREFIX_SET_HIT, |
- PREFIX_SET_BLOOM_HIT, |
- // These were to track bloom misses which hit the prefix set, with |
- // _INVALID to track where the item didn't appear to actually be in |
- // the prefix set. _INVALID was never hit. |
- PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_OBSOLETE, |
- PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_INVALID_OBSOLETE, |
- // GetPrefixes() after creation failed to get the same prefixes. |
- PREFIX_SET_GETPREFIXES_BROKEN, |
- // Fine-grained tests which didn't provide any good direction. |
- PREFIX_SET_GETPREFIXES_BROKEN_SIZE_OBSOLETE, |
- PREFIX_SET_GETPREFIXES_FIRST_BROKEN_OBSOLETE, |
- PREFIX_SET_SBPREFIX_WAS_BROKEN_OBSOLETE, |
- PREFIX_SET_GETPREFIXES_BROKEN_SORTING_OBSOLETE, |
- PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION_OBSOLETE, |
- PREFIX_SET_GETPREFIXES_UNSORTED_IS_DELTA_OBSOLETE, |
- PREFIX_SET_GETPREFIXES_UNSORTED_IS_INDEX_OBSOLETE, |
- // Failed checksums when creating filters. |
- PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM, |
- PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM, |
- PREFIX_SET_CREATE_ADD_PREFIXES_CHECKSUM, |
- PREFIX_SET_CREATE_PREFIXES_CHECKSUM, |
- PREFIX_SET_CREATE_GET_PREFIXES_CHECKSUM, |
- // Failed checksums when probing the filters. |
- PREFIX_SET_MISMATCH_PREFIX_SET_CHECKSUM, |
- PREFIX_SET_MISMATCH_BLOOM_FILTER_CHECKSUM, |
- |
- // Recorded once per update if the impossible occurs. |
- PREFIX_SET_BLOOM_MISS_PREFIX_HIT, |
- |
- // Corresponding CHECKSUM failed, but on retry it succeeded. |
- PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM_SUCCEEDED, |
- PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM_SUCCEEDED, |
- |
- // Memory space for histograms is determined by the max. ALWAYS ADD |
- // NEW VALUES BEFORE THIS ONE. |
- PREFIX_SET_EVENT_MAX |
-}; |
- |
-void RecordPrefixSetInfo(PrefixSetEvent event_type) { |
- UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type, |
- PREFIX_SET_EVENT_MAX); |
-} |
- |
// Helper to reduce code duplication. |
safe_browsing::PrefixSet* CreateEmptyPrefixSet() { |
return new safe_browsing::PrefixSet(std::vector<SBPrefix>()); |
} |
-// Generate xor "checksum" of SBPrefix part of add prefixes. Pass 0 |
-// for |seed| will return a checksum, passing a previous checksum for |
-// |seed| will return 0 (if it checks out). Coded this way in hopes |
-// that the compiler won't optimize separate runs into a single |
-// temporary. |
-SBPrefix ChecksumAddPrefixes(SBPrefix seed, |
- const SBAddPrefixes& add_prefixes) { |
- SBPrefix checksum = seed; |
- for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); |
- iter != add_prefixes.end(); ++iter) { |
- checksum ^= iter->prefix; |
- } |
- return checksum; |
-} |
- |
-// Generate xor "checksum" of prefixes. |
-SBPrefix ChecksumPrefixes(SBPrefix seed, |
- const std::vector<SBPrefix>& prefixes) { |
- SBPrefix checksum = seed; |
- for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); |
- iter != prefixes.end(); ++iter) { |
- checksum ^= *iter; |
- } |
- return checksum; |
-} |
- |
-// Prefix set doesn't store duplicates, making it hard to verify that |
-// checksums match. Also hard is verifying that nothing was corrupted |
-// while removing duplicates from a vector. So this generates a |
-// checksum of only the unique elements. |
-SBPrefix ChecksumUniquePrefixes(const std::vector<SBPrefix>& prefixes) { |
- // Handle edge case up front. |
- if (prefixes.size() == 0) |
- return 0; |
- |
- std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); |
- SBPrefix checksum = *iter++; |
- while (iter != prefixes.end()) { |
- if (*(iter - 1) != *iter) |
- checksum ^= *iter; |
- iter++; |
- } |
- return checksum; |
-} |
- |
-// NOTE(shess): Past histograms have indicated that in a given day, |
-// about 1 in 100,000 updates result in a PrefixSet which was |
-// inconsistent relative to the BloomFilter. Windows is about 5x more |
-// likely to build an inconsistent PrefixSet than Mac. A number of |
-// developers have reviewed the code, and I ran extensive fuzzing with |
-// random data, so at this point I'm trying to demonstrate memory |
-// corruption. |
-// |
-// Other findings from past instrumentation: |
-// - half of one percent of brokenness cases implied duplicate items |
-// in |prefixes|. Per the code above, this should not be possible. |
-// - about 1/20 of broken cases happened more than once for a given |
-// user. Note that empty updates generally don't hit this code at |
-// all, so that may not imply a specific input pattern breaking |
-// things. |
-// - about 1/3 of broken cases show a checksum mismatch between the |
-// checksum calculated while creating |prefix_set| and the checksum |
-// calculated immediately after creation. This is almost certainly |
-// memory corruption. |
- |
// Generate |PrefixSet| and |BloomFilter| instances from the contents |
-// of |add_prefixes|. Verify that the results are internally |
-// consistent, and that the input maintained consistence while |
-// constructing (which should assure that they are mutually |
-// consistent). Returns an empty prefix set if any of the checks |
-// fail. |
+// of |add_prefixes|. |
void FiltersFromAddPrefixes( |
const SBAddPrefixes& add_prefixes, |
scoped_refptr<BloomFilter>* bloom_filter, |
@@ -398,8 +277,6 @@ void FiltersFromAddPrefixes( |
return; |
} |
- const SBPrefix add_prefixes_checksum = ChecksumAddPrefixes(0, add_prefixes); |
- |
// TODO(shess): If |add_prefixes| were sorted by the prefix, it |
// could be passed directly to |PrefixSet()|, removing the need for |
// |prefixes|. |
@@ -411,105 +288,12 @@ void FiltersFromAddPrefixes( |
} |
std::sort(prefixes.begin(), prefixes.end()); |
- const SBPrefix unique_prefixes_checksum = ChecksumUniquePrefixes(prefixes); |
- |
for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); |
iter != prefixes.end(); ++iter) { |
bloom_filter->get()->Insert(*iter); |
} |
prefix_set->reset(new safe_browsing::PrefixSet(prefixes)); |
- |
- // "unit test" for ChecksumUniquePrefixes() and GetPrefixesChecksum(). |
- if (DCHECK_IS_ON()) { |
- std::vector<SBPrefix> unique(prefixes); |
- unique.erase(std::unique(unique.begin(), unique.end()), unique.end()); |
- DCHECK_EQ(0, ChecksumPrefixes(unique_prefixes_checksum, unique)); |
- |
- std::vector<SBPrefix> restored; |
- prefix_set->get()->GetPrefixes(&restored); |
- DCHECK_EQ(0, ChecksumPrefixes(prefix_set->get()->GetPrefixesChecksum(), |
- restored)); |
- } |
- |
- // TODO(shess): Need a barrier to prevent ordering checks above here |
- // or construction below here? |
- |
- // NOTE(shess): So far, the fallout is: |
- // 605 PREFIX_SET_CHECKSUM (failed prefix_set->CheckChecksum()). |
- // 32 BLOOM_FILTER_CHECKSUM (failed bloom_filter->CheckChecksum()). |
- // 1 ADD_PREFIXES_CHECKSUM (contents of add_prefixes changed). |
- // 7 PREFIXES_CHECKSUM (contents of prefixes changed). |
- |
- bool get_prefixes_broken = |
- (unique_prefixes_checksum != prefix_set->get()->GetPrefixesChecksum()); |
- bool prefix_set_broken = !prefix_set->get()->CheckChecksum(); |
- bool bloom_filter_broken = !bloom_filter->get()->CheckChecksum(); |
- bool prefixes_input_broken = |
- (0 != ChecksumPrefixes(add_prefixes_checksum, prefixes)); |
- bool add_prefixes_input_broken = |
- (0 != ChecksumAddPrefixes(add_prefixes_checksum, add_prefixes)); |
- |
- if (add_prefixes_input_broken) { |
- RecordPrefixSetInfo(PREFIX_SET_CREATE_ADD_PREFIXES_CHECKSUM); |
- } else if (prefixes_input_broken) { |
- RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIXES_CHECKSUM); |
- } |
- |
- if (prefix_set_broken) { |
- RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM); |
- |
- // Failing PrefixSet::CheckChecksum() implies that the set's |
- // internal data changed during construction. If the retry |
- // consistently succeeds, that implies memory corruption. If the |
- // retry consistently fails, that implies PrefixSet is broken. |
- scoped_ptr<safe_browsing::PrefixSet> retry_set( |
- new safe_browsing::PrefixSet(prefixes)); |
- if (retry_set->CheckChecksum()) |
- RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM_SUCCEEDED); |
- |
- // TODO(shess): In case of double failure, consider pinning the |
- // data and calling NOTREACHED(). But it's a lot of data. Could |
- // also implement a binary search to constrain the amount of input |
- // to consider, but that's a lot of complexity. |
- } |
- |
- // TODO(shess): Obviously this is a problem for operation of the |
- // bloom filter! But for purposes of checking prefix set operation, |
- // all that matters is not messing up the histograms recorded later. |
- if (bloom_filter_broken) { |
- RecordPrefixSetInfo(PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM); |
- |
- // As for PrefixSet, failing BloomFilter::CheckChecksum() implies |
- // that the filter's internal data was changed. |
- scoped_refptr<BloomFilter> retry_filter(new BloomFilter(filter_size)); |
- for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); |
- iter != prefixes.end(); ++iter) { |
- retry_filter->Insert(*iter); |
- } |
- if (retry_filter->CheckChecksum()) |
- RecordPrefixSetInfo(PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM_SUCCEEDED); |
- } |
- |
- // Attempt to isolate the case where the output of GetPrefixes() |
- // would differ from the input presented. This case implies that |
- // PrefixSet has an actual implementation flaw, and may in the |
- // future warrant more aggressive action, such as somehow dumping |
- // |prefixes| up to the crash server. |
- if (get_prefixes_broken && !prefix_set_broken && !prefixes_input_broken) |
- RecordPrefixSetInfo(PREFIX_SET_CREATE_GET_PREFIXES_CHECKSUM); |
- |
- // If anything broke, clear the prefix set to prevent pollution of |
- // histograms generated later. |
- if (get_prefixes_broken || prefix_set_broken || bloom_filter_broken || |
- prefixes_input_broken || add_prefixes_input_broken) { |
- DCHECK(!get_prefixes_broken); |
- DCHECK(!prefix_set_broken); |
- DCHECK(!bloom_filter_broken); |
- DCHECK(!prefixes_input_broken); |
- DCHECK(!add_prefixes_input_broken); |
- prefix_set->reset(CreateEmptyPrefixSet()); |
- } |
} |
} // namespace |
@@ -764,59 +548,13 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( |
if (!browse_bloom_filter_.get()) |
return false; |
- DCHECK(prefix_set_.get()); |
- |
- // |prefix_set_| is empty until the first update, only log info if |
- // not empty. |
- const bool prefix_set_empty = !prefix_set_->GetSize(); |
- |
- // Track cases where the filters were not consistent with each other. |
- bool bloom_hit_prefix_miss = false; |
- bool bloom_miss_prefix_hit = false; |
size_t miss_count = 0; |
for (size_t i = 0; i < full_hashes.size(); ++i) { |
- bool found = prefix_set_->Exists(full_hashes[i].prefix); |
- |
if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { |
- if (!prefix_set_empty) { |
- RecordPrefixSetInfo(PREFIX_SET_BLOOM_HIT); |
- // This should be less than PREFIX_SET_BLOOM_HIT by the |
- // false positive rate. |
- if (found) { |
- RecordPrefixSetInfo(PREFIX_SET_HIT); |
- } else { |
- // Could be false positive, but _could_ be corruption. |
- bloom_hit_prefix_miss = true; |
- } |
- } |
prefix_hits->push_back(full_hashes[i].prefix); |
if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) |
++miss_count; |
- } else { |
- // Bloom filter misses should never be in prefix set. Check |
- // to see if the prefix set or bloom filter have changed since |
- // being created. |
- DCHECK(!found); |
- if (found && !prefix_set_empty) |
- bloom_miss_prefix_hit = true; |
- } |
- } |
- |
- // In case of inconsistent results, verify the checksums and record |
- // failures (in case of corruption inconsistencies would be |
- // expected). In case of corruption clear out |prefix_set_|, once |
- // we've noticed corruption there is no point to future comparisons. |
- if (bloom_miss_prefix_hit || bloom_hit_prefix_miss) { |
- if (!prefix_set_->CheckChecksum()) { |
- RecordPrefixSetInfo(PREFIX_SET_MISMATCH_PREFIX_SET_CHECKSUM); |
- prefix_set_.reset(CreateEmptyPrefixSet()); |
- } else if (!browse_bloom_filter_->CheckChecksum()) { |
- RecordPrefixSetInfo(PREFIX_SET_MISMATCH_BLOOM_FILTER_CHECKSUM); |
- prefix_set_.reset(CreateEmptyPrefixSet()); |
- } else if (bloom_miss_prefix_hit) { |
- // This case should be impossible if the filters are both valid. |
- RecordPrefixSetInfo(PREFIX_SET_BLOOM_MISS_PREFIX_HIT); |
} |
} |