| Index: chrome/browser/safe_browsing/safe_browsing_database.cc
|
| diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
|
| index 4234ed0d9a06a58a32a69735e386ba3df694ceff..edbcd961b6a5e731407fe3ca229dcd87af4dcf8a 100644
|
| --- a/chrome/browser/safe_browsing/safe_browsing_database.cc
|
| +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
|
| @@ -236,19 +236,26 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
|
| // - Bloom filter false positives are prefix set misses.
|
| // The following is to log actual performance to verify this.
|
| enum PrefixSetEvent {
|
| - PREFIX_SET_EVENT_HIT,
|
| - PREFIX_SET_EVENT_BLOOM_HIT,
|
| - PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
|
| - PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
|
| + // Hits to prefix set and bloom filter.
|
| + PREFIX_SET_HIT,
|
| + PREFIX_SET_BLOOM_HIT,
|
| + // These were to track bloom misses which hit the prefix set, with
|
| + // _INVALID to track where the item didn't appear to actually be in
|
| + // the prefix set. _INVALID was never hit.
|
| + PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT,
|
| + PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_INVALID_OBSOLETE,
|
| + // GetPrefixes() after creation failed to get the same prefixes.
|
| PREFIX_SET_GETPREFIXES_BROKEN,
|
| - PREFIX_SET_GETPREFIXES_BROKEN_SIZE,
|
| - PREFIX_SET_GETPREFIXES_FIRST_BROKEN,
|
| - PREFIX_SET_SBPREFIX_WAS_BROKEN,
|
| - PREFIX_SET_GETPREFIXES_BROKEN_SORTING,
|
| - PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION,
|
| - PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA,
|
| - PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX,
|
| - PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH,
|
| + // Fine-grained tests which didn't provide any good direction.
|
| + PREFIX_SET_GETPREFIXES_BROKEN_SIZE_OBSOLETE,
|
| + PREFIX_SET_GETPREFIXES_FIRST_BROKEN_OBSOLETE,
|
| + PREFIX_SET_SBPREFIX_WAS_BROKEN_OBSOLETE,
|
| + PREFIX_SET_GETPREFIXES_BROKEN_SORTING_OBSOLETE,
|
| + PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION_OBSOLETE,
|
| + PREFIX_SET_GETPREFIXES_UNSORTED_IS_DELTA_OBSOLETE,
|
| + PREFIX_SET_GETPREFIXES_UNSORTED_IS_INDEX_OBSOLETE,
|
| + // Failed checksum when creating prefix set.
|
| + PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM,
|
|
|
| // Memory space for histograms is determined by the max. ALWAYS ADD
|
| // NEW VALUES BEFORE THIS ONE.
|
| @@ -260,10 +267,14 @@ void RecordPrefixSetInfo(PrefixSetEvent event_type) {
|
| PREFIX_SET_EVENT_MAX);
|
| }
|
|
|
| +// Helper to reduce code duplication.
|
| +safe_browsing::PrefixSet* CreateEmptyPrefixSet() {
|
| + return new safe_browsing::PrefixSet(std::vector<SBPrefix>());
|
| +}
|
| +
|
| // Generate a |PrefixSet| instance from the contents of
|
| // |add_prefixes|. Additionally performs various checks to make sure
|
| -// that the resulting prefix set is valid, so that the
|
| -// PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
|
| +// that the resulting prefix set is valid, so that histograms in
|
| // ContainsBrowseUrl() can be trustworthy.
|
| safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
|
| const SBAddPrefixes& add_prefixes) {
|
| @@ -293,107 +304,35 @@ safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
|
| std::equal(prefixes.begin(), prefixes.end(), restored.begin()))
|
| return prefix_set.release();
|
|
|
| - // Log BROKEN for continuity with previous release, and SIZE to
|
| - // distinguish which test failed.
|
| + // NOTE(shess): Past histograms have indicated that in a given day,
|
| + // about 1 in 100,000 updates result in a PrefixSet which was
|
| + // inconsistent relative to the BloomFilter. Windows is about 5x
|
| + // more likely to build an inconsistent PrefixSet than Mac. A
|
| + // number of developers have reviewed the code, and I ran extensive
|
| + // fuzzing with random data, so at this point I'm trying to
|
| + // demonstrate memory corruption.
|
| + //
|
| + // Other findings from past instrumentation:
|
| + // - half of one percent of brokenness cases implied duplicate items
|
| + // in |prefixes|. Per the code above, this should not be
|
| + // possible.
|
| + // - about 1/20 of broken cases happened more than once for a given
|
| + // user. Note that empty updates generally don't hit this code at
|
| + // all, so that may not imply a specific input pattern breaking things.
|
| + // - about 1/3 of broken cases show a checksum mismatch between the
|
| + // checksum calculated while creating |prefix_set| and the
|
| + // checksum calculated immediately after creation. This is almost
|
| + // certainly memory corruption.
|
| NOTREACHED();
|
| RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
|
| - if (restored.size() != prefixes.size())
|
| - RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE);
|
| -
|
| - // Try to distinguish between updates from one broken user and a
|
| - // distributed problem.
|
| - static bool logged_broken = false;
|
| - if (!logged_broken) {
|
| - RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN);
|
| - logged_broken = true;
|
| - }
|
| -
|
| - // This seems so very very unlikely. But if it ever were true, then
|
| - // it could explain why GetPrefixes() seemed broken.
|
| - if (sizeof(int) != sizeof(int32))
|
| - RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN);
|
|
|
| - // Check if memory was corrupted during construction.
|
| + // Broken because internal memory was corrupted during construction.
|
| if (!prefix_set->CheckChecksum())
|
| - RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH);
|
| -
|
| - // Check whether |restored| is unsorted, or has duplication.
|
| - if (restored.size()) {
|
| - size_t unsorted_count = 0;
|
| - bool duplicates = false;
|
| - SBPrefix prev = restored[0];
|
| - for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) {
|
| - if (prev > restored[i]) {
|
| - unsorted_count++;
|
| - UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference",
|
| - prev - restored[i]);
|
| -
|
| - // When unsorted, how big is the set, and how far are we into
|
| - // it. If the set is very small or large, that might inform
|
| - // pursuit of a degenerate case. If the percentage is close
|
| - // to 0%, 100%, or 50%, then there might be an interesting
|
| - // degenerate case to explore.
|
| - UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size());
|
| - UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent",
|
| - i * 100 / restored.size());
|
| -
|
| - if (prefix_set->IsDeltaAt(i)) {
|
| - RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA);
|
| -
|
| - // Histograms require memory on the order of the number of
|
| - // buckets, making high-precision logging expensive. For
|
| - // now aim for a sense of the range of the problem.
|
| - UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta",
|
| - prefix_set->DeltaAt(i), 1, 0xFFFF, 50);
|
| - } else {
|
| - RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX);
|
| - }
|
| - }
|
| - if (prev == restored[i])
|
| - duplicates = true;
|
| - }
|
| + RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM);
|
|
|
| - // Record findings.
|
| - if (unsorted_count) {
|
| - RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING);
|
| - UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count);
|
| - }
|
| - if (duplicates)
|
| - RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION);
|
| -
|
| - // Fix the problems noted. If |restored| was unsorted, then
|
| - // |duplicates| may give a false negative.
|
| - if (unsorted_count)
|
| - std::sort(restored.begin(), restored.end());
|
| - if (unsorted_count || duplicates)
|
| - restored.erase(std::unique(restored.begin(), restored.end()),
|
| - restored.end());
|
| - }
|
| -
|
| - // NOTE(shess): The following could be done using a single
|
| - // uber-loop, but it's complicated by needing multiple parallel
|
| - // iterators. Didn't seem worthwhile for something that will only
|
| - // live for a short period and only fires for one in a million
|
| - // updates.
|
| + // TODO(shess): Test whether |prefixes| changed during construction.
|
|
|
| - // Find elements in |restored| which are not in |prefixes|.
|
| - std::vector<SBPrefix> difference;
|
| - std::set_difference(restored.begin(), restored.end(),
|
| - prefixes.begin(), prefixes.end(),
|
| - std::back_inserter(difference));
|
| - if (difference.size())
|
| - UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size());
|
| -
|
| - // Find elements in |prefixes| which are not in |restored|.
|
| - difference.clear();
|
| - std::set_difference(prefixes.begin(), prefixes.end(),
|
| - restored.begin(), restored.end(),
|
| - std::back_inserter(difference));
|
| - if (difference.size())
|
| - UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall",
|
| - difference.size());
|
| -
|
| - return prefix_set.release();
|
| + return CreateEmptyPrefixSet();
|
| }
|
|
|
| } // namespace
|
| @@ -616,7 +555,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
|
| BloomFilter::kBloomFilterSizeRatio);
|
| // TODO(shess): It is simpler for the code to assume that presence
|
| // of a bloom filter always implies presence of a prefix set.
|
| - prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
|
| + prefix_set_.reset(CreateEmptyPrefixSet());
|
| }
|
| // Wants to acquire the lock itself.
|
| WhitelistEverything(&csd_whitelist_);
|
| @@ -654,44 +593,26 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
|
| // not empty.
|
| const bool prefix_set_empty = !prefix_set_->GetSize();
|
|
|
| - // Used to double-check in case of a hit mis-match.
|
| - std::vector<SBPrefix> restored;
|
| -
|
| size_t miss_count = 0;
|
| for (size_t i = 0; i < full_hashes.size(); ++i) {
|
| bool found = prefix_set_->Exists(full_hashes[i].prefix);
|
|
|
| if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
|
| if (!prefix_set_empty) {
|
| - RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
|
| + RecordPrefixSetInfo(PREFIX_SET_BLOOM_HIT);
|
| + // This should be less than PREFIX_SET_BLOOM_HIT by the
|
| + // false positive rate.
|
| if (found)
|
| - RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
|
| + RecordPrefixSetInfo(PREFIX_SET_HIT);
|
| }
|
| prefix_hits->push_back(full_hashes[i].prefix);
|
| if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
|
| ++miss_count;
|
| } else {
|
| - // Bloom filter misses should never be in prefix set. Re-create
|
| - // the original prefixes and manually search for it, to check if
|
| - // there's a bug with how |Exists()| is implemented.
|
| - // |UpdateBrowseStore()| previously verified that
|
| - // |GetPrefixes()| returns the same prefixes as were passed to
|
| - // the constructor.
|
| + // Bloom filter misses should never be in prefix set.
|
| DCHECK(!found);
|
| - if (found && !prefix_set_empty) {
|
| - if (restored.empty())
|
| - prefix_set_->GetPrefixes(&restored);
|
| -
|
| - // If the item is not in the re-created list, then there is an
|
| - // error in |PrefixSet::Exists()|. If the item is in the
|
| - // re-created list, then the bloom filter was wrong.
|
| - if (std::binary_search(restored.begin(), restored.end(),
|
| - full_hashes[i].prefix)) {
|
| - RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
|
| - } else {
|
| - RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
|
| - }
|
| - }
|
| + if (found)
|
| + RecordPrefixSetInfo(PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT);
|
| }
|
| }
|
|
|
| @@ -1386,7 +1307,7 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() {
|
| RecordFailure(FAILURE_DATABASE_FILTER_READ);
|
|
|
| // Use an empty prefix set until the first update.
|
| - prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
|
| + prefix_set_.reset(CreateEmptyPrefixSet());
|
| }
|
|
|
| bool SafeBrowsingDatabaseNew::Delete() {
|
|
|