Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(35)

Unified Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 10802098: Cleanup histograms tracking prefix-set issues in safe-browsing. (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/safe_browsing/safe_browsing_database.cc
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
index 4234ed0d9a06a58a32a69735e386ba3df694ceff..edbcd961b6a5e731407fe3ca229dcd87af4dcf8a 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -236,19 +236,26 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
// - Bloom filter false positives are prefix set misses.
// The following is to log actual performance to verify this.
enum PrefixSetEvent {
- PREFIX_SET_EVENT_HIT,
- PREFIX_SET_EVENT_BLOOM_HIT,
- PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
- PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
+ // Hits to prefix set and bloom filter.
+ PREFIX_SET_HIT,
+ PREFIX_SET_BLOOM_HIT,
+ // These were to track bloom misses which hit the prefix set, with
+ // _INVALID to track where the item didn't appear to actually be in
+ // the prefix set. _INVALID was never hit.
+ PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT,
+ PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_INVALID_OBSOLETE,
+ // GetPrefixes() after creation failed to get the same prefixes.
PREFIX_SET_GETPREFIXES_BROKEN,
- PREFIX_SET_GETPREFIXES_BROKEN_SIZE,
- PREFIX_SET_GETPREFIXES_FIRST_BROKEN,
- PREFIX_SET_SBPREFIX_WAS_BROKEN,
- PREFIX_SET_GETPREFIXES_BROKEN_SORTING,
- PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION,
- PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA,
- PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX,
- PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH,
+ // Fine-grained tests which didn't provide any good direction.
+ PREFIX_SET_GETPREFIXES_BROKEN_SIZE_OBSOLETE,
+ PREFIX_SET_GETPREFIXES_FIRST_BROKEN_OBSOLETE,
+ PREFIX_SET_SBPREFIX_WAS_BROKEN_OBSOLETE,
+ PREFIX_SET_GETPREFIXES_BROKEN_SORTING_OBSOLETE,
+ PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION_OBSOLETE,
+ PREFIX_SET_GETPREFIXES_UNSORTED_IS_DELTA_OBSOLETE,
+ PREFIX_SET_GETPREFIXES_UNSORTED_IS_INDEX_OBSOLETE,
+ // Failed checksum when creating prefix set.
+ PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM,
// Memory space for histograms is determined by the max. ALWAYS ADD
// NEW VALUES BEFORE THIS ONE.
@@ -260,10 +267,14 @@ void RecordPrefixSetInfo(PrefixSetEvent event_type) {
PREFIX_SET_EVENT_MAX);
}
+// Helper to reduce code duplication.
+safe_browsing::PrefixSet* CreateEmptyPrefixSet() {
+ return new safe_browsing::PrefixSet(std::vector<SBPrefix>());
+}
+
// Generate a |PrefixSet| instance from the contents of
// |add_prefixes|. Additionally performs various checks to make sure
-// that the resulting prefix set is valid, so that the
-// PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
+// that the resulting prefix set is valid, so that histograms in
// ContainsBrowseUrl() can be trustworthy.
safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
const SBAddPrefixes& add_prefixes) {
@@ -293,107 +304,35 @@ safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
std::equal(prefixes.begin(), prefixes.end(), restored.begin()))
return prefix_set.release();
- // Log BROKEN for continuity with previous release, and SIZE to
- // distinguish which test failed.
+ // NOTE(shess): Past histograms have indicated that in a given day,
+ // about 1 in 100,000 updates result in a PrefixSet which was
+ // inconsistent relative to the BloomFilter. Windows is about 5x
+ // more likely to build an inconsistent PrefixSet than Mac. A
+ // number of developers have reviewed the code, and I ran extensive
+ // fuzzing with random data, so at this point I'm trying to
+ // demonstrate memory corruption.
+ //
+ // Other findings from past instrumentation:
+ // - half of one percent of brokenness cases implied duplicate items
+ // in |prefixes|. Per the code above, this should not be
+ // possible.
+ // - about 1/20 of broken cases happened more than once for a given
+ // user. Note that empty updates generally don't hit this code at
+ // all, so that may not imply a specific input pattern breaking things.
+ // - about 1/3 of broken cases show a checksum mismatch between the
+ // checksum calculated while creating |prefix_set| and the
+ // checksum calculated immediately after creation. This is almost
+ // certainly memory corruption.
NOTREACHED();
RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
- if (restored.size() != prefixes.size())
- RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE);
-
- // Try to distinguish between updates from one broken user and a
- // distributed problem.
- static bool logged_broken = false;
- if (!logged_broken) {
- RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN);
- logged_broken = true;
- }
-
- // This seems so very very unlikely. But if it ever were true, then
- // it could explain why GetPrefixes() seemed broken.
- if (sizeof(int) != sizeof(int32))
- RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN);
- // Check if memory was corrupted during construction.
+ // Broken because internal memory was corrupted during construction.
if (!prefix_set->CheckChecksum())
- RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH);
-
- // Check whether |restored| is unsorted, or has duplication.
- if (restored.size()) {
- size_t unsorted_count = 0;
- bool duplicates = false;
- SBPrefix prev = restored[0];
- for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) {
- if (prev > restored[i]) {
- unsorted_count++;
- UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference",
- prev - restored[i]);
-
- // When unsorted, how big is the set, and how far are we into
- // it. If the set is very small or large, that might inform
- // pursuit of a degenerate case. If the percentage is close
- // to 0%, 100%, or 50%, then there might be an interesting
- // degenerate case to explore.
- UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size());
- UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent",
- i * 100 / restored.size());
-
- if (prefix_set->IsDeltaAt(i)) {
- RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA);
-
- // Histograms require memory on the order of the number of
- // buckets, making high-precision logging expensive. For
- // now aim for a sense of the range of the problem.
- UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta",
- prefix_set->DeltaAt(i), 1, 0xFFFF, 50);
- } else {
- RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX);
- }
- }
- if (prev == restored[i])
- duplicates = true;
- }
+ RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM);
- // Record findings.
- if (unsorted_count) {
- RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING);
- UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count);
- }
- if (duplicates)
- RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION);
-
- // Fix the problems noted. If |restored| was unsorted, then
- // |duplicates| may give a false negative.
- if (unsorted_count)
- std::sort(restored.begin(), restored.end());
- if (unsorted_count || duplicates)
- restored.erase(std::unique(restored.begin(), restored.end()),
- restored.end());
- }
-
- // NOTE(shess): The following could be done using a single
- // uber-loop, but it's complicated by needing multiple parallel
- // iterators. Didn't seem worthwhile for something that will only
- // live for a short period and only fires for one in a million
- // updates.
+ // TODO(shess): Test whether |prefixes| changed during construction.
- // Find elements in |restored| which are not in |prefixes|.
- std::vector<SBPrefix> difference;
- std::set_difference(restored.begin(), restored.end(),
- prefixes.begin(), prefixes.end(),
- std::back_inserter(difference));
- if (difference.size())
- UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size());
-
- // Find elements in |prefixes| which are not in |restored|.
- difference.clear();
- std::set_difference(prefixes.begin(), prefixes.end(),
- restored.begin(), restored.end(),
- std::back_inserter(difference));
- if (difference.size())
- UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall",
- difference.size());
-
- return prefix_set.release();
+ return CreateEmptyPrefixSet();
}
} // namespace
@@ -616,7 +555,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
BloomFilter::kBloomFilterSizeRatio);
// TODO(shess): It is simpler for the code to assume that presence
// of a bloom filter always implies presence of a prefix set.
- prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
+ prefix_set_.reset(CreateEmptyPrefixSet());
}
// Wants to acquire the lock itself.
WhitelistEverything(&csd_whitelist_);
@@ -654,44 +593,26 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
// not empty.
const bool prefix_set_empty = !prefix_set_->GetSize();
- // Used to double-check in case of a hit mis-match.
- std::vector<SBPrefix> restored;
-
size_t miss_count = 0;
for (size_t i = 0; i < full_hashes.size(); ++i) {
bool found = prefix_set_->Exists(full_hashes[i].prefix);
if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
if (!prefix_set_empty) {
- RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
+ RecordPrefixSetInfo(PREFIX_SET_BLOOM_HIT);
+ // This should be less than PREFIX_SET_BLOOM_HIT by the
+ // false positive rate.
if (found)
- RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
+ RecordPrefixSetInfo(PREFIX_SET_HIT);
}
prefix_hits->push_back(full_hashes[i].prefix);
if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
++miss_count;
} else {
- // Bloom filter misses should never be in prefix set. Re-create
- // the original prefixes and manually search for it, to check if
- // there's a bug with how |Exists()| is implemented.
- // |UpdateBrowseStore()| previously verified that
- // |GetPrefixes()| returns the same prefixes as were passed to
- // the constructor.
+ // Bloom filter misses should never be in prefix set.
DCHECK(!found);
- if (found && !prefix_set_empty) {
- if (restored.empty())
- prefix_set_->GetPrefixes(&restored);
-
- // If the item is not in the re-created list, then there is an
- // error in |PrefixSet::Exists()|. If the item is in the
- // re-created list, then the bloom filter was wrong.
- if (std::binary_search(restored.begin(), restored.end(),
- full_hashes[i].prefix)) {
- RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
- } else {
- RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
- }
- }
+ if (found)
+ RecordPrefixSetInfo(PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT);
}
}
@@ -1386,7 +1307,7 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() {
RecordFailure(FAILURE_DATABASE_FILTER_READ);
// Use an empty prefix set until the first update.
- prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
+ prefix_set_.reset(CreateEmptyPrefixSet());
}
bool SafeBrowsingDatabaseNew::Delete() {
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698