| Index: chrome/browser/safe_browsing/safe_browsing_database.cc
|
| diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
|
| index ea958d3d24445a944cfad9c9e6f5bb7acd896dda..784904eb6a2bed16750367f9808aa24272856fbc 100644
|
| --- a/chrome/browser/safe_browsing/safe_browsing_database.cc
|
| +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
|
| @@ -258,134 +258,13 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
|
| return a.full_hash.prefix < b.full_hash.prefix;
|
| }
|
|
|
| -// As compared to the bloom filter, PrefixSet should have these
|
| -// properties:
|
| -// - Any bloom filter miss should be a prefix set miss.
|
| -// - Any prefix set hit should be a bloom filter hit.
|
| -// - Bloom filter false positives are prefix set misses.
|
| -// The following is to log actual performance to verify this.
|
| -enum PrefixSetEvent {
|
| - // Hits to prefix set and bloom filter.
|
| - PREFIX_SET_HIT,
|
| - PREFIX_SET_BLOOM_HIT,
|
| - // These were to track bloom misses which hit the prefix set, with
|
| - // _INVALID to track where the item didn't appear to actually be in
|
| - // the prefix set. _INVALID was never hit.
|
| - PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_OBSOLETE,
|
| - PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_INVALID_OBSOLETE,
|
| - // GetPrefixes() after creation failed to get the same prefixes.
|
| - PREFIX_SET_GETPREFIXES_BROKEN,
|
| - // Fine-grained tests which didn't provide any good direction.
|
| - PREFIX_SET_GETPREFIXES_BROKEN_SIZE_OBSOLETE,
|
| - PREFIX_SET_GETPREFIXES_FIRST_BROKEN_OBSOLETE,
|
| - PREFIX_SET_SBPREFIX_WAS_BROKEN_OBSOLETE,
|
| - PREFIX_SET_GETPREFIXES_BROKEN_SORTING_OBSOLETE,
|
| - PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION_OBSOLETE,
|
| - PREFIX_SET_GETPREFIXES_UNSORTED_IS_DELTA_OBSOLETE,
|
| - PREFIX_SET_GETPREFIXES_UNSORTED_IS_INDEX_OBSOLETE,
|
| - // Failed checksums when creating filters.
|
| - PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM,
|
| - PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM,
|
| - PREFIX_SET_CREATE_ADD_PREFIXES_CHECKSUM,
|
| - PREFIX_SET_CREATE_PREFIXES_CHECKSUM,
|
| - PREFIX_SET_CREATE_GET_PREFIXES_CHECKSUM,
|
| - // Failed checksums when probing the filters.
|
| - PREFIX_SET_MISMATCH_PREFIX_SET_CHECKSUM,
|
| - PREFIX_SET_MISMATCH_BLOOM_FILTER_CHECKSUM,
|
| -
|
| - // Recorded once per update if the impossible occurs.
|
| - PREFIX_SET_BLOOM_MISS_PREFIX_HIT,
|
| -
|
| - // Corresponding CHECKSUM failed, but on retry it succeeded.
|
| - PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM_SUCCEEDED,
|
| - PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM_SUCCEEDED,
|
| -
|
| - // Memory space for histograms is determined by the max. ALWAYS ADD
|
| - // NEW VALUES BEFORE THIS ONE.
|
| - PREFIX_SET_EVENT_MAX
|
| -};
|
| -
|
| -void RecordPrefixSetInfo(PrefixSetEvent event_type) {
|
| - UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
|
| - PREFIX_SET_EVENT_MAX);
|
| -}
|
| -
|
| // Helper to reduce code duplication.
|
| safe_browsing::PrefixSet* CreateEmptyPrefixSet() {
|
| return new safe_browsing::PrefixSet(std::vector<SBPrefix>());
|
| }
|
|
|
| -// Generate xor "checksum" of SBPrefix part of add prefixes. Pass 0
|
| -// for |seed| will return a checksum, passing a previous checksum for
|
| -// |seed| will return 0 (if it checks out). Coded this way in hopes
|
| -// that the compiler won't optimize separate runs into a single
|
| -// temporary.
|
| -SBPrefix ChecksumAddPrefixes(SBPrefix seed,
|
| - const SBAddPrefixes& add_prefixes) {
|
| - SBPrefix checksum = seed;
|
| - for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
|
| - iter != add_prefixes.end(); ++iter) {
|
| - checksum ^= iter->prefix;
|
| - }
|
| - return checksum;
|
| -}
|
| -
|
| -// Generate xor "checksum" of prefixes.
|
| -SBPrefix ChecksumPrefixes(SBPrefix seed,
|
| - const std::vector<SBPrefix>& prefixes) {
|
| - SBPrefix checksum = seed;
|
| - for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
|
| - iter != prefixes.end(); ++iter) {
|
| - checksum ^= *iter;
|
| - }
|
| - return checksum;
|
| -}
|
| -
|
| -// Prefix set doesn't store duplicates, making it hard to verify that
|
| -// checksums match. Also hard is verifying that nothing was corrupted
|
| -// while removing duplicates from a vector. So this generates a
|
| -// checksum of only the unique elements.
|
| -SBPrefix ChecksumUniquePrefixes(const std::vector<SBPrefix>& prefixes) {
|
| - // Handle edge case up front.
|
| - if (prefixes.size() == 0)
|
| - return 0;
|
| -
|
| - std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
|
| - SBPrefix checksum = *iter++;
|
| - while (iter != prefixes.end()) {
|
| - if (*(iter - 1) != *iter)
|
| - checksum ^= *iter;
|
| - iter++;
|
| - }
|
| - return checksum;
|
| -}
|
| -
|
| -// NOTE(shess): Past histograms have indicated that in a given day,
|
| -// about 1 in 100,000 updates result in a PrefixSet which was
|
| -// inconsistent relative to the BloomFilter. Windows is about 5x more
|
| -// likely to build an inconsistent PrefixSet than Mac. A number of
|
| -// developers have reviewed the code, and I ran extensive fuzzing with
|
| -// random data, so at this point I'm trying to demonstrate memory
|
| -// corruption.
|
| -//
|
| -// Other findings from past instrumentation:
|
| -// - half of one percent of brokenness cases implied duplicate items
|
| -// in |prefixes|. Per the code above, this should not be possible.
|
| -// - about 1/20 of broken cases happened more than once for a given
|
| -// user. Note that empty updates generally don't hit this code at
|
| -// all, so that may not imply a specific input pattern breaking
|
| -// things.
|
| -// - about 1/3 of broken cases show a checksum mismatch between the
|
| -// checksum calculated while creating |prefix_set| and the checksum
|
| -// calculated immediately after creation. This is almost certainly
|
| -// memory corruption.
|
| -
|
| // Generate |PrefixSet| and |BloomFilter| instances from the contents
|
| -// of |add_prefixes|. Verify that the results are internally
|
| -// consistent, and that the input maintained consistence while
|
| -// constructing (which should assure that they are mutually
|
| -// consistent). Returns an empty prefix set if any of the checks
|
| -// fail.
|
| +// of |add_prefixes|.
|
| void FiltersFromAddPrefixes(
|
| const SBAddPrefixes& add_prefixes,
|
| scoped_refptr<BloomFilter>* bloom_filter,
|
| @@ -398,8 +277,6 @@ void FiltersFromAddPrefixes(
|
| return;
|
| }
|
|
|
| - const SBPrefix add_prefixes_checksum = ChecksumAddPrefixes(0, add_prefixes);
|
| -
|
| // TODO(shess): If |add_prefixes| were sorted by the prefix, it
|
| // could be passed directly to |PrefixSet()|, removing the need for
|
| // |prefixes|.
|
| @@ -411,105 +288,12 @@ void FiltersFromAddPrefixes(
|
| }
|
| std::sort(prefixes.begin(), prefixes.end());
|
|
|
| - const SBPrefix unique_prefixes_checksum = ChecksumUniquePrefixes(prefixes);
|
| -
|
| for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
|
| iter != prefixes.end(); ++iter) {
|
| bloom_filter->get()->Insert(*iter);
|
| }
|
|
|
| prefix_set->reset(new safe_browsing::PrefixSet(prefixes));
|
| -
|
| - // "unit test" for ChecksumUniquePrefixes() and GetPrefixesChecksum().
|
| - if (DCHECK_IS_ON()) {
|
| - std::vector<SBPrefix> unique(prefixes);
|
| - unique.erase(std::unique(unique.begin(), unique.end()), unique.end());
|
| - DCHECK_EQ(0, ChecksumPrefixes(unique_prefixes_checksum, unique));
|
| -
|
| - std::vector<SBPrefix> restored;
|
| - prefix_set->get()->GetPrefixes(&restored);
|
| - DCHECK_EQ(0, ChecksumPrefixes(prefix_set->get()->GetPrefixesChecksum(),
|
| - restored));
|
| - }
|
| -
|
| - // TODO(shess): Need a barrier to prevent ordering checks above here
|
| - // or construction below here?
|
| -
|
| - // NOTE(shess): So far, the fallout is:
|
| - // 605 PREFIX_SET_CHECKSUM (failed prefix_set->CheckChecksum()).
|
| - // 32 BLOOM_FILTER_CHECKSUM (failed bloom_filter->CheckChecksum()).
|
| - // 1 ADD_PREFIXES_CHECKSUM (contents of add_prefixes changed).
|
| - // 7 PREFIXES_CHECKSUM (contents of prefixes changed).
|
| -
|
| - bool get_prefixes_broken =
|
| - (unique_prefixes_checksum != prefix_set->get()->GetPrefixesChecksum());
|
| - bool prefix_set_broken = !prefix_set->get()->CheckChecksum();
|
| - bool bloom_filter_broken = !bloom_filter->get()->CheckChecksum();
|
| - bool prefixes_input_broken =
|
| - (0 != ChecksumPrefixes(add_prefixes_checksum, prefixes));
|
| - bool add_prefixes_input_broken =
|
| - (0 != ChecksumAddPrefixes(add_prefixes_checksum, add_prefixes));
|
| -
|
| - if (add_prefixes_input_broken) {
|
| - RecordPrefixSetInfo(PREFIX_SET_CREATE_ADD_PREFIXES_CHECKSUM);
|
| - } else if (prefixes_input_broken) {
|
| - RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIXES_CHECKSUM);
|
| - }
|
| -
|
| - if (prefix_set_broken) {
|
| - RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM);
|
| -
|
| - // Failing PrefixSet::CheckChecksum() implies that the set's
|
| - // internal data changed during construction. If the retry
|
| - // consistently succeeds, that implies memory corruption. If the
|
| - // retry consistently fails, that implies PrefixSet is broken.
|
| - scoped_ptr<safe_browsing::PrefixSet> retry_set(
|
| - new safe_browsing::PrefixSet(prefixes));
|
| - if (retry_set->CheckChecksum())
|
| - RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM_SUCCEEDED);
|
| -
|
| - // TODO(shess): In case of double failure, consider pinning the
|
| - // data and calling NOTREACHED(). But it's a lot of data. Could
|
| - // also implement a binary search to constrain the amount of input
|
| - // to consider, but that's a lot of complexity.
|
| - }
|
| -
|
| - // TODO(shess): Obviously this is a problem for operation of the
|
| - // bloom filter! But for purposes of checking prefix set operation,
|
| - // all that matters is not messing up the histograms recorded later.
|
| - if (bloom_filter_broken) {
|
| - RecordPrefixSetInfo(PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM);
|
| -
|
| - // As for PrefixSet, failing BloomFilter::CheckChecksum() implies
|
| - // that the filter's internal data was changed.
|
| - scoped_refptr<BloomFilter> retry_filter(new BloomFilter(filter_size));
|
| - for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
|
| - iter != prefixes.end(); ++iter) {
|
| - retry_filter->Insert(*iter);
|
| - }
|
| - if (retry_filter->CheckChecksum())
|
| - RecordPrefixSetInfo(PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM_SUCCEEDED);
|
| - }
|
| -
|
| - // Attempt to isolate the case where the output of GetPrefixes()
|
| - // would differ from the input presented. This case implies that
|
| - // PrefixSet has an actual implementation flaw, and may in the
|
| - // future warrant more aggressive action, such as somehow dumping
|
| - // |prefixes| up to the crash server.
|
| - if (get_prefixes_broken && !prefix_set_broken && !prefixes_input_broken)
|
| - RecordPrefixSetInfo(PREFIX_SET_CREATE_GET_PREFIXES_CHECKSUM);
|
| -
|
| - // If anything broke, clear the prefix set to prevent pollution of
|
| - // histograms generated later.
|
| - if (get_prefixes_broken || prefix_set_broken || bloom_filter_broken ||
|
| - prefixes_input_broken || add_prefixes_input_broken) {
|
| - DCHECK(!get_prefixes_broken);
|
| - DCHECK(!prefix_set_broken);
|
| - DCHECK(!bloom_filter_broken);
|
| - DCHECK(!prefixes_input_broken);
|
| - DCHECK(!add_prefixes_input_broken);
|
| - prefix_set->reset(CreateEmptyPrefixSet());
|
| - }
|
| }
|
|
|
| } // namespace
|
| @@ -764,59 +548,13 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
|
|
|
| if (!browse_bloom_filter_.get())
|
| return false;
|
| - DCHECK(prefix_set_.get());
|
| -
|
| - // |prefix_set_| is empty until the first update, only log info if
|
| - // not empty.
|
| - const bool prefix_set_empty = !prefix_set_->GetSize();
|
| -
|
| - // Track cases where the filters were not consistent with each other.
|
| - bool bloom_hit_prefix_miss = false;
|
| - bool bloom_miss_prefix_hit = false;
|
|
|
| size_t miss_count = 0;
|
| for (size_t i = 0; i < full_hashes.size(); ++i) {
|
| - bool found = prefix_set_->Exists(full_hashes[i].prefix);
|
| -
|
| if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
|
| - if (!prefix_set_empty) {
|
| - RecordPrefixSetInfo(PREFIX_SET_BLOOM_HIT);
|
| - // This should be less than PREFIX_SET_BLOOM_HIT by the
|
| - // false positive rate.
|
| - if (found) {
|
| - RecordPrefixSetInfo(PREFIX_SET_HIT);
|
| - } else {
|
| - // Could be false positive, but _could_ be corruption.
|
| - bloom_hit_prefix_miss = true;
|
| - }
|
| - }
|
| prefix_hits->push_back(full_hashes[i].prefix);
|
| if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
|
| ++miss_count;
|
| - } else {
|
| - // Bloom filter misses should never be in prefix set. Check
|
| - // to see if the prefix set or bloom filter have changed since
|
| - // being created.
|
| - DCHECK(!found);
|
| - if (found && !prefix_set_empty)
|
| - bloom_miss_prefix_hit = true;
|
| - }
|
| - }
|
| -
|
| - // In case of inconsistent results, verify the checksums and record
|
| - // failures (in case of corruption inconsistencies would be
|
| - // expected). In case of corruption clear out |prefix_set_|, once
|
| - // we've noticed corruption there is no point to future comparisons.
|
| - if (bloom_miss_prefix_hit || bloom_hit_prefix_miss) {
|
| - if (!prefix_set_->CheckChecksum()) {
|
| - RecordPrefixSetInfo(PREFIX_SET_MISMATCH_PREFIX_SET_CHECKSUM);
|
| - prefix_set_.reset(CreateEmptyPrefixSet());
|
| - } else if (!browse_bloom_filter_->CheckChecksum()) {
|
| - RecordPrefixSetInfo(PREFIX_SET_MISMATCH_BLOOM_FILTER_CHECKSUM);
|
| - prefix_set_.reset(CreateEmptyPrefixSet());
|
| - } else if (bloom_miss_prefix_hit) {
|
| - // This case should be impossible if the filters are both valid.
|
| - RecordPrefixSetInfo(PREFIX_SET_BLOOM_MISS_PREFIX_HIT);
|
| }
|
| }
|
|
|
|
|