| Index: chrome/browser/safe_browsing/safe_browsing_database.cc
 | 
| diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
 | 
| index 4234ed0d9a06a58a32a69735e386ba3df694ceff..edbcd961b6a5e731407fe3ca229dcd87af4dcf8a 100644
 | 
| --- a/chrome/browser/safe_browsing/safe_browsing_database.cc
 | 
| +++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
 | 
| @@ -236,19 +236,26 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
 | 
|  // - Bloom filter false positives are prefix set misses.
 | 
|  // The following is to log actual performance to verify this.
 | 
|  enum PrefixSetEvent {
 | 
| -  PREFIX_SET_EVENT_HIT,
 | 
| -  PREFIX_SET_EVENT_BLOOM_HIT,
 | 
| -  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
 | 
| -  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
 | 
| +  // Hits to prefix set and bloom filter.
 | 
| +  PREFIX_SET_HIT,
 | 
| +  PREFIX_SET_BLOOM_HIT,
 | 
| +  // These were to track bloom misses which hit the prefix set, with
 | 
| +  // _INVALID to track where the item didn't appear to actually be in
 | 
| +  // the prefix set.  _INVALID was never hit.
 | 
| +  PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT,
 | 
| +  PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_INVALID_OBSOLETE,
 | 
| +  // GetPrefixes() after creation failed to get the same prefixes.
 | 
|    PREFIX_SET_GETPREFIXES_BROKEN,
 | 
| -  PREFIX_SET_GETPREFIXES_BROKEN_SIZE,
 | 
| -  PREFIX_SET_GETPREFIXES_FIRST_BROKEN,
 | 
| -  PREFIX_SET_SBPREFIX_WAS_BROKEN,
 | 
| -  PREFIX_SET_GETPREFIXES_BROKEN_SORTING,
 | 
| -  PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION,
 | 
| -  PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA,
 | 
| -  PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX,
 | 
| -  PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH,
 | 
| +  // Fine-grained tests which didn't provide any good direction.
 | 
| +  PREFIX_SET_GETPREFIXES_BROKEN_SIZE_OBSOLETE,
 | 
| +  PREFIX_SET_GETPREFIXES_FIRST_BROKEN_OBSOLETE,
 | 
| +  PREFIX_SET_SBPREFIX_WAS_BROKEN_OBSOLETE,
 | 
| +  PREFIX_SET_GETPREFIXES_BROKEN_SORTING_OBSOLETE,
 | 
| +  PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION_OBSOLETE,
 | 
| +  PREFIX_SET_GETPREFIXES_UNSORTED_IS_DELTA_OBSOLETE,
 | 
| +  PREFIX_SET_GETPREFIXES_UNSORTED_IS_INDEX_OBSOLETE,
 | 
| +  // Failed checksum when creating prefix set.
 | 
| +  PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM,
 | 
|  
 | 
|    // Memory space for histograms is determined by the max.  ALWAYS ADD
 | 
|    // NEW VALUES BEFORE THIS ONE.
 | 
| @@ -260,10 +267,14 @@ void RecordPrefixSetInfo(PrefixSetEvent event_type) {
 | 
|                              PREFIX_SET_EVENT_MAX);
 | 
|  }
 | 
|  
 | 
| +// Helper to reduce code duplication.
 | 
| +safe_browsing::PrefixSet* CreateEmptyPrefixSet() {
 | 
| +  return new safe_browsing::PrefixSet(std::vector<SBPrefix>());
 | 
| +}
 | 
| +
 | 
|  // Generate a |PrefixSet| instance from the contents of
 | 
|  // |add_prefixes|.  Additionally performs various checks to make sure
 | 
| -// that the resulting prefix set is valid, so that the
 | 
| -// PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
 | 
| +// that the resulting prefix set is valid, so that histograms in
 | 
|  // ContainsBrowseUrl() can be trustworthy.
 | 
|  safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
 | 
|      const SBAddPrefixes& add_prefixes) {
 | 
| @@ -293,107 +304,35 @@ safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
 | 
|        std::equal(prefixes.begin(), prefixes.end(), restored.begin()))
 | 
|      return prefix_set.release();
 | 
|  
 | 
| -  // Log BROKEN for continuity with previous release, and SIZE to
 | 
| -  // distinguish which test failed.
 | 
| +  // NOTE(shess): Past histograms have indicated that in a given day,
 | 
| +  // about 1 in 100,000 updates result in a PrefixSet which was
 | 
| +  // inconsistent relative to the BloomFilter.  Windows is about 5x
 | 
| +  // more likely to build an inconsistent PrefixSet than Mac.  A
 | 
| +  // number of developers have reviewed the code, and I ran extensive
 | 
| +  // fuzzing with random data, so at this point I'm trying to
 | 
| +  // demonstrate memory corruption.
 | 
| +  //
 | 
| +  // Other findings from past instrumentation:
 | 
| +  // - half of one percent of brokenness cases implied duplicate items
 | 
| +  //   in |prefixes|.  Per the code above, this should not be
 | 
| +  //   possible.
 | 
| +  // - about 1/20 of broken cases happened more than once for a given
 | 
| +  //   user.  Note that empty updates generally don't hit this code at
 | 
| +  //   all, so that may not imply a specific input pattern breaking things.
 | 
| +  // - about 1/3 of broken cases show a checksum mismatch between the
 | 
| +  //   checksum calculated while creating |prefix_set| and the
 | 
| +  //   checksum calculated immediately after creation.  This is almost
 | 
| +  //   certainly memory corruption.
 | 
|    NOTREACHED();
 | 
|    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
 | 
| -  if (restored.size() != prefixes.size())
 | 
| -    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE);
 | 
| -
 | 
| -  // Try to distinguish between updates from one broken user and a
 | 
| -  // distributed problem.
 | 
| -  static bool logged_broken = false;
 | 
| -  if (!logged_broken) {
 | 
| -    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN);
 | 
| -    logged_broken = true;
 | 
| -  }
 | 
| -
 | 
| -  // This seems so very very unlikely.  But if it ever were true, then
 | 
| -  // it could explain why GetPrefixes() seemed broken.
 | 
| -  if (sizeof(int) != sizeof(int32))
 | 
| -    RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN);
 | 
|  
 | 
| -  // Check if memory was corrupted during construction.
 | 
| +  // Broken because internal memory was corrupted during construction.
 | 
|    if (!prefix_set->CheckChecksum())
 | 
| -    RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH);
 | 
| -
 | 
| -  // Check whether |restored| is unsorted, or has duplication.
 | 
| -  if (restored.size()) {
 | 
| -    size_t unsorted_count = 0;
 | 
| -    bool duplicates = false;
 | 
| -    SBPrefix prev = restored[0];
 | 
| -    for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) {
 | 
| -      if (prev > restored[i]) {
 | 
| -        unsorted_count++;
 | 
| -        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference",
 | 
| -                             prev - restored[i]);
 | 
| -
 | 
| -        // When unsorted, how big is the set, and how far are we into
 | 
| -        // it.  If the set is very small or large, that might inform
 | 
| -        // pursuit of a degenerate case.  If the percentage is close
 | 
| -        // to 0%, 100%, or 50%, then there might be an interesting
 | 
| -        // degenerate case to explore.
 | 
| -        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size());
 | 
| -        UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent",
 | 
| -                                 i * 100 / restored.size());
 | 
| -
 | 
| -        if (prefix_set->IsDeltaAt(i)) {
 | 
| -          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA);
 | 
| -
 | 
| -          // Histograms require memory on the order of the number of
 | 
| -          // buckets, making high-precision logging expensive.  For
 | 
| -          // now aim for a sense of the range of the problem.
 | 
| -          UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta",
 | 
| -                                      prefix_set->DeltaAt(i), 1, 0xFFFF, 50);
 | 
| -        } else {
 | 
| -          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX);
 | 
| -        }
 | 
| -      }
 | 
| -      if (prev == restored[i])
 | 
| -        duplicates = true;
 | 
| -    }
 | 
| +    RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM);
 | 
|  
 | 
| -    // Record findings.
 | 
| -    if (unsorted_count) {
 | 
| -      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING);
 | 
| -      UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count);
 | 
| -    }
 | 
| -    if (duplicates)
 | 
| -      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION);
 | 
| -
 | 
| -    // Fix the problems noted.  If |restored| was unsorted, then
 | 
| -    // |duplicates| may give a false negative.
 | 
| -    if (unsorted_count)
 | 
| -      std::sort(restored.begin(), restored.end());
 | 
| -    if (unsorted_count || duplicates)
 | 
| -      restored.erase(std::unique(restored.begin(), restored.end()),
 | 
| -                     restored.end());
 | 
| -  }
 | 
| -
 | 
| -  // NOTE(shess): The following could be done using a single
 | 
| -  // uber-loop, but it's complicated by needing multiple parallel
 | 
| -  // iterators.  Didn't seem worthwhile for something that will only
 | 
| -  // live for a short period and only fires for one in a million
 | 
| -  // updates.
 | 
| +  // TODO(shess): Test whether |prefixes| changed during construction.
 | 
|  
 | 
| -  // Find elements in |restored| which are not in |prefixes|.
 | 
| -  std::vector<SBPrefix> difference;
 | 
| -  std::set_difference(restored.begin(), restored.end(),
 | 
| -                      prefixes.begin(), prefixes.end(),
 | 
| -                      std::back_inserter(difference));
 | 
| -  if (difference.size())
 | 
| -    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size());
 | 
| -
 | 
| -  // Find elements in |prefixes| which are not in |restored|.
 | 
| -  difference.clear();
 | 
| -  std::set_difference(prefixes.begin(), prefixes.end(),
 | 
| -                      restored.begin(), restored.end(),
 | 
| -                      std::back_inserter(difference));
 | 
| -  if (difference.size())
 | 
| -    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall",
 | 
| -                             difference.size());
 | 
| -
 | 
| -  return prefix_set.release();
 | 
| +  return CreateEmptyPrefixSet();
 | 
|  }
 | 
|  
 | 
|  }  // namespace
 | 
| @@ -616,7 +555,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
 | 
|                                             BloomFilter::kBloomFilterSizeRatio);
 | 
|      // TODO(shess): It is simpler for the code to assume that presence
 | 
|      // of a bloom filter always implies presence of a prefix set.
 | 
| -    prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
 | 
| +    prefix_set_.reset(CreateEmptyPrefixSet());
 | 
|    }
 | 
|    // Wants to acquire the lock itself.
 | 
|    WhitelistEverything(&csd_whitelist_);
 | 
| @@ -654,44 +593,26 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
 | 
|    // not empty.
 | 
|    const bool prefix_set_empty = !prefix_set_->GetSize();
 | 
|  
 | 
| -  // Used to double-check in case of a hit mis-match.
 | 
| -  std::vector<SBPrefix> restored;
 | 
| -
 | 
|    size_t miss_count = 0;
 | 
|    for (size_t i = 0; i < full_hashes.size(); ++i) {
 | 
|      bool found = prefix_set_->Exists(full_hashes[i].prefix);
 | 
|  
 | 
|      if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
 | 
|        if (!prefix_set_empty) {
 | 
| -        RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
 | 
| +        RecordPrefixSetInfo(PREFIX_SET_BLOOM_HIT);
 | 
| +        // This should be less than PREFIX_SET_BLOOM_HIT by the
 | 
| +        // false positive rate.
 | 
|          if (found)
 | 
| -          RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
 | 
| +          RecordPrefixSetInfo(PREFIX_SET_HIT);
 | 
|        }
 | 
|        prefix_hits->push_back(full_hashes[i].prefix);
 | 
|        if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
 | 
|          ++miss_count;
 | 
|      } else {
 | 
| -      // Bloom filter misses should never be in prefix set.  Re-create
 | 
| -      // the original prefixes and manually search for it, to check if
 | 
| -      // there's a bug with how |Exists()| is implemented.
 | 
| -      // |UpdateBrowseStore()| previously verified that
 | 
| -      // |GetPrefixes()| returns the same prefixes as were passed to
 | 
| -      // the constructor.
 | 
| +      // Bloom filter misses should never be in prefix set.
 | 
|        DCHECK(!found);
 | 
| -      if (found && !prefix_set_empty) {
 | 
| -        if (restored.empty())
 | 
| -          prefix_set_->GetPrefixes(&restored);
 | 
| -
 | 
| -        // If the item is not in the re-created list, then there is an
 | 
| -        // error in |PrefixSet::Exists()|.  If the item is in the
 | 
| -        // re-created list, then the bloom filter was wrong.
 | 
| -        if (std::binary_search(restored.begin(), restored.end(),
 | 
| -                               full_hashes[i].prefix)) {
 | 
| -          RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
 | 
| -        } else {
 | 
| -          RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
 | 
| -        }
 | 
| -      }
 | 
| +      if (found)
 | 
| +        RecordPrefixSetInfo(PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT);
 | 
|      }
 | 
|    }
 | 
|  
 | 
| @@ -1386,7 +1307,7 @@ void SafeBrowsingDatabaseNew::LoadBloomFilter() {
 | 
|      RecordFailure(FAILURE_DATABASE_FILTER_READ);
 | 
|  
 | 
|    // Use an empty prefix set until the first update.
 | 
| -  prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
 | 
| +  prefix_set_.reset(CreateEmptyPrefixSet());
 | 
|  }
 | 
|  
 | 
|  bool SafeBrowsingDatabaseNew::Delete() {
 | 
| 
 |