Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(569)

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 10892016: Strip out safe-browsing prefix-set checksumming code. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Oops, remove unit test for deleted code. Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/browser/safe_browsing/prefix_set_unittest.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after
251 if (subs_deleted > 0) 251 if (subs_deleted > 0)
252 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted); 252 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted);
253 } 253 }
254 254
255 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from 255 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from
256 // safe_browsing_store.h orders on both chunk-id and prefix. 256 // safe_browsing_store.h orders on both chunk-id and prefix.
257 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { 257 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
258 return a.full_hash.prefix < b.full_hash.prefix; 258 return a.full_hash.prefix < b.full_hash.prefix;
259 } 259 }
260 260
261 // As compared to the bloom filter, PrefixSet should have these
262 // properties:
263 // - Any bloom filter miss should be a prefix set miss.
264 // - Any prefix set hit should be a bloom filter hit.
265 // - Bloom filter false positives are prefix set misses.
266 // The following is to log actual performance to verify this.
267 enum PrefixSetEvent {
268 // Hits to prefix set and bloom filter.
269 PREFIX_SET_HIT,
270 PREFIX_SET_BLOOM_HIT,
271 // These were to track bloom misses which hit the prefix set, with
272 // _INVALID to track where the item didn't appear to actually be in
273 // the prefix set. _INVALID was never hit.
274 PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_OBSOLETE,
275 PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_INVALID_OBSOLETE,
276 // GetPrefixes() after creation failed to get the same prefixes.
277 PREFIX_SET_GETPREFIXES_BROKEN,
278 // Fine-grained tests which didn't provide any good direction.
279 PREFIX_SET_GETPREFIXES_BROKEN_SIZE_OBSOLETE,
280 PREFIX_SET_GETPREFIXES_FIRST_BROKEN_OBSOLETE,
281 PREFIX_SET_SBPREFIX_WAS_BROKEN_OBSOLETE,
282 PREFIX_SET_GETPREFIXES_BROKEN_SORTING_OBSOLETE,
283 PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION_OBSOLETE,
284 PREFIX_SET_GETPREFIXES_UNSORTED_IS_DELTA_OBSOLETE,
285 PREFIX_SET_GETPREFIXES_UNSORTED_IS_INDEX_OBSOLETE,
286 // Failed checksums when creating filters.
287 PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM,
288 PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM,
289 PREFIX_SET_CREATE_ADD_PREFIXES_CHECKSUM,
290 PREFIX_SET_CREATE_PREFIXES_CHECKSUM,
291 PREFIX_SET_CREATE_GET_PREFIXES_CHECKSUM,
292 // Failed checksums when probing the filters.
293 PREFIX_SET_MISMATCH_PREFIX_SET_CHECKSUM,
294 PREFIX_SET_MISMATCH_BLOOM_FILTER_CHECKSUM,
295
296 // Recorded once per update if the impossible occurs.
297 PREFIX_SET_BLOOM_MISS_PREFIX_HIT,
298
299 // Corresponding CHECKSUM failed, but on retry it succeeded.
300 PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM_SUCCEEDED,
301 PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM_SUCCEEDED,
302
303 // Memory space for histograms is determined by the max. ALWAYS ADD
304 // NEW VALUES BEFORE THIS ONE.
305 PREFIX_SET_EVENT_MAX
306 };
307
308 void RecordPrefixSetInfo(PrefixSetEvent event_type) {
309 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
310 PREFIX_SET_EVENT_MAX);
311 }
312
313 // Helper to reduce code duplication. 261 // Helper to reduce code duplication.
314 safe_browsing::PrefixSet* CreateEmptyPrefixSet() { 262 safe_browsing::PrefixSet* CreateEmptyPrefixSet() {
315 return new safe_browsing::PrefixSet(std::vector<SBPrefix>()); 263 return new safe_browsing::PrefixSet(std::vector<SBPrefix>());
316 } 264 }
317 265
318 // Generate xor "checksum" of SBPrefix part of add prefixes. Pass 0
319 // for |seed| will return a checksum, passing a previous checksum for
320 // |seed| will return 0 (if it checks out). Coded this way in hopes
321 // that the compiler won't optimize separate runs into a single
322 // temporary.
323 SBPrefix ChecksumAddPrefixes(SBPrefix seed,
324 const SBAddPrefixes& add_prefixes) {
325 SBPrefix checksum = seed;
326 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
327 iter != add_prefixes.end(); ++iter) {
328 checksum ^= iter->prefix;
329 }
330 return checksum;
331 }
332
333 // Generate xor "checksum" of prefixes.
334 SBPrefix ChecksumPrefixes(SBPrefix seed,
335 const std::vector<SBPrefix>& prefixes) {
336 SBPrefix checksum = seed;
337 for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
338 iter != prefixes.end(); ++iter) {
339 checksum ^= *iter;
340 }
341 return checksum;
342 }
343
344 // Prefix set doesn't store duplicates, making it hard to verify that
345 // checksums match. Also hard is verifying that nothing was corrupted
346 // while removing duplicates from a vector. So this generates a
347 // checksum of only the unique elements.
348 SBPrefix ChecksumUniquePrefixes(const std::vector<SBPrefix>& prefixes) {
349 // Handle edge case up front.
350 if (prefixes.size() == 0)
351 return 0;
352
353 std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
354 SBPrefix checksum = *iter++;
355 while (iter != prefixes.end()) {
356 if (*(iter - 1) != *iter)
357 checksum ^= *iter;
358 iter++;
359 }
360 return checksum;
361 }
362
363 // NOTE(shess): Past histograms have indicated that in a given day,
364 // about 1 in 100,000 updates result in a PrefixSet which was
365 // inconsistent relative to the BloomFilter. Windows is about 5x more
366 // likely to build an inconsistent PrefixSet than Mac. A number of
367 // developers have reviewed the code, and I ran extensive fuzzing with
368 // random data, so at this point I'm trying to demonstrate memory
369 // corruption.
370 //
371 // Other findings from past instrumentation:
372 // - half of one percent of brokenness cases implied duplicate items
373 // in |prefixes|. Per the code above, this should not be possible.
374 // - about 1/20 of broken cases happened more than once for a given
375 // user. Note that empty updates generally don't hit this code at
376 // all, so that may not imply a specific input pattern breaking
377 // things.
378 // - about 1/3 of broken cases show a checksum mismatch between the
379 // checksum calculated while creating |prefix_set| and the checksum
380 // calculated immediately after creation. This is almost certainly
381 // memory corruption.
382
383 // Generate |PrefixSet| and |BloomFilter| instances from the contents 266 // Generate |PrefixSet| and |BloomFilter| instances from the contents
384 // of |add_prefixes|. Verify that the results are internally 267 // of |add_prefixes|.
385 // consistent, and that the input maintained consistence while
386 // constructing (which should assure that they are mutually
387 // consistent). Returns an empty prefix set if any of the checks
388 // fail.
389 void FiltersFromAddPrefixes( 268 void FiltersFromAddPrefixes(
390 const SBAddPrefixes& add_prefixes, 269 const SBAddPrefixes& add_prefixes,
391 scoped_refptr<BloomFilter>* bloom_filter, 270 scoped_refptr<BloomFilter>* bloom_filter,
392 scoped_ptr<safe_browsing::PrefixSet>* prefix_set) { 271 scoped_ptr<safe_browsing::PrefixSet>* prefix_set) {
393 const int filter_size = 272 const int filter_size =
394 BloomFilter::FilterSizeForKeyCount(add_prefixes.size()); 273 BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
395 *bloom_filter = new BloomFilter(filter_size); 274 *bloom_filter = new BloomFilter(filter_size);
396 if (add_prefixes.empty()) { 275 if (add_prefixes.empty()) {
397 prefix_set->reset(CreateEmptyPrefixSet()); 276 prefix_set->reset(CreateEmptyPrefixSet());
398 return; 277 return;
399 } 278 }
400 279
401 const SBPrefix add_prefixes_checksum = ChecksumAddPrefixes(0, add_prefixes);
402
403 // TODO(shess): If |add_prefixes| were sorted by the prefix, it 280 // TODO(shess): If |add_prefixes| were sorted by the prefix, it
404 // could be passed directly to |PrefixSet()|, removing the need for 281 // could be passed directly to |PrefixSet()|, removing the need for
405 // |prefixes|. 282 // |prefixes|.
406 std::vector<SBPrefix> prefixes; 283 std::vector<SBPrefix> prefixes;
407 prefixes.reserve(add_prefixes.size()); 284 prefixes.reserve(add_prefixes.size());
408 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 285 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
409 iter != add_prefixes.end(); ++iter) { 286 iter != add_prefixes.end(); ++iter) {
410 prefixes.push_back(iter->prefix); 287 prefixes.push_back(iter->prefix);
411 } 288 }
412 std::sort(prefixes.begin(), prefixes.end()); 289 std::sort(prefixes.begin(), prefixes.end());
413 290
414 const SBPrefix unique_prefixes_checksum = ChecksumUniquePrefixes(prefixes);
415
416 for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); 291 for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
417 iter != prefixes.end(); ++iter) { 292 iter != prefixes.end(); ++iter) {
418 bloom_filter->get()->Insert(*iter); 293 bloom_filter->get()->Insert(*iter);
419 } 294 }
420 295
421 prefix_set->reset(new safe_browsing::PrefixSet(prefixes)); 296 prefix_set->reset(new safe_browsing::PrefixSet(prefixes));
422
423 // "unit test" for ChecksumUniquePrefixes() and GetPrefixesChecksum().
424 if (DCHECK_IS_ON()) {
425 std::vector<SBPrefix> unique(prefixes);
426 unique.erase(std::unique(unique.begin(), unique.end()), unique.end());
427 DCHECK_EQ(0, ChecksumPrefixes(unique_prefixes_checksum, unique));
428
429 std::vector<SBPrefix> restored;
430 prefix_set->get()->GetPrefixes(&restored);
431 DCHECK_EQ(0, ChecksumPrefixes(prefix_set->get()->GetPrefixesChecksum(),
432 restored));
433 }
434
435 // TODO(shess): Need a barrier to prevent ordering checks above here
436 // or construction below here?
437
438 // NOTE(shess): So far, the fallout is:
439 // 605 PREFIX_SET_CHECKSUM (failed prefix_set->CheckChecksum()).
440 // 32 BLOOM_FILTER_CHECKSUM (failed bloom_filter->CheckChecksum()).
441 // 1 ADD_PREFIXES_CHECKSUM (contents of add_prefixes changed).
442 // 7 PREFIXES_CHECKSUM (contents of prefixes changed).
443
444 bool get_prefixes_broken =
445 (unique_prefixes_checksum != prefix_set->get()->GetPrefixesChecksum());
446 bool prefix_set_broken = !prefix_set->get()->CheckChecksum();
447 bool bloom_filter_broken = !bloom_filter->get()->CheckChecksum();
448 bool prefixes_input_broken =
449 (0 != ChecksumPrefixes(add_prefixes_checksum, prefixes));
450 bool add_prefixes_input_broken =
451 (0 != ChecksumAddPrefixes(add_prefixes_checksum, add_prefixes));
452
453 if (add_prefixes_input_broken) {
454 RecordPrefixSetInfo(PREFIX_SET_CREATE_ADD_PREFIXES_CHECKSUM);
455 } else if (prefixes_input_broken) {
456 RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIXES_CHECKSUM);
457 }
458
459 if (prefix_set_broken) {
460 RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM);
461
462 // Failing PrefixSet::CheckChecksum() implies that the set's
463 // internal data changed during construction. If the retry
464 // consistently succeeds, that implies memory corruption. If the
465 // retry consistently fails, that implies PrefixSet is broken.
466 scoped_ptr<safe_browsing::PrefixSet> retry_set(
467 new safe_browsing::PrefixSet(prefixes));
468 if (retry_set->CheckChecksum())
469 RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM_SUCCEEDED);
470
471 // TODO(shess): In case of double failure, consider pinning the
472 // data and calling NOTREACHED(). But it's a lot of data. Could
473 // also implement a binary search to constrain the amount of input
474 // to consider, but that's a lot of complexity.
475 }
476
477 // TODO(shess): Obviously this is a problem for operation of the
478 // bloom filter! But for purposes of checking prefix set operation,
479 // all that matters is not messing up the histograms recorded later.
480 if (bloom_filter_broken) {
481 RecordPrefixSetInfo(PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM);
482
483 // As for PrefixSet, failing BloomFilter::CheckChecksum() implies
484 // that the filter's internal data was changed.
485 scoped_refptr<BloomFilter> retry_filter(new BloomFilter(filter_size));
486 for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
487 iter != prefixes.end(); ++iter) {
488 retry_filter->Insert(*iter);
489 }
490 if (retry_filter->CheckChecksum())
491 RecordPrefixSetInfo(PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM_SUCCEEDED);
492 }
493
494 // Attempt to isolate the case where the output of GetPrefixes()
495 // would differ from the input presented. This case implies that
496 // PrefixSet has an actual implementation flaw, and may in the
497 // future warrant more aggressive action, such as somehow dumping
498 // |prefixes| up to the crash server.
499 if (get_prefixes_broken && !prefix_set_broken && !prefixes_input_broken)
500 RecordPrefixSetInfo(PREFIX_SET_CREATE_GET_PREFIXES_CHECKSUM);
501
502 // If anything broke, clear the prefix set to prevent pollution of
503 // histograms generated later.
504 if (get_prefixes_broken || prefix_set_broken || bloom_filter_broken ||
505 prefixes_input_broken || add_prefixes_input_broken) {
506 DCHECK(!get_prefixes_broken);
507 DCHECK(!prefix_set_broken);
508 DCHECK(!bloom_filter_broken);
509 DCHECK(!prefixes_input_broken);
510 DCHECK(!add_prefixes_input_broken);
511 prefix_set->reset(CreateEmptyPrefixSet());
512 }
513 } 297 }
514 298
515 } // namespace 299 } // namespace
516 300
517 // The default SafeBrowsingDatabaseFactory. 301 // The default SafeBrowsingDatabaseFactory.
518 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 302 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
519 public: 303 public:
520 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 304 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
521 bool enable_download_protection, 305 bool enable_download_protection,
522 bool enable_client_side_whitelist, 306 bool enable_client_side_whitelist,
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after
757 BrowseFullHashesToCheck(url, false, &full_hashes); 541 BrowseFullHashesToCheck(url, false, &full_hashes);
758 if (full_hashes.empty()) 542 if (full_hashes.empty())
759 return false; 543 return false;
760 544
761 // This function is called on the I/O thread, prevent changes to 545 // This function is called on the I/O thread, prevent changes to
762 // bloom filter and caches. 546 // bloom filter and caches.
763 base::AutoLock locked(lookup_lock_); 547 base::AutoLock locked(lookup_lock_);
764 548
765 if (!browse_bloom_filter_.get()) 549 if (!browse_bloom_filter_.get())
766 return false; 550 return false;
767 DCHECK(prefix_set_.get());
768
769 // |prefix_set_| is empty until the first update, only log info if
770 // not empty.
771 const bool prefix_set_empty = !prefix_set_->GetSize();
772
773 // Track cases where the filters were not consistent with each other.
774 bool bloom_hit_prefix_miss = false;
775 bool bloom_miss_prefix_hit = false;
776 551
777 size_t miss_count = 0; 552 size_t miss_count = 0;
778 for (size_t i = 0; i < full_hashes.size(); ++i) { 553 for (size_t i = 0; i < full_hashes.size(); ++i) {
779 bool found = prefix_set_->Exists(full_hashes[i].prefix);
780
781 if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { 554 if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
782 if (!prefix_set_empty) {
783 RecordPrefixSetInfo(PREFIX_SET_BLOOM_HIT);
784 // This should be less than PREFIX_SET_BLOOM_HIT by the
785 // false positive rate.
786 if (found) {
787 RecordPrefixSetInfo(PREFIX_SET_HIT);
788 } else {
789 // Could be false positive, but _could_ be corruption.
790 bloom_hit_prefix_miss = true;
791 }
792 }
793 prefix_hits->push_back(full_hashes[i].prefix); 555 prefix_hits->push_back(full_hashes[i].prefix);
794 if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) 556 if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
795 ++miss_count; 557 ++miss_count;
796 } else {
797 // Bloom filter misses should never be in prefix set. Check
798 // to see if the prefix set or bloom filter have changed since
799 // being created.
800 DCHECK(!found);
801 if (found && !prefix_set_empty)
802 bloom_miss_prefix_hit = true;
803 } 558 }
804 } 559 }
805 560
806 // In case of inconsistent results, verify the checksums and record
807 // failures (in case of corruption inconsistencies would be
808 // expected). In case of corruption clear out |prefix_set_|, once
809 // we've noticed corruption there is no point to future comparisons.
810 if (bloom_miss_prefix_hit || bloom_hit_prefix_miss) {
811 if (!prefix_set_->CheckChecksum()) {
812 RecordPrefixSetInfo(PREFIX_SET_MISMATCH_PREFIX_SET_CHECKSUM);
813 prefix_set_.reset(CreateEmptyPrefixSet());
814 } else if (!browse_bloom_filter_->CheckChecksum()) {
815 RecordPrefixSetInfo(PREFIX_SET_MISMATCH_BLOOM_FILTER_CHECKSUM);
816 prefix_set_.reset(CreateEmptyPrefixSet());
817 } else if (bloom_miss_prefix_hit) {
818 // This case should be impossible if the filters are both valid.
819 RecordPrefixSetInfo(PREFIX_SET_BLOOM_MISS_PREFIX_HIT);
820 }
821 }
822
823 // If all the prefixes are cached as 'misses', don't issue a GetHash. 561 // If all the prefixes are cached as 'misses', don't issue a GetHash.
824 if (miss_count == prefix_hits->size()) 562 if (miss_count == prefix_hits->size())
825 return false; 563 return false;
826 564
827 // Find the matching full-hash results. |full_browse_hashes_| are from the 565 // Find the matching full-hash results. |full_browse_hashes_| are from the
828 // database, |pending_browse_hashes_| are from GetHash requests between 566 // database, |pending_browse_hashes_| are from GetHash requests between
829 // updates. 567 // updates.
830 std::sort(prefix_hits->begin(), prefix_hits->end()); 568 std::sort(prefix_hits->begin(), prefix_hits->end());
831 569
832 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_, 570 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
(...skipping 759 matching lines...) Expand 10 before | Expand all | Expand 10 after
1592 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), 1330 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1593 kill_switch)) { 1331 kill_switch)) {
1594 // The kill switch is whitelisted hence we whitelist all URLs. 1332 // The kill switch is whitelisted hence we whitelist all URLs.
1595 WhitelistEverything(whitelist); 1333 WhitelistEverything(whitelist);
1596 } else { 1334 } else {
1597 base::AutoLock locked(lookup_lock_); 1335 base::AutoLock locked(lookup_lock_);
1598 whitelist->second = false; 1336 whitelist->second = false;
1599 whitelist->first.swap(new_whitelist); 1337 whitelist->first.swap(new_whitelist);
1600 } 1338 }
1601 } 1339 }
OLDNEW
« no previous file with comments | « chrome/browser/safe_browsing/prefix_set_unittest.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698