OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" | 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 | 9 |
10 #include "base/bind.h" | 10 #include "base/bind.h" |
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
251 if (subs_deleted > 0) | 251 if (subs_deleted > 0) |
252 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted); | 252 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted); |
253 } | 253 } |
254 | 254 |
255 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from | 255 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from |
256 // safe_browsing_store.h orders on both chunk-id and prefix. | 256 // safe_browsing_store.h orders on both chunk-id and prefix. |
257 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { | 257 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { |
258 return a.full_hash.prefix < b.full_hash.prefix; | 258 return a.full_hash.prefix < b.full_hash.prefix; |
259 } | 259 } |
260 | 260 |
261 // As compared to the bloom filter, PrefixSet should have these | |
262 // properties: | |
263 // - Any bloom filter miss should be a prefix set miss. | |
264 // - Any prefix set hit should be a bloom filter hit. | |
265 // - Bloom filter false positives are prefix set misses. | |
266 // The following is to log actual performance to verify this. | |
267 enum PrefixSetEvent { | |
268 // Hits to prefix set and bloom filter. | |
269 PREFIX_SET_HIT, | |
270 PREFIX_SET_BLOOM_HIT, | |
271 // These were to track bloom misses which hit the prefix set, with | |
272 // _INVALID to track where the item didn't appear to actually be in | |
273 // the prefix set. _INVALID was never hit. | |
274 PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_OBSOLETE, | |
275 PREFIX_SET_BLOOM_MISS_PREFIX_SET_HIT_INVALID_OBSOLETE, | |
276 // GetPrefixes() after creation failed to get the same prefixes. | |
277 PREFIX_SET_GETPREFIXES_BROKEN, | |
278 // Fine-grained tests which didn't provide any good direction. | |
279 PREFIX_SET_GETPREFIXES_BROKEN_SIZE_OBSOLETE, | |
280 PREFIX_SET_GETPREFIXES_FIRST_BROKEN_OBSOLETE, | |
281 PREFIX_SET_SBPREFIX_WAS_BROKEN_OBSOLETE, | |
282 PREFIX_SET_GETPREFIXES_BROKEN_SORTING_OBSOLETE, | |
283 PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION_OBSOLETE, | |
284 PREFIX_SET_GETPREFIXES_UNSORTED_IS_DELTA_OBSOLETE, | |
285 PREFIX_SET_GETPREFIXES_UNSORTED_IS_INDEX_OBSOLETE, | |
286 // Failed checksums when creating filters. | |
287 PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM, | |
288 PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM, | |
289 PREFIX_SET_CREATE_ADD_PREFIXES_CHECKSUM, | |
290 PREFIX_SET_CREATE_PREFIXES_CHECKSUM, | |
291 PREFIX_SET_CREATE_GET_PREFIXES_CHECKSUM, | |
292 // Failed checksums when probing the filters. | |
293 PREFIX_SET_MISMATCH_PREFIX_SET_CHECKSUM, | |
294 PREFIX_SET_MISMATCH_BLOOM_FILTER_CHECKSUM, | |
295 | |
296 // Recorded once per update if the impossible occurs. | |
297 PREFIX_SET_BLOOM_MISS_PREFIX_HIT, | |
298 | |
299 // Corresponding CHECKSUM failed, but on retry it succeeded. | |
300 PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM_SUCCEEDED, | |
301 PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM_SUCCEEDED, | |
302 | |
303 // Memory space for histograms is determined by the max. ALWAYS ADD | |
304 // NEW VALUES BEFORE THIS ONE. | |
305 PREFIX_SET_EVENT_MAX | |
306 }; | |
307 | |
308 void RecordPrefixSetInfo(PrefixSetEvent event_type) { | |
309 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type, | |
310 PREFIX_SET_EVENT_MAX); | |
311 } | |
312 | |
313 // Helper to reduce code duplication. | 261 // Helper to reduce code duplication. |
314 safe_browsing::PrefixSet* CreateEmptyPrefixSet() { | 262 safe_browsing::PrefixSet* CreateEmptyPrefixSet() { |
315 return new safe_browsing::PrefixSet(std::vector<SBPrefix>()); | 263 return new safe_browsing::PrefixSet(std::vector<SBPrefix>()); |
316 } | 264 } |
317 | 265 |
318 // Generate xor "checksum" of SBPrefix part of add prefixes. Pass 0 | |
319 // for |seed| will return a checksum, passing a previous checksum for | |
320 // |seed| will return 0 (if it checks out). Coded this way in hopes | |
321 // that the compiler won't optimize separate runs into a single | |
322 // temporary. | |
323 SBPrefix ChecksumAddPrefixes(SBPrefix seed, | |
324 const SBAddPrefixes& add_prefixes) { | |
325 SBPrefix checksum = seed; | |
326 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); | |
327 iter != add_prefixes.end(); ++iter) { | |
328 checksum ^= iter->prefix; | |
329 } | |
330 return checksum; | |
331 } | |
332 | |
333 // Generate xor "checksum" of prefixes. | |
334 SBPrefix ChecksumPrefixes(SBPrefix seed, | |
335 const std::vector<SBPrefix>& prefixes) { | |
336 SBPrefix checksum = seed; | |
337 for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); | |
338 iter != prefixes.end(); ++iter) { | |
339 checksum ^= *iter; | |
340 } | |
341 return checksum; | |
342 } | |
343 | |
344 // Prefix set doesn't store duplicates, making it hard to verify that | |
345 // checksums match. Also hard is verifying that nothing was corrupted | |
346 // while removing duplicates from a vector. So this generates a | |
347 // checksum of only the unique elements. | |
348 SBPrefix ChecksumUniquePrefixes(const std::vector<SBPrefix>& prefixes) { | |
349 // Handle edge case up front. | |
350 if (prefixes.size() == 0) | |
351 return 0; | |
352 | |
353 std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); | |
354 SBPrefix checksum = *iter++; | |
355 while (iter != prefixes.end()) { | |
356 if (*(iter - 1) != *iter) | |
357 checksum ^= *iter; | |
358 iter++; | |
359 } | |
360 return checksum; | |
361 } | |
362 | |
363 // NOTE(shess): Past histograms have indicated that in a given day, | |
364 // about 1 in 100,000 updates result in a PrefixSet which was | |
365 // inconsistent relative to the BloomFilter. Windows is about 5x more | |
366 // likely to build an inconsistent PrefixSet than Mac. A number of | |
367 // developers have reviewed the code, and I ran extensive fuzzing with | |
368 // random data, so at this point I'm trying to demonstrate memory | |
369 // corruption. | |
370 // | |
371 // Other findings from past instrumentation: | |
372 // - half of one percent of brokenness cases implied duplicate items | |
373 // in |prefixes|. Per the code above, this should not be possible. | |
374 // - about 1/20 of broken cases happened more than once for a given | |
375 // user. Note that empty updates generally don't hit this code at | |
376 // all, so that may not imply a specific input pattern breaking | |
377 // things. | |
378 // - about 1/3 of broken cases show a checksum mismatch between the | |
379 // checksum calculated while creating |prefix_set| and the checksum | |
380 // calculated immediately after creation. This is almost certainly | |
381 // memory corruption. | |
382 | |
383 // Generate |PrefixSet| and |BloomFilter| instances from the contents | 266 // Generate |PrefixSet| and |BloomFilter| instances from the contents |
384 // of |add_prefixes|. Verify that the results are internally | 267 // of |add_prefixes|. |
385 // consistent, and that the input maintained consistence while | |
386 // constructing (which should assure that they are mutually | |
387 // consistent). Returns an empty prefix set if any of the checks | |
388 // fail. | |
389 void FiltersFromAddPrefixes( | 268 void FiltersFromAddPrefixes( |
390 const SBAddPrefixes& add_prefixes, | 269 const SBAddPrefixes& add_prefixes, |
391 scoped_refptr<BloomFilter>* bloom_filter, | 270 scoped_refptr<BloomFilter>* bloom_filter, |
392 scoped_ptr<safe_browsing::PrefixSet>* prefix_set) { | 271 scoped_ptr<safe_browsing::PrefixSet>* prefix_set) { |
393 const int filter_size = | 272 const int filter_size = |
394 BloomFilter::FilterSizeForKeyCount(add_prefixes.size()); | 273 BloomFilter::FilterSizeForKeyCount(add_prefixes.size()); |
395 *bloom_filter = new BloomFilter(filter_size); | 274 *bloom_filter = new BloomFilter(filter_size); |
396 if (add_prefixes.empty()) { | 275 if (add_prefixes.empty()) { |
397 prefix_set->reset(CreateEmptyPrefixSet()); | 276 prefix_set->reset(CreateEmptyPrefixSet()); |
398 return; | 277 return; |
399 } | 278 } |
400 | 279 |
401 const SBPrefix add_prefixes_checksum = ChecksumAddPrefixes(0, add_prefixes); | |
402 | |
403 // TODO(shess): If |add_prefixes| were sorted by the prefix, it | 280 // TODO(shess): If |add_prefixes| were sorted by the prefix, it |
404 // could be passed directly to |PrefixSet()|, removing the need for | 281 // could be passed directly to |PrefixSet()|, removing the need for |
405 // |prefixes|. | 282 // |prefixes|. |
406 std::vector<SBPrefix> prefixes; | 283 std::vector<SBPrefix> prefixes; |
407 prefixes.reserve(add_prefixes.size()); | 284 prefixes.reserve(add_prefixes.size()); |
408 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); | 285 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); |
409 iter != add_prefixes.end(); ++iter) { | 286 iter != add_prefixes.end(); ++iter) { |
410 prefixes.push_back(iter->prefix); | 287 prefixes.push_back(iter->prefix); |
411 } | 288 } |
412 std::sort(prefixes.begin(), prefixes.end()); | 289 std::sort(prefixes.begin(), prefixes.end()); |
413 | 290 |
414 const SBPrefix unique_prefixes_checksum = ChecksumUniquePrefixes(prefixes); | |
415 | |
416 for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); | 291 for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); |
417 iter != prefixes.end(); ++iter) { | 292 iter != prefixes.end(); ++iter) { |
418 bloom_filter->get()->Insert(*iter); | 293 bloom_filter->get()->Insert(*iter); |
419 } | 294 } |
420 | 295 |
421 prefix_set->reset(new safe_browsing::PrefixSet(prefixes)); | 296 prefix_set->reset(new safe_browsing::PrefixSet(prefixes)); |
422 | |
423 // "unit test" for ChecksumUniquePrefixes() and GetPrefixesChecksum(). | |
424 if (DCHECK_IS_ON()) { | |
425 std::vector<SBPrefix> unique(prefixes); | |
426 unique.erase(std::unique(unique.begin(), unique.end()), unique.end()); | |
427 DCHECK_EQ(0, ChecksumPrefixes(unique_prefixes_checksum, unique)); | |
428 | |
429 std::vector<SBPrefix> restored; | |
430 prefix_set->get()->GetPrefixes(&restored); | |
431 DCHECK_EQ(0, ChecksumPrefixes(prefix_set->get()->GetPrefixesChecksum(), | |
432 restored)); | |
433 } | |
434 | |
435 // TODO(shess): Need a barrier to prevent ordering checks above here | |
436 // or construction below here? | |
437 | |
438 // NOTE(shess): So far, the fallout is: | |
439 // 605 PREFIX_SET_CHECKSUM (failed prefix_set->CheckChecksum()). | |
440 // 32 BLOOM_FILTER_CHECKSUM (failed bloom_filter->CheckChecksum()). | |
441 // 1 ADD_PREFIXES_CHECKSUM (contents of add_prefixes changed). | |
442 // 7 PREFIXES_CHECKSUM (contents of prefixes changed). | |
443 | |
444 bool get_prefixes_broken = | |
445 (unique_prefixes_checksum != prefix_set->get()->GetPrefixesChecksum()); | |
446 bool prefix_set_broken = !prefix_set->get()->CheckChecksum(); | |
447 bool bloom_filter_broken = !bloom_filter->get()->CheckChecksum(); | |
448 bool prefixes_input_broken = | |
449 (0 != ChecksumPrefixes(add_prefixes_checksum, prefixes)); | |
450 bool add_prefixes_input_broken = | |
451 (0 != ChecksumAddPrefixes(add_prefixes_checksum, add_prefixes)); | |
452 | |
453 if (add_prefixes_input_broken) { | |
454 RecordPrefixSetInfo(PREFIX_SET_CREATE_ADD_PREFIXES_CHECKSUM); | |
455 } else if (prefixes_input_broken) { | |
456 RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIXES_CHECKSUM); | |
457 } | |
458 | |
459 if (prefix_set_broken) { | |
460 RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM); | |
461 | |
462 // Failing PrefixSet::CheckChecksum() implies that the set's | |
463 // internal data changed during construction. If the retry | |
464 // consistently succeeds, that implies memory corruption. If the | |
465 // retry consistently fails, that implies PrefixSet is broken. | |
466 scoped_ptr<safe_browsing::PrefixSet> retry_set( | |
467 new safe_browsing::PrefixSet(prefixes)); | |
468 if (retry_set->CheckChecksum()) | |
469 RecordPrefixSetInfo(PREFIX_SET_CREATE_PREFIX_SET_CHECKSUM_SUCCEEDED); | |
470 | |
471 // TODO(shess): In case of double failure, consider pinning the | |
472 // data and calling NOTREACHED(). But it's a lot of data. Could | |
473 // also implement a binary search to constrain the amount of input | |
474 // to consider, but that's a lot of complexity. | |
475 } | |
476 | |
477 // TODO(shess): Obviously this is a problem for operation of the | |
478 // bloom filter! But for purposes of checking prefix set operation, | |
479 // all that matters is not messing up the histograms recorded later. | |
480 if (bloom_filter_broken) { | |
481 RecordPrefixSetInfo(PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM); | |
482 | |
483 // As for PrefixSet, failing BloomFilter::CheckChecksum() implies | |
484 // that the filter's internal data was changed. | |
485 scoped_refptr<BloomFilter> retry_filter(new BloomFilter(filter_size)); | |
486 for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); | |
487 iter != prefixes.end(); ++iter) { | |
488 retry_filter->Insert(*iter); | |
489 } | |
490 if (retry_filter->CheckChecksum()) | |
491 RecordPrefixSetInfo(PREFIX_SET_CREATE_BLOOM_FILTER_CHECKSUM_SUCCEEDED); | |
492 } | |
493 | |
494 // Attempt to isolate the case where the output of GetPrefixes() | |
495 // would differ from the input presented. This case implies that | |
496 // PrefixSet has an actual implementation flaw, and may in the | |
497 // future warrant more aggressive action, such as somehow dumping | |
498 // |prefixes| up to the crash server. | |
499 if (get_prefixes_broken && !prefix_set_broken && !prefixes_input_broken) | |
500 RecordPrefixSetInfo(PREFIX_SET_CREATE_GET_PREFIXES_CHECKSUM); | |
501 | |
502 // If anything broke, clear the prefix set to prevent pollution of | |
503 // histograms generated later. | |
504 if (get_prefixes_broken || prefix_set_broken || bloom_filter_broken || | |
505 prefixes_input_broken || add_prefixes_input_broken) { | |
506 DCHECK(!get_prefixes_broken); | |
507 DCHECK(!prefix_set_broken); | |
508 DCHECK(!bloom_filter_broken); | |
509 DCHECK(!prefixes_input_broken); | |
510 DCHECK(!add_prefixes_input_broken); | |
511 prefix_set->reset(CreateEmptyPrefixSet()); | |
512 } | |
513 } | 297 } |
514 | 298 |
515 } // namespace | 299 } // namespace |
516 | 300 |
517 // The default SafeBrowsingDatabaseFactory. | 301 // The default SafeBrowsingDatabaseFactory. |
518 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { | 302 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { |
519 public: | 303 public: |
520 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( | 304 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( |
521 bool enable_download_protection, | 305 bool enable_download_protection, |
522 bool enable_client_side_whitelist, | 306 bool enable_client_side_whitelist, |
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
757 BrowseFullHashesToCheck(url, false, &full_hashes); | 541 BrowseFullHashesToCheck(url, false, &full_hashes); |
758 if (full_hashes.empty()) | 542 if (full_hashes.empty()) |
759 return false; | 543 return false; |
760 | 544 |
761 // This function is called on the I/O thread, prevent changes to | 545 // This function is called on the I/O thread, prevent changes to |
762 // bloom filter and caches. | 546 // bloom filter and caches. |
763 base::AutoLock locked(lookup_lock_); | 547 base::AutoLock locked(lookup_lock_); |
764 | 548 |
765 if (!browse_bloom_filter_.get()) | 549 if (!browse_bloom_filter_.get()) |
766 return false; | 550 return false; |
767 DCHECK(prefix_set_.get()); | |
768 | |
769 // |prefix_set_| is empty until the first update, only log info if | |
770 // not empty. | |
771 const bool prefix_set_empty = !prefix_set_->GetSize(); | |
772 | |
773 // Track cases where the filters were not consistent with each other. | |
774 bool bloom_hit_prefix_miss = false; | |
775 bool bloom_miss_prefix_hit = false; | |
776 | 551 |
777 size_t miss_count = 0; | 552 size_t miss_count = 0; |
778 for (size_t i = 0; i < full_hashes.size(); ++i) { | 553 for (size_t i = 0; i < full_hashes.size(); ++i) { |
779 bool found = prefix_set_->Exists(full_hashes[i].prefix); | |
780 | |
781 if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { | 554 if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { |
782 if (!prefix_set_empty) { | |
783 RecordPrefixSetInfo(PREFIX_SET_BLOOM_HIT); | |
784 // This should be less than PREFIX_SET_BLOOM_HIT by the | |
785 // false positive rate. | |
786 if (found) { | |
787 RecordPrefixSetInfo(PREFIX_SET_HIT); | |
788 } else { | |
789 // Could be false positive, but _could_ be corruption. | |
790 bloom_hit_prefix_miss = true; | |
791 } | |
792 } | |
793 prefix_hits->push_back(full_hashes[i].prefix); | 555 prefix_hits->push_back(full_hashes[i].prefix); |
794 if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) | 556 if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) |
795 ++miss_count; | 557 ++miss_count; |
796 } else { | |
797 // Bloom filter misses should never be in prefix set. Check | |
798 // to see if the prefix set or bloom filter have changed since | |
799 // being created. | |
800 DCHECK(!found); | |
801 if (found && !prefix_set_empty) | |
802 bloom_miss_prefix_hit = true; | |
803 } | 558 } |
804 } | 559 } |
805 | 560 |
806 // In case of inconsistent results, verify the checksums and record | |
807 // failures (in case of corruption inconsistencies would be | |
808 // expected). In case of corruption clear out |prefix_set_|, once | |
809 // we've noticed corruption there is no point to future comparisons. | |
810 if (bloom_miss_prefix_hit || bloom_hit_prefix_miss) { | |
811 if (!prefix_set_->CheckChecksum()) { | |
812 RecordPrefixSetInfo(PREFIX_SET_MISMATCH_PREFIX_SET_CHECKSUM); | |
813 prefix_set_.reset(CreateEmptyPrefixSet()); | |
814 } else if (!browse_bloom_filter_->CheckChecksum()) { | |
815 RecordPrefixSetInfo(PREFIX_SET_MISMATCH_BLOOM_FILTER_CHECKSUM); | |
816 prefix_set_.reset(CreateEmptyPrefixSet()); | |
817 } else if (bloom_miss_prefix_hit) { | |
818 // This case should be impossible if the filters are both valid. | |
819 RecordPrefixSetInfo(PREFIX_SET_BLOOM_MISS_PREFIX_HIT); | |
820 } | |
821 } | |
822 | |
823 // If all the prefixes are cached as 'misses', don't issue a GetHash. | 561 // If all the prefixes are cached as 'misses', don't issue a GetHash. |
824 if (miss_count == prefix_hits->size()) | 562 if (miss_count == prefix_hits->size()) |
825 return false; | 563 return false; |
826 | 564 |
827 // Find the matching full-hash results. |full_browse_hashes_| are from the | 565 // Find the matching full-hash results. |full_browse_hashes_| are from the |
828 // database, |pending_browse_hashes_| are from GetHash requests between | 566 // database, |pending_browse_hashes_| are from GetHash requests between |
829 // updates. | 567 // updates. |
830 std::sort(prefix_hits->begin(), prefix_hits->end()); | 568 std::sort(prefix_hits->begin(), prefix_hits->end()); |
831 | 569 |
832 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_, | 570 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_, |
(...skipping 759 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1592 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), | 1330 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), |
1593 kill_switch)) { | 1331 kill_switch)) { |
1594 // The kill switch is whitelisted hence we whitelist all URLs. | 1332 // The kill switch is whitelisted hence we whitelist all URLs. |
1595 WhitelistEverything(whitelist); | 1333 WhitelistEverything(whitelist); |
1596 } else { | 1334 } else { |
1597 base::AutoLock locked(lookup_lock_); | 1335 base::AutoLock locked(lookup_lock_); |
1598 whitelist->second = false; | 1336 whitelist->second = false; |
1599 whitelist->first.swap(new_whitelist); | 1337 whitelist->first.swap(new_whitelist); |
1600 } | 1338 } |
1601 } | 1339 } |
OLD | NEW |