Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(88)

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 10896048: Transition safe browsing from bloom filter to prefix set. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix prefix set read/write for empty/sparse sets. Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
(...skipping 13 matching lines...) Expand all
24 #if defined(OS_MACOSX) 24 #if defined(OS_MACOSX)
25 #include "base/mac/mac_util.h" 25 #include "base/mac/mac_util.h"
26 #endif 26 #endif
27 27
28 using content::BrowserThread; 28 using content::BrowserThread;
29 29
30 namespace { 30 namespace {
31 31
32 // Filename suffix for the bloom filter. 32 // Filename suffix for the bloom filter.
33 const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); 33 const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2");
34 // Filename suffix for the prefix set.
35 const FilePath::CharType kPrefixSetFile[] = FILE_PATH_LITERAL(" Prefix Set");
34 // Filename suffix for download store. 36 // Filename suffix for download store.
35 const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download"); 37 const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download");
36 // Filename suffix for client-side phishing detection whitelist store. 38 // Filename suffix for client-side phishing detection whitelist store.
37 const FilePath::CharType kCsdWhitelistDBFile[] = 39 const FilePath::CharType kCsdWhitelistDBFile[] =
38 FILE_PATH_LITERAL(" Csd Whitelist"); 40 FILE_PATH_LITERAL(" Csd Whitelist");
39 // Filename suffix for the download whitelist store. 41 // Filename suffix for the download whitelist store.
40 const FilePath::CharType kDownloadWhitelistDBFile[] = 42 const FilePath::CharType kDownloadWhitelistDBFile[] =
41 FILE_PATH_LITERAL(" Download Whitelist"); 43 FILE_PATH_LITERAL(" Download Whitelist");
42 // Filename suffix for browse store. 44 // Filename suffix for browse store.
43 // TODO(lzheng): change to a better name when we change the file format. 45 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
46 // Unfortunately, to change the name implies lots of transition code
47 // for little benefit. If/when file formats change (say to put all
48 // the data in one file), that would be a convenient point to rectify
49 // this.
44 const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); 50 const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
45 51
46 // The maximum staleness for a cached entry. 52 // The maximum staleness for a cached entry.
47 const int kMaxStalenessMinutes = 45; 53 const int kMaxStalenessMinutes = 45;
48 54
49 // Maximum number of entries we allow in any of the whitelists. 55 // Maximum number of entries we allow in any of the whitelists.
50 // If a whitelist on disk contains more entries than this, all lookups to 56 // If a whitelist on disk contains more entries than this, all lookups to
51 // the whitelist will be considered a match. 57 // the whitelist will be considered a match.
52 const size_t kMaxWhitelistSize = 5000; 58 const size_t kMaxWhitelistSize = 5000;
53 59
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after
251 if (subs_deleted > 0) 257 if (subs_deleted > 0)
252 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted); 258 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted);
253 } 259 }
254 260
255 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from 261 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from
256 // safe_browsing_store.h orders on both chunk-id and prefix. 262 // safe_browsing_store.h orders on both chunk-id and prefix.
257 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { 263 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
258 return a.full_hash.prefix < b.full_hash.prefix; 264 return a.full_hash.prefix < b.full_hash.prefix;
259 } 265 }
260 266
261 // Helper to reduce code duplication. 267 // Track what LoadBloomFilterOrPrefixSet() loaded.
262 safe_browsing::PrefixSet* CreateEmptyPrefixSet() { 268 enum FilterLoad {
263 return new safe_browsing::PrefixSet(std::vector<SBPrefix>()); 269 FILTER_LOAD, // All calls.
270 FILTER_LOADED_PREFIX_SET, // Cases loaded from prefix set.
271 FILTER_LOADED_BLOOM_FILTER, // Cases loaded from bloom filter.
272
273 // Memory space for histograms is determined by the max. ALWAYS ADD
274 // NEW VALUES BEFORE THIS ONE.
275 FILTER_LOAD_MAX
276 };
277
278 void RecordFilterLoad(FilterLoad event_type) {
279 UMA_HISTOGRAM_ENUMERATION("SB2.FilterLoad", event_type,
280 FILTER_LOAD_MAX);
264 } 281 }
265 282
266 // Generate |PrefixSet| and |BloomFilter| instances from the contents 283 // This code always checks for non-zero file size. This helper makes
267 // of |add_prefixes|. 284 // that less verbose.
268 void FiltersFromAddPrefixes( 285 int64 GetFileSizeOrZero(const FilePath& file_path) {
269 const SBAddPrefixes& add_prefixes, 286 int64 size_64;
270 scoped_refptr<BloomFilter>* bloom_filter, 287 if (!file_util::GetFileSize(file_path, &size_64))
271 scoped_ptr<safe_browsing::PrefixSet>* prefix_set) { 288 return 0;
272 const int filter_size = 289 return size_64;
273 BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
274 *bloom_filter = new BloomFilter(filter_size);
275 if (add_prefixes.empty()) {
276 prefix_set->reset(CreateEmptyPrefixSet());
277 return;
278 }
279
280 // TODO(shess): If |add_prefixes| were sorted by the prefix, it
281 // could be passed directly to |PrefixSet()|, removing the need for
282 // |prefixes|.
283 std::vector<SBPrefix> prefixes;
284 prefixes.reserve(add_prefixes.size());
285 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
286 iter != add_prefixes.end(); ++iter) {
287 prefixes.push_back(iter->prefix);
288 }
289 std::sort(prefixes.begin(), prefixes.end());
290
291 for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
292 iter != prefixes.end(); ++iter) {
293 bloom_filter->get()->Insert(*iter);
294 }
295
296 prefix_set->reset(new safe_browsing::PrefixSet(prefixes));
297 } 290 }
298 291
299 } // namespace 292 } // namespace
300 293
301 // The default SafeBrowsingDatabaseFactory. 294 // The default SafeBrowsingDatabaseFactory.
302 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 295 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
303 public: 296 public:
304 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 297 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
305 bool enable_download_protection, 298 bool enable_download_protection,
306 bool enable_client_side_whitelist, 299 bool enable_client_side_whitelist,
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
352 return FilePath(db_base_filename.value() + kDownloadDBFile); 345 return FilePath(db_base_filename.value() + kDownloadDBFile);
353 } 346 }
354 347
355 // static 348 // static
356 FilePath SafeBrowsingDatabase::BloomFilterForFilename( 349 FilePath SafeBrowsingDatabase::BloomFilterForFilename(
357 const FilePath& db_filename) { 350 const FilePath& db_filename) {
358 return FilePath(db_filename.value() + kBloomFilterFile); 351 return FilePath(db_filename.value() + kBloomFilterFile);
359 } 352 }
360 353
361 // static 354 // static
355 FilePath SafeBrowsingDatabase::PrefixSetForFilename(
356 const FilePath& db_filename) {
357 return FilePath(db_filename.value() + kPrefixSetFile);
358 }
359
360 // static
362 FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( 361 FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
363 const FilePath& db_filename) { 362 const FilePath& db_filename) {
364 return FilePath(db_filename.value() + kCsdWhitelistDBFile); 363 return FilePath(db_filename.value() + kCsdWhitelistDBFile);
365 } 364 }
366 365
367 // static 366 // static
368 FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename( 367 FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
369 const FilePath& db_filename) { 368 const FilePath& db_filename) {
370 return FilePath(db_filename.value() + kDownloadWhitelistDBFile); 369 return FilePath(db_filename.value() + kDownloadWhitelistDBFile);
371 } 370 }
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
426 void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { 425 void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) {
427 DCHECK_EQ(creation_loop_, MessageLoop::current()); 426 DCHECK_EQ(creation_loop_, MessageLoop::current());
428 // Ensure we haven't been run before. 427 // Ensure we haven't been run before.
429 DCHECK(browse_filename_.empty()); 428 DCHECK(browse_filename_.empty());
430 DCHECK(download_filename_.empty()); 429 DCHECK(download_filename_.empty());
431 DCHECK(csd_whitelist_filename_.empty()); 430 DCHECK(csd_whitelist_filename_.empty());
432 DCHECK(download_whitelist_filename_.empty()); 431 DCHECK(download_whitelist_filename_.empty());
433 432
434 browse_filename_ = BrowseDBFilename(filename_base); 433 browse_filename_ = BrowseDBFilename(filename_base);
435 bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); 434 bloom_filter_filename_ = BloomFilterForFilename(browse_filename_);
435 prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
436 436
437 browse_store_->Init( 437 browse_store_->Init(
438 browse_filename_, 438 browse_filename_,
439 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 439 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
440 base::Unretained(this))); 440 base::Unretained(this)));
441 DVLOG(1) << "Init browse store: " << browse_filename_.value(); 441 DVLOG(1) << "Init browse store: " << browse_filename_.value();
442 442
443 { 443 {
444 // NOTE: There is no need to grab the lock in this function, since 444 // NOTE: There is no need to grab the lock in this function, since
445 // until it returns, there are no pointers to this class on other 445 // until it returns, there are no pointers to this class on other
446 // threads. Then again, that means there is no possibility of 446 // threads. Then again, that means there is no possibility of
447 // contention on the lock... 447 // contention on the lock...
448 base::AutoLock locked(lookup_lock_); 448 base::AutoLock locked(lookup_lock_);
449 full_browse_hashes_.clear(); 449 full_browse_hashes_.clear();
450 pending_browse_hashes_.clear(); 450 pending_browse_hashes_.clear();
451 LoadBloomFilter(); 451 LoadBloomFilterOrPrefixSet();
452 } 452 }
453 453
454 if (download_store_.get()) { 454 if (download_store_.get()) {
455 download_filename_ = DownloadDBFilename(filename_base); 455 download_filename_ = DownloadDBFilename(filename_base);
456 download_store_->Init( 456 download_store_->Init(
457 download_filename_, 457 download_filename_,
458 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 458 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
459 base::Unretained(this))); 459 base::Unretained(this)));
460 DVLOG(1) << "Init download store: " << download_filename_.value(); 460 DVLOG(1) << "Init download store: " << download_filename_.value();
461 } 461 }
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
504 // reset. Perhaps inline |Delete()|? 504 // reset. Perhaps inline |Delete()|?
505 if (!Delete()) 505 if (!Delete())
506 return false; 506 return false;
507 507
508 // Reset objects in memory. 508 // Reset objects in memory.
509 { 509 {
510 base::AutoLock locked(lookup_lock_); 510 base::AutoLock locked(lookup_lock_);
511 full_browse_hashes_.clear(); 511 full_browse_hashes_.clear();
512 pending_browse_hashes_.clear(); 512 pending_browse_hashes_.clear();
513 prefix_miss_cache_.clear(); 513 prefix_miss_cache_.clear();
514 // TODO(shess): This could probably be |bloom_filter_.reset()|. 514 browse_bloom_filter_ = NULL;
515 browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * 515 prefix_set_.reset();
516 BloomFilter::kBloomFilterSizeRatio);
517 // TODO(shess): It is simpler for the code to assume that presence
518 // of a bloom filter always implies presence of a prefix set.
519 prefix_set_.reset(CreateEmptyPrefixSet());
520 } 516 }
521 // Wants to acquire the lock itself. 517 // Wants to acquire the lock itself.
522 WhitelistEverything(&csd_whitelist_); 518 WhitelistEverything(&csd_whitelist_);
523 WhitelistEverything(&download_whitelist_); 519 WhitelistEverything(&download_whitelist_);
524 520
525 return true; 521 return true;
526 } 522 }
527 523
528 // TODO(lzheng): Remove matching_list, it is not used anywhere. 524 // TODO(lzheng): Remove matching_list, it is not used anywhere.
529 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 525 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
530 const GURL& url, 526 const GURL& url,
531 std::string* matching_list, 527 std::string* matching_list,
532 std::vector<SBPrefix>* prefix_hits, 528 std::vector<SBPrefix>* prefix_hits,
533 std::vector<SBFullHashResult>* full_hits, 529 std::vector<SBFullHashResult>* full_hits,
534 base::Time last_update) { 530 base::Time last_update) {
535 // Clear the results first. 531 // Clear the results first.
536 matching_list->clear(); 532 matching_list->clear();
537 prefix_hits->clear(); 533 prefix_hits->clear();
538 full_hits->clear(); 534 full_hits->clear();
539 535
540 std::vector<SBFullHash> full_hashes; 536 std::vector<SBFullHash> full_hashes;
541 BrowseFullHashesToCheck(url, false, &full_hashes); 537 BrowseFullHashesToCheck(url, false, &full_hashes);
542 if (full_hashes.empty()) 538 if (full_hashes.empty())
543 return false; 539 return false;
544 540
545 // This function is called on the I/O thread, prevent changes to 541 // This function is called on the I/O thread, prevent changes to
546 // bloom filter and caches. 542 // filter and caches.
547 base::AutoLock locked(lookup_lock_); 543 base::AutoLock locked(lookup_lock_);
548 544
549 if (!browse_bloom_filter_.get()) 545 // TODO(shess): During transition, users will have a bloom filter
546 // but no prefix set until first update, after which they'll have a
547 // prefix set but no bloom filter.
548 const bool use_prefix_set = prefix_set_.get() != NULL;
549 if (!use_prefix_set && !browse_bloom_filter_.get())
550 return false; 550 return false;
551 551
552 size_t miss_count = 0; 552 size_t miss_count = 0;
553 for (size_t i = 0; i < full_hashes.size(); ++i) { 553 for (size_t i = 0; i < full_hashes.size(); ++i) {
554 if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { 554 const SBPrefix prefix = full_hashes[i].prefix;
555 prefix_hits->push_back(full_hashes[i].prefix); 555 if ((use_prefix_set && prefix_set_->Exists(prefix)) ||
556 if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) 556 (!use_prefix_set && browse_bloom_filter_->Exists(prefix))) {
557 prefix_hits->push_back(prefix);
558 if (prefix_miss_cache_.count(prefix) > 0)
557 ++miss_count; 559 ++miss_count;
558 } 560 }
559 } 561 }
560 562
561 // If all the prefixes are cached as 'misses', don't issue a GetHash. 563 // If all the prefixes are cached as 'misses', don't issue a GetHash.
562 if (miss_count == prefix_hits->size()) 564 if (miss_count == prefix_hits->size())
563 return false; 565 return false;
564 566
565 // Find the matching full-hash results. |full_browse_hashes_| are from the 567 // Find the matching full-hash results. |full_browse_hashes_| are from the
566 // database, |pending_browse_hashes_| are from GetHash requests between 568 // database, |pending_browse_hashes_| are from GetHash requests between
(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after
806 } 808 }
807 } 809 }
808 810
809 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name, 811 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
810 const SBChunkList& chunks) { 812 const SBChunkList& chunks) {
811 DCHECK_EQ(creation_loop_, MessageLoop::current()); 813 DCHECK_EQ(creation_loop_, MessageLoop::current());
812 814
813 if (corruption_detected_ || chunks.empty()) 815 if (corruption_detected_ || chunks.empty())
814 return; 816 return;
815 817
816 const base::Time insert_start = base::Time::Now(); 818 const base::TimeTicks before = base::TimeTicks::Now();
817 819
818 const int list_id = safe_browsing_util::GetListId(list_name); 820 const int list_id = safe_browsing_util::GetListId(list_name);
819 DVLOG(2) << list_name << ": " << list_id; 821 DVLOG(2) << list_name << ": " << list_id;
820 822
821 SafeBrowsingStore* store = GetStore(list_id); 823 SafeBrowsingStore* store = GetStore(list_id);
822 if (!store) return; 824 if (!store) return;
823 825
824 change_detected_ = true; 826 change_detected_ = true;
825 827
826 store->BeginChunk(); 828 store->BeginChunk();
827 if (chunks.front().is_add) { 829 if (chunks.front().is_add) {
828 InsertAddChunks(list_id, chunks); 830 InsertAddChunks(list_id, chunks);
829 } else { 831 } else {
830 InsertSubChunks(list_id, chunks); 832 InsertSubChunks(list_id, chunks);
831 } 833 }
832 store->FinishChunk(); 834 store->FinishChunk();
833 835
834 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start); 836 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
835 } 837 }
836 838
837 void SafeBrowsingDatabaseNew::DeleteChunks( 839 void SafeBrowsingDatabaseNew::DeleteChunks(
838 const std::vector<SBChunkDelete>& chunk_deletes) { 840 const std::vector<SBChunkDelete>& chunk_deletes) {
839 DCHECK_EQ(creation_loop_, MessageLoop::current()); 841 DCHECK_EQ(creation_loop_, MessageLoop::current());
840 842
841 if (corruption_detected_ || chunk_deletes.empty()) 843 if (corruption_detected_ || chunk_deletes.empty())
842 return; 844 return;
843 845
844 const std::string& list_name = chunk_deletes.front().list_name; 846 const std::string& list_name = chunk_deletes.front().list_name;
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after
1072 } 1074 }
1073 1075
1074 void SafeBrowsingDatabaseNew::UpdateDownloadStore() { 1076 void SafeBrowsingDatabaseNew::UpdateDownloadStore() {
1075 if (!download_store_.get()) 1077 if (!download_store_.get())
1076 return; 1078 return;
1077 1079
1078 // For download, we don't cache and save full hashes. 1080 // For download, we don't cache and save full hashes.
1079 std::vector<SBAddFullHash> empty_add_hashes; 1081 std::vector<SBAddFullHash> empty_add_hashes;
1080 1082
1081 // For download, backend lookup happens only if a prefix is in add list. 1083 // For download, backend lookup happens only if a prefix is in add list.
1082 // No need to pass in miss cache when calling FinishUpdate to calculate
1083 // bloomfilter false positives.
1084 std::set<SBPrefix> empty_miss_cache; 1084 std::set<SBPrefix> empty_miss_cache;
1085 1085
1086 // These results are not used after this call. Simply ignore the 1086 // These results are not used after this call. Simply ignore the
1087 // returned value after FinishUpdate(...). 1087 // returned value after FinishUpdate(...).
1088 SBAddPrefixes add_prefixes_result; 1088 SBAddPrefixes add_prefixes_result;
1089 std::vector<SBAddFullHash> add_full_hashes_result; 1089 std::vector<SBAddFullHash> add_full_hashes_result;
1090 1090
1091 if (!download_store_->FinishUpdate(empty_add_hashes, 1091 if (!download_store_->FinishUpdate(empty_add_hashes,
1092 empty_miss_cache, 1092 empty_miss_cache,
1093 &add_prefixes_result, 1093 &add_prefixes_result,
1094 &add_full_hashes_result)) 1094 &add_full_hashes_result))
1095 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); 1095 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
1096 1096
1097 int64 size_64; 1097 int64 file_size = GetFileSizeOrZero(download_filename_);
1098 if (file_util::GetFileSize(download_filename_, &size_64)) { 1098 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
1099 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", 1099 static_cast<int>(file_size / 1024));
1100 static_cast<int>(size_64 / 1024));
1101 }
1102 1100
1103 #if defined(OS_MACOSX) 1101 #if defined(OS_MACOSX)
1104 base::mac::SetFileBackupExclusion(download_filename_); 1102 base::mac::SetFileBackupExclusion(download_filename_);
1105 #endif 1103 #endif
1106 } 1104 }
1107 1105
1108 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1106 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1109 // Copy out the pending add hashes. Copy rather than swapping in 1107 // Copy out the pending add hashes. Copy rather than swapping in
1110 // case |ContainsBrowseURL()| is called before the new filter is complete. 1108 // case |ContainsBrowseURL()| is called before the new filter is complete.
1111 std::vector<SBAddFullHash> pending_add_hashes; 1109 std::vector<SBAddFullHash> pending_add_hashes;
1112 { 1110 {
1113 base::AutoLock locked(lookup_lock_); 1111 base::AutoLock locked(lookup_lock_);
1114 pending_add_hashes.insert(pending_add_hashes.end(), 1112 pending_add_hashes.insert(pending_add_hashes.end(),
1115 pending_browse_hashes_.begin(), 1113 pending_browse_hashes_.begin(),
1116 pending_browse_hashes_.end()); 1114 pending_browse_hashes_.end());
1117 } 1115 }
1118 1116
1119 // Measure the amount of IO during the bloom filter build. 1117 // Measure the amount of IO during the filter build.
1120 base::IoCounters io_before, io_after; 1118 base::IoCounters io_before, io_after;
1121 base::ProcessHandle handle = base::Process::Current().handle(); 1119 base::ProcessHandle handle = base::Process::Current().handle();
1122 scoped_ptr<base::ProcessMetrics> metric( 1120 scoped_ptr<base::ProcessMetrics> metric(
1123 #if !defined(OS_MACOSX) 1121 #if !defined(OS_MACOSX)
1124 base::ProcessMetrics::CreateProcessMetrics(handle) 1122 base::ProcessMetrics::CreateProcessMetrics(handle)
1125 #else 1123 #else
1126 // Getting stats only for the current process is enough, so NULL is fine. 1124 // Getting stats only for the current process is enough, so NULL is fine.
1127 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1125 base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1128 #endif 1126 #endif
1129 ); 1127 );
1130 1128
1131 // IoCounters are currently not supported on Mac, and may not be 1129 // IoCounters are currently not supported on Mac, and may not be
1132 // available for Linux, so we check the result and only show IO 1130 // available for Linux, so we check the result and only show IO
1133 // stats if they are available. 1131 // stats if they are available.
1134 const bool got_counters = metric->GetIOCounters(&io_before); 1132 const bool got_counters = metric->GetIOCounters(&io_before);
1135 1133
1136 const base::Time before = base::Time::Now(); 1134 const base::TimeTicks before = base::TimeTicks::Now();
1137 1135
1138 SBAddPrefixes add_prefixes; 1136 SBAddPrefixes add_prefixes;
1139 std::vector<SBAddFullHash> add_full_hashes; 1137 std::vector<SBAddFullHash> add_full_hashes;
1140 if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_, 1138 if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
1141 &add_prefixes, &add_full_hashes)) { 1139 &add_prefixes, &add_full_hashes)) {
1142 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1140 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1143 return; 1141 return;
1144 } 1142 }
1145 1143
1146 scoped_refptr<BloomFilter> bloom_filter; 1144 // TODO(shess): If |add_prefixes| were sorted by the prefix, it
1147 scoped_ptr<safe_browsing::PrefixSet> prefix_set; 1145 // could be passed directly to |PrefixSet()|, removing the need for
1148 FiltersFromAddPrefixes(add_prefixes, &bloom_filter, &prefix_set); 1146 // |prefixes|. For now, |prefixes| is useful while debugging
1147 // things.
1148 std::vector<SBPrefix> prefixes;
1149 prefixes.reserve(add_prefixes.size());
1150 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
1151 iter != add_prefixes.end(); ++iter) {
1152 prefixes.push_back(iter->prefix);
1153 }
1154
1155 std::sort(prefixes.begin(), prefixes.end());
1156 scoped_ptr<safe_browsing::PrefixSet>
1157 prefix_set(new safe_browsing::PrefixSet(prefixes));
1149 1158
1150 // This needs to be in sorted order by prefix for efficient access. 1159 // This needs to be in sorted order by prefix for efficient access.
1151 std::sort(add_full_hashes.begin(), add_full_hashes.end(), 1160 std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1152 SBAddFullHashPrefixLess); 1161 SBAddFullHashPrefixLess);
1153 1162
1154 // Swap in the newly built filter and cache. 1163 // Swap in the newly built filter and cache.
1155 { 1164 {
1156 base::AutoLock locked(lookup_lock_); 1165 base::AutoLock locked(lookup_lock_);
1157 full_browse_hashes_.swap(add_full_hashes); 1166 full_browse_hashes_.swap(add_full_hashes);
1158 1167
1159 // TODO(shess): If |CacheHashResults()| is posted between the 1168 // TODO(shess): If |CacheHashResults()| is posted between the
1160 // earlier lock and this clear, those pending hashes will be lost. 1169 // earlier lock and this clear, those pending hashes will be lost.
1161 // It could be fixed by only removing hashes which were collected 1170 // It could be fixed by only removing hashes which were collected
1162 // at the earlier point. I believe that is fail-safe as-is (the 1171 // at the earlier point. I believe that is fail-safe as-is (the
1163 // hash will be fetched again). 1172 // hash will be fetched again).
1164 pending_browse_hashes_.clear(); 1173 pending_browse_hashes_.clear();
1165 prefix_miss_cache_.clear(); 1174 prefix_miss_cache_.clear();
1166 browse_bloom_filter_.swap(bloom_filter); 1175 browse_bloom_filter_ = NULL; // Stop using the bloom filter.
1167 prefix_set_.swap(prefix_set); 1176 prefix_set_.swap(prefix_set);
1168 } 1177 }
1169 1178
1170 const base::TimeDelta bloom_gen = base::Time::Now() - before; 1179 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1180 << (base::TimeTicks::Now() - before).InMilliseconds()
1181 << " ms total. prefix count: " << add_prefixes.size();
1182 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1171 1183
1172 // Persist the bloom filter to disk. Since only this thread changes 1184 // Persist the prefix set to disk. Since only this thread changes
1173 // |browse_bloom_filter_|, there is no need to lock. 1185 // |prefix_set_|, there is no need to lock.
1174 WriteBloomFilter(); 1186 WritePrefixSet();
1175 1187
1176 // Gather statistics. 1188 // Gather statistics.
1177 if (got_counters && metric->GetIOCounters(&io_after)) { 1189 if (got_counters && metric->GetIOCounters(&io_after)) {
1178 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes", 1190 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
1179 static_cast<int>(io_after.ReadTransferCount - 1191 static_cast<int>(io_after.ReadTransferCount -
1180 io_before.ReadTransferCount) / 1024); 1192 io_before.ReadTransferCount) / 1024);
1181 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes", 1193 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
1182 static_cast<int>(io_after.WriteTransferCount - 1194 static_cast<int>(io_after.WriteTransferCount -
1183 io_before.WriteTransferCount) / 1024); 1195 io_before.WriteTransferCount) / 1024);
1184 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", 1196 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
1185 static_cast<int>(io_after.ReadOperationCount - 1197 static_cast<int>(io_after.ReadOperationCount -
1186 io_before.ReadOperationCount)); 1198 io_before.ReadOperationCount));
1187 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", 1199 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
1188 static_cast<int>(io_after.WriteOperationCount - 1200 static_cast<int>(io_after.WriteOperationCount -
1189 io_before.WriteOperationCount)); 1201 io_before.WriteOperationCount));
1190 } 1202 }
1191 DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in " 1203
1192 << bloom_gen.InMilliseconds() << " ms total. prefix count: " 1204 int64 file_size = GetFileSizeOrZero(prefix_set_filename_);
1193 << add_prefixes.size(); 1205 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
1194 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); 1206 static_cast<int>(file_size / 1024));
1195 UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes", 1207 file_size = GetFileSizeOrZero(browse_filename_);
1196 browse_bloom_filter_->size() / 1024); 1208 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1197 int64 size_64; 1209 static_cast<int>(file_size / 1024));
1198 if (file_util::GetFileSize(browse_filename_, &size_64)) {
1199 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1200 static_cast<int>(size_64 / 1024));
1201 }
1202 1210
1203 #if defined(OS_MACOSX) 1211 #if defined(OS_MACOSX)
1204 base::mac::SetFileBackupExclusion(browse_filename_); 1212 base::mac::SetFileBackupExclusion(browse_filename_);
1205 #endif 1213 #endif
1206 } 1214 }
1207 1215
1208 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() { 1216 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
1209 // Reset the database after the current task has unwound (but only 1217 // Reset the database after the current task has unwound (but only
1210 // reset once within the scope of a given task). 1218 // reset once within the scope of a given task).
1211 if (!reset_factory_.HasWeakPtrs()) { 1219 if (!reset_factory_.HasWeakPtrs()) {
1212 RecordFailure(FAILURE_DATABASE_CORRUPT); 1220 RecordFailure(FAILURE_DATABASE_CORRUPT);
1213 MessageLoop::current()->PostTask(FROM_HERE, 1221 MessageLoop::current()->PostTask(FROM_HERE,
1214 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase, 1222 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
1215 reset_factory_.GetWeakPtr())); 1223 reset_factory_.GetWeakPtr()));
1216 } 1224 }
1217 } 1225 }
1218 1226
1219 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() { 1227 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
1220 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER); 1228 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
1221 corruption_detected_ = true; // Stop updating the database. 1229 corruption_detected_ = true; // Stop updating the database.
1222 ResetDatabase(); 1230 ResetDatabase();
1223 DLOG(FATAL) << "SafeBrowsing database was corrupt and reset"; 1231 DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
1224 } 1232 }
1225 1233
1226 // TODO(shess): I'm not clear why this code doesn't have any 1234 // TODO(shess): I'm not clear why this code doesn't have any
1227 // real error-handling. 1235 // real error-handling.
1228 void SafeBrowsingDatabaseNew::LoadBloomFilter() { 1236 // TODO(shess): After a transition period, this can convert to just
1237 // giving up if the prefix set is not on disk.
1238 void SafeBrowsingDatabaseNew::LoadBloomFilterOrPrefixSet() {
1229 DCHECK_EQ(creation_loop_, MessageLoop::current()); 1239 DCHECK_EQ(creation_loop_, MessageLoop::current());
1230 DCHECK(!bloom_filter_filename_.empty()); 1240 DCHECK(!bloom_filter_filename_.empty());
1241 DCHECK(!prefix_set_filename_.empty());
1231 1242
1232 // If we're missing either of the database or filter files, we wait until the 1243 // If there is no database, the filter cannot be used.
1233 // next update to generate a new filter. 1244 base::PlatformFileInfo db_info;
1234 // TODO(paulg): Investigate how often the filter file is missing and how 1245 if (!file_util::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
1235 // expensive it would be to regenerate it.
1236 int64 size_64 = 0;
1237 if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0)
1238 return; 1246 return;
1239 1247
1240 if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) || 1248 RecordFilterLoad(FILTER_LOAD);
1241 size_64 == 0) { 1249
1242 RecordFailure(FAILURE_DATABASE_FILTER_MISSING); 1250 // If there is no prefix set, or if the file is too old, check for a
1251 // bloom filter.
1252 // TODO(shess): The time check is in case this code gets reverted
1253 // and re-landed. It might be good to keep as a sanity check.
1254 // Better would be to put the db's checksum in the filter file.
1255 base::PlatformFileInfo prefix_set_info;
1256 if (!file_util::GetFileInfo(prefix_set_filename_, &prefix_set_info) ||
1257 prefix_set_info.size == 0 ||
1258 prefix_set_info.last_modified < db_info.last_modified) {
1259 // No prefix set.
1260 prefix_set_.reset();
1261
1262 int64 file_size = GetFileSizeOrZero(bloom_filter_filename_);
1263 if (!file_size) {
1264 RecordFailure(FAILURE_DATABASE_FILTER_MISSING);
1265 return;
1266 }
1267
1268 const base::TimeTicks before = base::TimeTicks::Now();
1269 browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
1270 DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
1271 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1272 UMA_HISTOGRAM_TIMES("SB2.BloomFilterLoad", base::TimeTicks::Now() - before);
1273
1274 if (!browse_bloom_filter_.get())
1275 RecordFailure(FAILURE_DATABASE_FILTER_READ);
1276 else
1277 RecordFilterLoad(FILTER_LOADED_BLOOM_FILTER);
1278
1243 return; 1279 return;
1244 } 1280 }
1245 1281
1282 // Once there is a prefix set stored, never use the bloom filter.
1283 browse_bloom_filter_ = NULL;
1284
1285 // TODO(shess): The bloom filter file should have been deleted in
1286 // WritePrefixSet(), unless this code is reverted and re-landed.
1287 // Just paranoid.
1288 file_util::Delete(bloom_filter_filename_, false);
1289
1246 const base::TimeTicks before = base::TimeTicks::Now(); 1290 const base::TimeTicks before = base::TimeTicks::Now();
1247 browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); 1291 prefix_set_.reset(safe_browsing::PrefixSet::LoadFile(prefix_set_filename_));
1248 DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " 1292 DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
1249 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 1293 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1294 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
1250 1295
1251 if (!browse_bloom_filter_.get()) 1296 if (!prefix_set_.get())
1252 RecordFailure(FAILURE_DATABASE_FILTER_READ); 1297 RecordFailure(FAILURE_DATABASE_PREFIX_SET_READ);
1253 1298 else
1254 // Use an empty prefix set until the first update. 1299 RecordFilterLoad(FILTER_LOADED_PREFIX_SET);
1255 prefix_set_.reset(CreateEmptyPrefixSet());
1256 } 1300 }
1257 1301
1258 bool SafeBrowsingDatabaseNew::Delete() { 1302 bool SafeBrowsingDatabaseNew::Delete() {
1259 DCHECK_EQ(creation_loop_, MessageLoop::current()); 1303 DCHECK_EQ(creation_loop_, MessageLoop::current());
1260 1304
1261 const bool r1 = browse_store_->Delete(); 1305 const bool r1 = browse_store_->Delete();
1262 if (!r1) 1306 if (!r1)
1263 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1307 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1264 1308
1265 const bool r2 = download_store_.get() ? download_store_->Delete() : true; 1309 const bool r2 = download_store_.get() ? download_store_->Delete() : true;
1266 if (!r2) 1310 if (!r2)
1267 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1311 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1268 1312
1269 const bool r3 = csd_whitelist_store_.get() ? 1313 const bool r3 = csd_whitelist_store_.get() ?
1270 csd_whitelist_store_->Delete() : true; 1314 csd_whitelist_store_->Delete() : true;
1271 if (!r3) 1315 if (!r3)
1272 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1316 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1273 1317
1274 const bool r4 = download_whitelist_store_.get() ? 1318 const bool r4 = download_whitelist_store_.get() ?
1275 download_whitelist_store_->Delete() : true; 1319 download_whitelist_store_->Delete() : true;
1276 if (!r4) 1320 if (!r4)
1277 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1321 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
1278 1322
1279 const bool r5 = file_util::Delete(bloom_filter_filename_, false); 1323 const bool r5 = file_util::Delete(bloom_filter_filename_, false);
1280 if (!r5) 1324 if (!r5)
1281 RecordFailure(FAILURE_DATABASE_FILTER_DELETE); 1325 RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
1282 return r1 && r2 && r3 && r4 && r5; 1326
1327 const bool r6 = file_util::Delete(prefix_set_filename_, false);
1328 if (!r6)
1329 RecordFailure(FAILURE_DATABASE_PREFIX_SET_DELETE);
1330 return r1 && r2 && r3 && r4 && r5 && r6;
1283 } 1331 }
1284 1332
1285 void SafeBrowsingDatabaseNew::WriteBloomFilter() { 1333 void SafeBrowsingDatabaseNew::WritePrefixSet() {
1286 DCHECK_EQ(creation_loop_, MessageLoop::current()); 1334 DCHECK_EQ(creation_loop_, MessageLoop::current());
1287 1335
1288 if (!browse_bloom_filter_.get()) 1336 if (!prefix_set_.get())
1289 return; 1337 return;
1290 1338
1291 const base::TimeTicks before = base::TimeTicks::Now(); 1339 const base::TimeTicks before = base::TimeTicks::Now();
1292 const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_); 1340 const bool write_ok = prefix_set_->WriteFile(prefix_set_filename_);
1293 DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in " 1341 DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
1294 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 1342 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
1343 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
1295 1344
1296 if (!write_ok) 1345 if (!write_ok)
1297 RecordFailure(FAILURE_DATABASE_FILTER_WRITE); 1346 RecordFailure(FAILURE_DATABASE_PREFIX_SET_WRITE);
1347
1348 // Delete any stale bloom filter (checking before deleting is
1349 // unlikely to be faster).
1350 file_util::Delete(bloom_filter_filename_, false);
1298 1351
1299 #if defined(OS_MACOSX) 1352 #if defined(OS_MACOSX)
1300 base::mac::SetFileBackupExclusion(bloom_filter_filename_); 1353 base::mac::SetFileBackupExclusion(prefix_set_filename_);
1301 #endif 1354 #endif
1302 } 1355 }
1303 1356
1304 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) { 1357 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
1305 base::AutoLock locked(lookup_lock_); 1358 base::AutoLock locked(lookup_lock_);
1306 whitelist->second = true; 1359 whitelist->second = true;
1307 whitelist->first.clear(); 1360 whitelist->first.clear();
1308 } 1361 }
1309 1362
1310 void SafeBrowsingDatabaseNew::LoadWhitelist( 1363 void SafeBrowsingDatabaseNew::LoadWhitelist(
(...skipping 19 matching lines...) Expand all
1330 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), 1383 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
1331 kill_switch)) { 1384 kill_switch)) {
1332 // The kill switch is whitelisted hence we whitelist all URLs. 1385 // The kill switch is whitelisted hence we whitelist all URLs.
1333 WhitelistEverything(whitelist); 1386 WhitelistEverything(whitelist);
1334 } else { 1387 } else {
1335 base::AutoLock locked(lookup_lock_); 1388 base::AutoLock locked(lookup_lock_);
1336 whitelist->second = false; 1389 whitelist->second = false;
1337 whitelist->first.swap(new_whitelist); 1390 whitelist->first.swap(new_whitelist);
1338 } 1391 }
1339 } 1392 }
OLDNEW
« no previous file with comments | « chrome/browser/safe_browsing/safe_browsing_database.h ('k') | chrome/browser/safe_browsing/safe_browsing_database_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698