Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(582)

Unified Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 10896048: Transition safe browsing from bloom filter to prefix set. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix prefix set read/write for empty/sparse sets. Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/safe_browsing/safe_browsing_database.cc
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
index 784904eb6a2bed16750367f9808aa24272856fbc..723743804413d7e4f602d87d8ee2e50c502ae1ff 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -31,6 +31,8 @@ namespace {
// Filename suffix for the bloom filter.
const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2");
+// Filename suffix for the prefix set.
+const FilePath::CharType kPrefixSetFile[] = FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.
const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
@@ -40,7 +42,11 @@ const FilePath::CharType kCsdWhitelistDBFile[] =
const FilePath::CharType kDownloadWhitelistDBFile[] =
FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for browse store.
-// TODO(lzheng): change to a better name when we change the file format.
+// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
+// Unfortunately, to change the name implies lots of transition code
+// for little benefit. If/when file formats change (say to put all
+// the data in one file), that would be a convenient point to rectify
+// this.
const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
// The maximum staleness for a cached entry.
@@ -258,42 +264,29 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
return a.full_hash.prefix < b.full_hash.prefix;
}
-// Helper to reduce code duplication.
-safe_browsing::PrefixSet* CreateEmptyPrefixSet() {
- return new safe_browsing::PrefixSet(std::vector<SBPrefix>());
-}
-
-// Generate |PrefixSet| and |BloomFilter| instances from the contents
-// of |add_prefixes|.
-void FiltersFromAddPrefixes(
- const SBAddPrefixes& add_prefixes,
- scoped_refptr<BloomFilter>* bloom_filter,
- scoped_ptr<safe_browsing::PrefixSet>* prefix_set) {
- const int filter_size =
- BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
- *bloom_filter = new BloomFilter(filter_size);
- if (add_prefixes.empty()) {
- prefix_set->reset(CreateEmptyPrefixSet());
- return;
- }
+// Track what LoadBloomFilterOrPrefixSet() loaded.
+enum FilterLoad {
+ FILTER_LOAD, // All calls.
+ FILTER_LOADED_PREFIX_SET, // Cases loaded from prefix set.
+ FILTER_LOADED_BLOOM_FILTER, // Cases loaded from bloom filter.
- // TODO(shess): If |add_prefixes| were sorted by the prefix, it
- // could be passed directly to |PrefixSet()|, removing the need for
- // |prefixes|.
- std::vector<SBPrefix> prefixes;
- prefixes.reserve(add_prefixes.size());
- for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
- iter != add_prefixes.end(); ++iter) {
- prefixes.push_back(iter->prefix);
- }
- std::sort(prefixes.begin(), prefixes.end());
+ // Memory space for histograms is determined by the max. ALWAYS ADD
+ // NEW VALUES BEFORE THIS ONE.
+ FILTER_LOAD_MAX
+};
- for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin();
- iter != prefixes.end(); ++iter) {
- bloom_filter->get()->Insert(*iter);
- }
+void RecordFilterLoad(FilterLoad event_type) {
+ UMA_HISTOGRAM_ENUMERATION("SB2.FilterLoad", event_type,
+ FILTER_LOAD_MAX);
+}
- prefix_set->reset(new safe_browsing::PrefixSet(prefixes));
+// This code always checks for non-zero file size. This helper makes
+// that less verbose.
+int64 GetFileSizeOrZero(const FilePath& file_path) {
+ int64 size_64;
+ if (!file_util::GetFileSize(file_path, &size_64))
+ return 0;
+ return size_64;
}
} // namespace
@@ -359,6 +352,12 @@ FilePath SafeBrowsingDatabase::BloomFilterForFilename(
}
// static
+FilePath SafeBrowsingDatabase::PrefixSetForFilename(
+ const FilePath& db_filename) {
+ return FilePath(db_filename.value() + kPrefixSetFile);
+}
+
+// static
FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
const FilePath& db_filename) {
return FilePath(db_filename.value() + kCsdWhitelistDBFile);
@@ -433,6 +432,7 @@ void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) {
browse_filename_ = BrowseDBFilename(filename_base);
bloom_filter_filename_ = BloomFilterForFilename(browse_filename_);
+ prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
browse_store_->Init(
browse_filename_,
@@ -448,7 +448,7 @@ void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) {
base::AutoLock locked(lookup_lock_);
full_browse_hashes_.clear();
pending_browse_hashes_.clear();
- LoadBloomFilter();
+ LoadBloomFilterOrPrefixSet();
}
if (download_store_.get()) {
@@ -511,12 +511,8 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
full_browse_hashes_.clear();
pending_browse_hashes_.clear();
prefix_miss_cache_.clear();
- // TODO(shess): This could probably be |bloom_filter_.reset()|.
- browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize *
- BloomFilter::kBloomFilterSizeRatio);
- // TODO(shess): It is simpler for the code to assume that presence
- // of a bloom filter always implies presence of a prefix set.
- prefix_set_.reset(CreateEmptyPrefixSet());
+ browse_bloom_filter_ = NULL;
+ prefix_set_.reset();
}
// Wants to acquire the lock itself.
WhitelistEverything(&csd_whitelist_);
@@ -543,17 +539,23 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
return false;
// This function is called on the I/O thread, prevent changes to
- // bloom filter and caches.
+ // filter and caches.
base::AutoLock locked(lookup_lock_);
- if (!browse_bloom_filter_.get())
+ // TODO(shess): During transition, users will have a bloom filter
+ // but no prefix set until first update, after which they'll have a
+ // prefix set but no bloom filter.
+ const bool use_prefix_set = prefix_set_.get() != NULL;
+ if (!use_prefix_set && !browse_bloom_filter_.get())
return false;
size_t miss_count = 0;
for (size_t i = 0; i < full_hashes.size(); ++i) {
- if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
- prefix_hits->push_back(full_hashes[i].prefix);
- if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
+ const SBPrefix prefix = full_hashes[i].prefix;
+ if ((use_prefix_set && prefix_set_->Exists(prefix)) ||
+ (!use_prefix_set && browse_bloom_filter_->Exists(prefix))) {
+ prefix_hits->push_back(prefix);
+ if (prefix_miss_cache_.count(prefix) > 0)
++miss_count;
}
}
@@ -813,7 +815,7 @@ void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
if (corruption_detected_ || chunks.empty())
return;
- const base::Time insert_start = base::Time::Now();
+ const base::TimeTicks before = base::TimeTicks::Now();
const int list_id = safe_browsing_util::GetListId(list_name);
DVLOG(2) << list_name << ": " << list_id;
@@ -831,7 +833,7 @@ void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
}
store->FinishChunk();
- UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start);
+ UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
}
void SafeBrowsingDatabaseNew::DeleteChunks(
@@ -1079,8 +1081,6 @@ void SafeBrowsingDatabaseNew::UpdateDownloadStore() {
std::vector<SBAddFullHash> empty_add_hashes;
// For download, backend lookup happens only if a prefix is in add list.
- // No need to pass in miss cache when call FinishUpdate to caculate
- // bloomfilter false positives.
std::set<SBPrefix> empty_miss_cache;
// These results are not used after this call. Simply ignore the
@@ -1094,11 +1094,9 @@ void SafeBrowsingDatabaseNew::UpdateDownloadStore() {
&add_full_hashes_result))
RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
- int64 size_64;
- if (file_util::GetFileSize(download_filename_, &size_64)) {
- UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
- static_cast<int>(size_64 / 1024));
- }
+ int64 file_size = GetFileSizeOrZero(download_filename_);
+ UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
+ static_cast<int>(file_size / 1024));
#if defined(OS_MACOSX)
base::mac::SetFileBackupExclusion(download_filename_);
@@ -1116,7 +1114,7 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
pending_browse_hashes_.end());
}
- // Measure the amount of IO during the bloom filter build.
+ // Measure the amount of IO during the filter build.
base::IoCounters io_before, io_after;
base::ProcessHandle handle = base::Process::Current().handle();
scoped_ptr<base::ProcessMetrics> metric(
@@ -1133,7 +1131,7 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
// stats if they are available.
const bool got_counters = metric->GetIOCounters(&io_before);
- const base::Time before = base::Time::Now();
+ const base::TimeTicks before = base::TimeTicks::Now();
SBAddPrefixes add_prefixes;
std::vector<SBAddFullHash> add_full_hashes;
@@ -1143,9 +1141,20 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
return;
}
- scoped_refptr<BloomFilter> bloom_filter;
- scoped_ptr<safe_browsing::PrefixSet> prefix_set;
- FiltersFromAddPrefixes(add_prefixes, &bloom_filter, &prefix_set);
+ // TODO(shess): If |add_prefixes| were sorted by the prefix, it
+ // could be passed directly to |PrefixSet()|, removing the need for
+ // |prefixes|. For now, |prefixes| is useful while debugging
+ // things.
+ std::vector<SBPrefix> prefixes;
+ prefixes.reserve(add_prefixes.size());
+ for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
+ iter != add_prefixes.end(); ++iter) {
+ prefixes.push_back(iter->prefix);
+ }
+
+ std::sort(prefixes.begin(), prefixes.end());
+ scoped_ptr<safe_browsing::PrefixSet>
+ prefix_set(new safe_browsing::PrefixSet(prefixes));
// This needs to be in sorted order by prefix for efficient access.
std::sort(add_full_hashes.begin(), add_full_hashes.end(),
@@ -1163,15 +1172,18 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
// hash will be fetched again).
pending_browse_hashes_.clear();
prefix_miss_cache_.clear();
- browse_bloom_filter_.swap(bloom_filter);
+ browse_bloom_filter_ = NULL; // Stop using the bloom filter.
prefix_set_.swap(prefix_set);
}
- const base::TimeDelta bloom_gen = base::Time::Now() - before;
+ DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
+ << (base::TimeTicks::Now() - before).InMilliseconds()
+ << " ms total. prefix count: " << add_prefixes.size();
+ UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
- // Persist the bloom filter to disk. Since only this thread changes
- // |browse_bloom_filter_|, there is no need to lock.
- WriteBloomFilter();
+ // Persist the prefix set to disk. Since only this thread changes
+ // |prefix_set_|, there is no need to lock.
+ WritePrefixSet();
// Gather statistics.
if (got_counters && metric->GetIOCounters(&io_after)) {
@@ -1188,17 +1200,13 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
static_cast<int>(io_after.WriteOperationCount -
io_before.WriteOperationCount));
}
- DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in "
- << bloom_gen.InMilliseconds() << " ms total. prefix count: "
- << add_prefixes.size();
- UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
- UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes",
- browse_bloom_filter_->size() / 1024);
- int64 size_64;
- if (file_util::GetFileSize(browse_filename_, &size_64)) {
- UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
- static_cast<int>(size_64 / 1024));
- }
+
+ int64 file_size = GetFileSizeOrZero(prefix_set_filename_);
+ UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
+ static_cast<int>(file_size / 1024));
+ file_size = GetFileSizeOrZero(browse_filename_);
+ UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
+ static_cast<int>(file_size / 1024));
#if defined(OS_MACOSX)
base::mac::SetFileBackupExclusion(browse_filename_);
@@ -1225,34 +1233,70 @@ void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
// TODO(shess): I'm not clear why this code doesn't have any
// real error-handling.
-void SafeBrowsingDatabaseNew::LoadBloomFilter() {
+// TODO(shess): After a transition period, this can convert to just
+// giving up if the prefix set is not on disk.
+void SafeBrowsingDatabaseNew::LoadBloomFilterOrPrefixSet() {
DCHECK_EQ(creation_loop_, MessageLoop::current());
DCHECK(!bloom_filter_filename_.empty());
+ DCHECK(!prefix_set_filename_.empty());
- // If we're missing either of the database or filter files, we wait until the
- // next update to generate a new filter.
- // TODO(paulg): Investigate how often the filter file is missing and how
- // expensive it would be to regenerate it.
- int64 size_64 = 0;
- if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0)
+ // If there is no database, the filter cannot be used.
+ base::PlatformFileInfo db_info;
+ if (!file_util::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
return;
- if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) ||
- size_64 == 0) {
- RecordFailure(FAILURE_DATABASE_FILTER_MISSING);
+ RecordFilterLoad(FILTER_LOAD);
+
+ // If there is no prefix set, or if the file is too old, check for a
+ // bloom filter.
+ // TODO(shess): The time check is in case this code gets reverted
+ // and re-landed. It might be good to keep as a sanity check.
+ // Better would be to put the db's checksum in the filter file.
+ base::PlatformFileInfo prefix_set_info;
+ if (!file_util::GetFileInfo(prefix_set_filename_, &prefix_set_info) ||
+ prefix_set_info.size == 0 ||
+ prefix_set_info.last_modified < db_info.last_modified) {
+ // No prefix set.
+ prefix_set_.reset();
+
+ int64 file_size = GetFileSizeOrZero(bloom_filter_filename_);
+ if (!file_size) {
+ RecordFailure(FAILURE_DATABASE_FILTER_MISSING);
+ return;
+ }
+
+ const base::TimeTicks before = base::TimeTicks::Now();
+ browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
+ DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
+ << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
+ UMA_HISTOGRAM_TIMES("SB2.BloomFilterLoad", base::TimeTicks::Now() - before);
+
+ if (!browse_bloom_filter_.get())
+ RecordFailure(FAILURE_DATABASE_FILTER_READ);
+ else
+ RecordFilterLoad(FILTER_LOADED_BLOOM_FILTER);
+
return;
}
+ // Once there is a prefix set stored, never use the bloom filter.
+ browse_bloom_filter_ = NULL;
+
+ // TODO(shess): The bloom filter file should have been deleted in
+ // WritePrefixSet(), unless this code is reverted and re-landed.
+ // Just paranoid.
+ file_util::Delete(bloom_filter_filename_, false);
+
const base::TimeTicks before = base::TimeTicks::Now();
- browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
- DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
+ prefix_set_.reset(safe_browsing::PrefixSet::LoadFile(prefix_set_filename_));
+ DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
<< (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
+ UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
- if (!browse_bloom_filter_.get())
- RecordFailure(FAILURE_DATABASE_FILTER_READ);
-
- // Use an empty prefix set until the first update.
- prefix_set_.reset(CreateEmptyPrefixSet());
+ if (!prefix_set_.get())
+ RecordFailure(FAILURE_DATABASE_PREFIX_SET_READ);
+ else
+ RecordFilterLoad(FILTER_LOADED_PREFIX_SET);
}
bool SafeBrowsingDatabaseNew::Delete() {
@@ -1279,25 +1323,34 @@ bool SafeBrowsingDatabaseNew::Delete() {
const bool r5 = file_util::Delete(bloom_filter_filename_, false);
if (!r5)
RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
- return r1 && r2 && r3 && r4 && r5;
+
+ const bool r6 = file_util::Delete(prefix_set_filename_, false);
+ if (!r6)
+ RecordFailure(FAILURE_DATABASE_PREFIX_SET_DELETE);
+ return r1 && r2 && r3 && r4 && r5 && r6;
}
-void SafeBrowsingDatabaseNew::WriteBloomFilter() {
+void SafeBrowsingDatabaseNew::WritePrefixSet() {
DCHECK_EQ(creation_loop_, MessageLoop::current());
- if (!browse_bloom_filter_.get())
+ if (!prefix_set_.get())
return;
const base::TimeTicks before = base::TimeTicks::Now();
- const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_);
- DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in "
+ const bool write_ok = prefix_set_->WriteFile(prefix_set_filename_);
+ DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
<< (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
+ UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
if (!write_ok)
- RecordFailure(FAILURE_DATABASE_FILTER_WRITE);
+ RecordFailure(FAILURE_DATABASE_PREFIX_SET_WRITE);
+
+ // Delete any stale bloom filter (checking before deleting is
+ // unlikely to be faster).
+ file_util::Delete(bloom_filter_filename_, false);
#if defined(OS_MACOSX)
- base::mac::SetFileBackupExclusion(bloom_filter_filename_);
+ base::mac::SetFileBackupExclusion(prefix_set_filename_);
#endif
}
« no previous file with comments | « chrome/browser/safe_browsing/safe_browsing_database.h ('k') | chrome/browser/safe_browsing/safe_browsing_database_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698