Index: chrome/browser/safe_browsing/safe_browsing_database.cc |
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc |
index 784904eb6a2bed16750367f9808aa24272856fbc..723743804413d7e4f602d87d8ee2e50c502ae1ff 100644 |
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc |
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc |
@@ -31,6 +31,8 @@ namespace { |
// Filename suffix for the bloom filter. |
const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); |
+// Filename suffix for the prefix set. |
+const FilePath::CharType kPrefixSetFile[] = FILE_PATH_LITERAL(" Prefix Set"); |
// Filename suffix for download store. |
const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download"); |
// Filename suffix for client-side phishing detection whitelist store. |
@@ -40,7 +42,11 @@ const FilePath::CharType kCsdWhitelistDBFile[] = |
const FilePath::CharType kDownloadWhitelistDBFile[] = |
FILE_PATH_LITERAL(" Download Whitelist"); |
// Filename suffix for browse store. |
-// TODO(lzheng): change to a better name when we change the file format. |
+// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win. |
+// Unfortunately, to change the name implies lots of transition code |
+// for little benefit. If/when file formats change (say to put all |
+// the data in one file), that would be a convenient point to rectify |
+// this. |
const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); |
// The maximum staleness for a cached entry. |
@@ -258,42 +264,29 @@ bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { |
return a.full_hash.prefix < b.full_hash.prefix; |
} |
-// Helper to reduce code duplication. |
-safe_browsing::PrefixSet* CreateEmptyPrefixSet() { |
- return new safe_browsing::PrefixSet(std::vector<SBPrefix>()); |
-} |
- |
-// Generate |PrefixSet| and |BloomFilter| instances from the contents |
-// of |add_prefixes|. |
-void FiltersFromAddPrefixes( |
- const SBAddPrefixes& add_prefixes, |
- scoped_refptr<BloomFilter>* bloom_filter, |
- scoped_ptr<safe_browsing::PrefixSet>* prefix_set) { |
- const int filter_size = |
- BloomFilter::FilterSizeForKeyCount(add_prefixes.size()); |
- *bloom_filter = new BloomFilter(filter_size); |
- if (add_prefixes.empty()) { |
- prefix_set->reset(CreateEmptyPrefixSet()); |
- return; |
- } |
+// Track what LoadBloomFilterOrPrefixSet() loaded. |
+enum FilterLoad { |
+ FILTER_LOAD, // All calls. |
+ FILTER_LOADED_PREFIX_SET, // Cases loaded from prefix set. |
+ FILTER_LOADED_BLOOM_FILTER, // Cases loaded from bloom filter. |
- // TODO(shess): If |add_prefixes| were sorted by the prefix, it |
- // could be passed directly to |PrefixSet()|, removing the need for |
- // |prefixes|. |
- std::vector<SBPrefix> prefixes; |
- prefixes.reserve(add_prefixes.size()); |
- for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); |
- iter != add_prefixes.end(); ++iter) { |
- prefixes.push_back(iter->prefix); |
- } |
- std::sort(prefixes.begin(), prefixes.end()); |
+ // Memory space for histograms is determined by the max. ALWAYS ADD |
+ // NEW VALUES BEFORE THIS ONE. |
+ FILTER_LOAD_MAX |
+}; |
- for (std::vector<SBPrefix>::const_iterator iter = prefixes.begin(); |
- iter != prefixes.end(); ++iter) { |
- bloom_filter->get()->Insert(*iter); |
- } |
+void RecordFilterLoad(FilterLoad event_type) { |
+ UMA_HISTOGRAM_ENUMERATION("SB2.FilterLoad", event_type, |
+ FILTER_LOAD_MAX); |
+} |
- prefix_set->reset(new safe_browsing::PrefixSet(prefixes)); |
+// This code always checks for non-zero file size. This helper makes |
+// that less verbose. |
+int64 GetFileSizeOrZero(const FilePath& file_path) { |
+ int64 size_64; |
+ if (!file_util::GetFileSize(file_path, &size_64)) |
+ return 0; |
+ return size_64; |
} |
} // namespace |
@@ -359,6 +352,12 @@ FilePath SafeBrowsingDatabase::BloomFilterForFilename( |
} |
// static |
+FilePath SafeBrowsingDatabase::PrefixSetForFilename( |
+ const FilePath& db_filename) { |
+ return FilePath(db_filename.value() + kPrefixSetFile); |
+} |
+ |
+// static |
FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( |
const FilePath& db_filename) { |
return FilePath(db_filename.value() + kCsdWhitelistDBFile); |
@@ -433,6 +432,7 @@ void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { |
browse_filename_ = BrowseDBFilename(filename_base); |
bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); |
+ prefix_set_filename_ = PrefixSetForFilename(browse_filename_); |
browse_store_->Init( |
browse_filename_, |
@@ -448,7 +448,7 @@ void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { |
base::AutoLock locked(lookup_lock_); |
full_browse_hashes_.clear(); |
pending_browse_hashes_.clear(); |
- LoadBloomFilter(); |
+ LoadBloomFilterOrPrefixSet(); |
} |
if (download_store_.get()) { |
@@ -511,12 +511,8 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() { |
full_browse_hashes_.clear(); |
pending_browse_hashes_.clear(); |
prefix_miss_cache_.clear(); |
- // TODO(shess): This could probably be |bloom_filter_.reset()|. |
- browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * |
- BloomFilter::kBloomFilterSizeRatio); |
- // TODO(shess): It is simpler for the code to assume that presence |
- // of a bloom filter always implies presence of a prefix set. |
- prefix_set_.reset(CreateEmptyPrefixSet()); |
+ browse_bloom_filter_ = NULL; |
+ prefix_set_.reset(); |
} |
// Wants to acquire the lock itself. |
WhitelistEverything(&csd_whitelist_); |
@@ -543,17 +539,23 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( |
return false; |
// This function is called on the I/O thread, prevent changes to |
- // bloom filter and caches. |
+ // filter and caches. |
base::AutoLock locked(lookup_lock_); |
- if (!browse_bloom_filter_.get()) |
+ // TODO(shess): During transition, users will have a bloom filter |
+ // but no prefix set until first update, after which they'll have a |
+ // prefix set but no bloom filter. |
+ const bool use_prefix_set = prefix_set_.get() != NULL; |
+ if (!use_prefix_set && !browse_bloom_filter_.get()) |
return false; |
size_t miss_count = 0; |
for (size_t i = 0; i < full_hashes.size(); ++i) { |
- if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { |
- prefix_hits->push_back(full_hashes[i].prefix); |
- if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) |
+ const SBPrefix prefix = full_hashes[i].prefix; |
+ if ((use_prefix_set && prefix_set_->Exists(prefix)) || |
+ (!use_prefix_set && browse_bloom_filter_->Exists(prefix))) { |
+ prefix_hits->push_back(prefix); |
+ if (prefix_miss_cache_.count(prefix) > 0) |
++miss_count; |
} |
} |
@@ -813,7 +815,7 @@ void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name, |
if (corruption_detected_ || chunks.empty()) |
return; |
- const base::Time insert_start = base::Time::Now(); |
+ const base::TimeTicks before = base::TimeTicks::Now(); |
const int list_id = safe_browsing_util::GetListId(list_name); |
DVLOG(2) << list_name << ": " << list_id; |
@@ -831,7 +833,7 @@ void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name, |
} |
store->FinishChunk(); |
- UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start); |
+ UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before); |
} |
void SafeBrowsingDatabaseNew::DeleteChunks( |
@@ -1079,8 +1081,6 @@ void SafeBrowsingDatabaseNew::UpdateDownloadStore() { |
std::vector<SBAddFullHash> empty_add_hashes; |
// For download, backend lookup happens only if a prefix is in add list. |
- // No need to pass in miss cache when call FinishUpdate to caculate |
- // bloomfilter false positives. |
std::set<SBPrefix> empty_miss_cache; |
// These results are not used after this call. Simply ignore the |
@@ -1094,11 +1094,9 @@ void SafeBrowsingDatabaseNew::UpdateDownloadStore() { |
&add_full_hashes_result)) |
RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); |
- int64 size_64; |
- if (file_util::GetFileSize(download_filename_, &size_64)) { |
- UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", |
- static_cast<int>(size_64 / 1024)); |
- } |
+ int64 file_size = GetFileSizeOrZero(download_filename_); |
+ UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", |
+ static_cast<int>(file_size / 1024)); |
#if defined(OS_MACOSX) |
base::mac::SetFileBackupExclusion(download_filename_); |
@@ -1116,7 +1114,7 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { |
pending_browse_hashes_.end()); |
} |
- // Measure the amount of IO during the bloom filter build. |
+ // Measure the amount of IO during the filter build. |
base::IoCounters io_before, io_after; |
base::ProcessHandle handle = base::Process::Current().handle(); |
scoped_ptr<base::ProcessMetrics> metric( |
@@ -1133,7 +1131,7 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { |
// stats if they are available. |
const bool got_counters = metric->GetIOCounters(&io_before); |
- const base::Time before = base::Time::Now(); |
+ const base::TimeTicks before = base::TimeTicks::Now(); |
SBAddPrefixes add_prefixes; |
std::vector<SBAddFullHash> add_full_hashes; |
@@ -1143,9 +1141,20 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { |
return; |
} |
- scoped_refptr<BloomFilter> bloom_filter; |
- scoped_ptr<safe_browsing::PrefixSet> prefix_set; |
- FiltersFromAddPrefixes(add_prefixes, &bloom_filter, &prefix_set); |
+ // TODO(shess): If |add_prefixes| were sorted by the prefix, it |
+ // could be passed directly to |PrefixSet()|, removing the need for |
+ // |prefixes|. For now, |prefixes| is useful while debugging |
+ // things. |
+ std::vector<SBPrefix> prefixes; |
+ prefixes.reserve(add_prefixes.size()); |
+ for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); |
+ iter != add_prefixes.end(); ++iter) { |
+ prefixes.push_back(iter->prefix); |
+ } |
+ |
+ std::sort(prefixes.begin(), prefixes.end()); |
+ scoped_ptr<safe_browsing::PrefixSet> |
+ prefix_set(new safe_browsing::PrefixSet(prefixes)); |
// This needs to be in sorted order by prefix for efficient access. |
std::sort(add_full_hashes.begin(), add_full_hashes.end(), |
@@ -1163,15 +1172,18 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { |
// hash will be fetched again). |
pending_browse_hashes_.clear(); |
prefix_miss_cache_.clear(); |
- browse_bloom_filter_.swap(bloom_filter); |
+ browse_bloom_filter_ = NULL; // Stop using the bloom filter. |
prefix_set_.swap(prefix_set); |
} |
- const base::TimeDelta bloom_gen = base::Time::Now() - before; |
+ DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in " |
+ << (base::TimeTicks::Now() - before).InMilliseconds() |
+ << " ms total. prefix count: " << add_prefixes.size(); |
+ UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); |
- // Persist the bloom filter to disk. Since only this thread changes |
- // |browse_bloom_filter_|, there is no need to lock. |
- WriteBloomFilter(); |
+ // Persist the prefix set to disk. Since only this thread changes |
+ // |prefix_set_|, there is no need to lock. |
+ WritePrefixSet(); |
// Gather statistics. |
if (got_counters && metric->GetIOCounters(&io_after)) { |
@@ -1188,17 +1200,13 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() { |
static_cast<int>(io_after.WriteOperationCount - |
io_before.WriteOperationCount)); |
} |
- DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in " |
- << bloom_gen.InMilliseconds() << " ms total. prefix count: " |
- << add_prefixes.size(); |
- UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); |
- UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes", |
- browse_bloom_filter_->size() / 1024); |
- int64 size_64; |
- if (file_util::GetFileSize(browse_filename_, &size_64)) { |
- UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", |
- static_cast<int>(size_64 / 1024)); |
- } |
+ |
+ int64 file_size = GetFileSizeOrZero(prefix_set_filename_); |
+ UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes", |
+ static_cast<int>(file_size / 1024)); |
+ file_size = GetFileSizeOrZero(browse_filename_); |
+ UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", |
+ static_cast<int>(file_size / 1024)); |
#if defined(OS_MACOSX) |
base::mac::SetFileBackupExclusion(browse_filename_); |
@@ -1225,34 +1233,70 @@ void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() { |
// TODO(shess): I'm not clear why this code doesn't have any |
// real error-handling. |
-void SafeBrowsingDatabaseNew::LoadBloomFilter() { |
+// TODO(shess): After a transition period, this can convert to just |
+// giving up if the prefix set is not on disk. |
+void SafeBrowsingDatabaseNew::LoadBloomFilterOrPrefixSet() { |
DCHECK_EQ(creation_loop_, MessageLoop::current()); |
DCHECK(!bloom_filter_filename_.empty()); |
+ DCHECK(!prefix_set_filename_.empty()); |
- // If we're missing either of the database or filter files, we wait until the |
- // next update to generate a new filter. |
- // TODO(paulg): Investigate how often the filter file is missing and how |
- // expensive it would be to regenerate it. |
- int64 size_64 = 0; |
- if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0) |
+ // If there is no database, the filter cannot be used. |
+ base::PlatformFileInfo db_info; |
+ if (!file_util::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0) |
return; |
- if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) || |
- size_64 == 0) { |
- RecordFailure(FAILURE_DATABASE_FILTER_MISSING); |
+ RecordFilterLoad(FILTER_LOAD); |
+ |
+ // If there is no prefix set, or if the file is too old, check for a |
+ // bloom filter. |
+ // TODO(shess): The time check is in case this code gets reverted |
+ // and re-landed. It might be good to keep as a sanity check. |
+ // Better would be to put the db's checksum in the filter file. |
+ base::PlatformFileInfo prefix_set_info; |
+ if (!file_util::GetFileInfo(prefix_set_filename_, &prefix_set_info) || |
+ prefix_set_info.size == 0 || |
+ prefix_set_info.last_modified < db_info.last_modified) { |
+ // No prefix set. |
+ prefix_set_.reset(); |
+ |
+ int64 file_size = GetFileSizeOrZero(bloom_filter_filename_); |
+ if (!file_size) { |
+ RecordFailure(FAILURE_DATABASE_FILTER_MISSING); |
+ return; |
+ } |
+ |
+ const base::TimeTicks before = base::TimeTicks::Now(); |
+ browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); |
+ DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " |
+ << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; |
+ UMA_HISTOGRAM_TIMES("SB2.BloomFilterLoad", base::TimeTicks::Now() - before); |
+ |
+ if (!browse_bloom_filter_.get()) |
+ RecordFailure(FAILURE_DATABASE_FILTER_READ); |
+ else |
+ RecordFilterLoad(FILTER_LOADED_BLOOM_FILTER); |
+ |
return; |
} |
+ // Once there is a prefix set stored, never use the bloom filter. |
+ browse_bloom_filter_ = NULL; |
+ |
+ // TODO(shess): The bloom filter file should have been deleted in |
+ // WritePrefixSet(), unless this code is reverted and re-landed. |
+ // Just paranoid. |
+ file_util::Delete(bloom_filter_filename_, false); |
+ |
const base::TimeTicks before = base::TimeTicks::Now(); |
- browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); |
- DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " |
+ prefix_set_.reset(safe_browsing::PrefixSet::LoadFile(prefix_set_filename_)); |
+ DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in " |
<< (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; |
+ UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before); |
- if (!browse_bloom_filter_.get()) |
- RecordFailure(FAILURE_DATABASE_FILTER_READ); |
- |
- // Use an empty prefix set until the first update. |
- prefix_set_.reset(CreateEmptyPrefixSet()); |
+ if (!prefix_set_.get()) |
+ RecordFailure(FAILURE_DATABASE_PREFIX_SET_READ); |
+ else |
+ RecordFilterLoad(FILTER_LOADED_PREFIX_SET); |
} |
bool SafeBrowsingDatabaseNew::Delete() { |
@@ -1279,25 +1323,34 @@ bool SafeBrowsingDatabaseNew::Delete() { |
const bool r5 = file_util::Delete(bloom_filter_filename_, false); |
if (!r5) |
RecordFailure(FAILURE_DATABASE_FILTER_DELETE); |
- return r1 && r2 && r3 && r4 && r5; |
+ |
+ const bool r6 = file_util::Delete(prefix_set_filename_, false); |
+ if (!r6) |
+ RecordFailure(FAILURE_DATABASE_PREFIX_SET_DELETE); |
+ return r1 && r2 && r3 && r4 && r5 && r6; |
} |
-void SafeBrowsingDatabaseNew::WriteBloomFilter() { |
+void SafeBrowsingDatabaseNew::WritePrefixSet() { |
DCHECK_EQ(creation_loop_, MessageLoop::current()); |
- if (!browse_bloom_filter_.get()) |
+ if (!prefix_set_.get()) |
return; |
const base::TimeTicks before = base::TimeTicks::Now(); |
- const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_); |
- DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in " |
+ const bool write_ok = prefix_set_->WriteFile(prefix_set_filename_); |
+ DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in " |
<< (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; |
+ UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before); |
if (!write_ok) |
- RecordFailure(FAILURE_DATABASE_FILTER_WRITE); |
+ RecordFailure(FAILURE_DATABASE_PREFIX_SET_WRITE); |
+ |
+ // Delete any stale bloom filter (checking before deleting is |
+ // unlikely to be faster). |
+ file_util::Delete(bloom_filter_filename_, false); |
#if defined(OS_MACOSX) |
- base::mac::SetFileBackupExclusion(bloom_filter_filename_); |
+ base::mac::SetFileBackupExclusion(prefix_set_filename_); |
#endif |
} |