Index: components/precache/core/precache_fetcher.cc |
diff --git a/components/precache/core/precache_fetcher.cc b/components/precache/core/precache_fetcher.cc |
index 8f45bff3136821d26dcf92c8a563c76179d90e90..536420c67b568cd8ae183e3bfa3f3e9a5b787b67 100644 |
--- a/components/precache/core/precache_fetcher.cc |
+++ b/components/precache/core/precache_fetcher.cc |
@@ -6,6 +6,7 @@ |
#include <algorithm> |
#include <limits> |
+#include <set> |
#include <utility> |
#include <vector> |
@@ -53,11 +54,14 @@ const int kNoTracking = |
net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES | |
net::LOAD_DO_NOT_SEND_AUTH_DATA; |
-namespace { |
- |
// The maximum number of URLFetcher requests that can be on flight in parallel. |
+// Note that OnManifestFetchComplete and OnResourceFetchComplete perform |
+// remove_if operations which are O(kMaxParallelFetches). Those should be |
+// optimized before increasing this value significantly. |
const int kMaxParallelFetches = 10; |
+namespace { |
+ |
// The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to |
// a number we expect to be in the 99th percentile for the histogram, give or |
// take. |
@@ -183,25 +187,26 @@ std::string GetResourceURLBase64Hash(const std::vector<GURL>& urls) { |
// hosts in |hosts_to_fetch|, is added to |hosts_info|. |
std::deque<ManifestHostInfo> RetrieveManifestInfo( |
const base::WeakPtr<PrecacheDatabase>& precache_database, |
- std::vector<std::string> hosts_to_fetch) { |
+ std::vector<std::pair<std::string, int64_t>> hosts_to_fetch) { |
+ VLOG(9) << "RetrieveManifestInfo"; |
std::deque<ManifestHostInfo> hosts_info; |
if (!precache_database) |
return hosts_info; |
for (const auto& host : hosts_to_fetch) { |
- auto referrer_host_info = precache_database->GetReferrerHost(host); |
+ auto referrer_host_info = precache_database->GetReferrerHost(host.first); |
if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) { |
std::vector<GURL> used_urls, unused_urls; |
precache_database->GetURLListForReferrerHost(referrer_host_info.id, |
&used_urls, &unused_urls); |
hosts_info.push_back( |
- ManifestHostInfo(referrer_host_info.manifest_id, host, |
- GetResourceURLBase64Hash(used_urls), |
+ ManifestHostInfo(referrer_host_info.manifest_id, host.first, |
+ host.second, GetResourceURLBase64Hash(used_urls), |
GetResourceURLBase64Hash(unused_urls))); |
} else { |
hosts_info.push_back( |
- ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host, |
- std::string(), std::string())); |
+ ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host.first, |
+ host.second, std::string(), std::string())); |
} |
} |
return hosts_info; |
@@ -209,6 +214,7 @@ std::deque<ManifestHostInfo> RetrieveManifestInfo( |
PrecacheQuota RetrieveQuotaInfo( |
const base::WeakPtr<PrecacheDatabase>& precache_database) { |
+ VLOG(9) << "RetrieveQuotaInfo"; |
PrecacheQuota quota; |
if (precache_database) { |
quota = precache_database->GetQuota(); |
@@ -363,30 +369,24 @@ void PrecacheFetcher::RecordCompletionStatistics( |
base::TimeDelta::FromSeconds(1), |
base::TimeDelta::FromHours(4), 50); |
- // Number of manifests for which we have downloaded all resources. |
- int manifests_completed = |
- unfinished_work.num_manifest_urls() - remaining_manifest_urls_to_fetch; |
+ int num_total_resources = unfinished_work.num_resource_urls(); |
+ int percent_completed = |
+ num_total_resources == 0 |
+ ? 0 |
+ : (100 * (static_cast<double>(num_total_resources - |
+ remaining_resource_urls_to_fetch) / |
+ num_total_resources)); |
- // If there are resource URLs left to fetch, the last manifest is not yet |
- // completed. |
- if (remaining_resource_urls_to_fetch > 0) |
- --manifests_completed; |
- |
- DCHECK_GE(manifests_completed, 0); |
- int percent_completed = unfinished_work.num_manifest_urls() == 0 |
- ? 0 |
- : (static_cast<double>(manifests_completed) / |
- unfinished_work.num_manifest_urls() * 100); |
+ VLOG(6) << "Percent completed: " << percent_completed; |
bengr
2016/10/14 21:52:19
Do you need these VLOG statements?
twifkak
2016/10/14 22:41:45
They were very helpful during debugging, but I can
bengr
2016/10/18 18:57:20
Chromium style is to remove logging statements, so
twifkak
2016/10/18 19:52:27
Done.
|
UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted", |
percent_completed); |
- UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total", |
- unfinished_work.total_bytes(), |
- 1, kMaxResponseBytes, 100); |
+ UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total", |
+ unfinished_work.total_bytes(), 1, |
+ kMaxResponseBytes, 100); |
UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network", |
- unfinished_work.network_bytes(), |
- 1, kMaxResponseBytes, |
- 100); |
+ unfinished_work.network_bytes(), 1, |
+ kMaxResponseBytes, 100); |
} |
// static |
@@ -399,6 +399,7 @@ PrecacheFetcher::PrecacheFetcher( |
net::URLRequestContextGetter* request_context, |
const GURL& config_url, |
const std::string& manifest_url_prefix, |
+ bool global_ranking, |
std::unique_ptr<PrecacheUnfinishedWork> unfinished_work, |
uint32_t experiment_id, |
const base::WeakPtr<PrecacheDatabase>& precache_database, |
@@ -407,6 +408,7 @@ PrecacheFetcher::PrecacheFetcher( |
: request_context_(request_context), |
config_url_(config_url), |
manifest_url_prefix_(manifest_url_prefix), |
+ global_ranking_(global_ranking), |
precache_database_(precache_database), |
db_task_runner_(std::move(db_task_runner)), |
precache_delegate_(precache_delegate), |
@@ -426,8 +428,10 @@ PrecacheFetcher::PrecacheFetcher( |
// keeping track of the current resource index. |
for (const auto& resource : unfinished_work->resource()) { |
if (resource.has_url() && resource.has_top_host_name()) { |
+ // Weight doesn't matter, as the resources have already been sorted by |
+ // this point. |
resources_to_fetch_.emplace_back(GURL(resource.url()), |
- resource.top_host_name()); |
+ resource.top_host_name(), 0); |
} |
} |
unfinished_work_ = std::move(unfinished_work); |
@@ -446,28 +450,26 @@ std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { |
// If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and |
// top hosts should be left as is in |unfinished_work_|. |
unfinished_work_->clear_top_host(); |
+ for (const auto& top_host : top_hosts_fetching_) { |
bengr
2016/10/14 21:52:19
Remove curly braces.
twifkak
2016/10/14 22:41:45
Done.
|
+ unfinished_work_->add_top_host()->set_hostname(top_host.hostname); |
+ } |
for (const auto& top_host : top_hosts_to_fetch_) { |
bengr
2016/10/14 21:52:19
Remove curly braces.
twifkak
2016/10/14 22:41:45
Done.
|
unfinished_work_->add_top_host()->set_hostname(top_host.hostname); |
} |
} |
+ for (const auto& resource : resources_fetching_) { |
+ auto new_resource = unfinished_work_->add_resource(); |
+ new_resource->set_url(resource.url.spec()); |
+ new_resource->set_top_host_name(resource.referrer); |
+ } |
for (const auto& resource : resources_to_fetch_) { |
auto new_resource = unfinished_work_->add_resource(); |
- new_resource->set_url(resource.first.spec()); |
- new_resource->set_top_host_name(resource.second); |
- } |
- for (const auto& it : pool_.elements()) { |
- const Fetcher* fetcher = it.first; |
- GURL config_url = |
- config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; |
- if (fetcher->is_resource_request()) { |
- auto resource = unfinished_work_->add_resource(); |
- resource->set_url(fetcher->url().spec()); |
- resource->set_top_host_name(fetcher->referrer()); |
- } else if (fetcher->url() != config_url) { |
- unfinished_work_->add_top_host()->set_hostname(fetcher->referrer()); |
- } |
+ new_resource->set_url(resource.url.spec()); |
+ new_resource->set_top_host_name(resource.referrer); |
} |
+ top_hosts_fetching_.clear(); |
top_hosts_to_fetch_.clear(); |
+ resources_fetching_.clear(); |
resources_to_fetch_.clear(); |
pool_.DeleteAll(); |
return std::move(unfinished_work_); |
@@ -498,40 +500,43 @@ void PrecacheFetcher::Start() { |
void PrecacheFetcher::StartNextResourceFetch() { |
DCHECK(unfinished_work_->has_config_settings()); |
while (!resources_to_fetch_.empty() && pool_.IsAvailable()) { |
- const auto& resource = resources_to_fetch_.front(); |
+ ResourceInfo& resource = resources_to_fetch_.front(); |
const size_t max_bytes = std::min( |
quota_.remaining(), |
std::min(unfinished_work_->config_settings().max_bytes_per_resource(), |
unfinished_work_->config_settings().max_bytes_total() - |
unfinished_work_->total_bytes())); |
- VLOG(3) << "Fetching " << resource.first << " " << resource.second; |
+ VLOG(3) << "Fetching " << resource.url << " " << resource.referrer; |
pool_.Add(base::MakeUnique<Fetcher>( |
- request_context_.get(), resource.first, resource.second, |
+ request_context_.get(), resource.url, resource.referrer, |
base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()), |
true /* is_resource_request */, max_bytes)); |
+ resources_fetching_.push_back(std::move(resource)); |
resources_to_fetch_.pop_front(); |
} |
} |
-void PrecacheFetcher::StartNextManifestFetch() { |
- if (top_hosts_to_fetch_.empty() || !pool_.IsAvailable()) |
- return; |
- |
- // We only fetch one manifest at a time to keep the size of |
- // resources_to_fetch_ as small as possible. |
- VLOG(3) << "Fetching " << top_hosts_to_fetch_.front().manifest_url; |
- pool_.Add(base::MakeUnique<Fetcher>( |
- request_context_.get(), top_hosts_to_fetch_.front().manifest_url, |
- top_hosts_to_fetch_.front().hostname, |
- base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()), |
- false /* is_resource_request */, std::numeric_limits<int32_t>::max())); |
- top_hosts_to_fetch_.pop_front(); |
+void PrecacheFetcher::StartNextManifestFetches() { |
+ // We fetch as many manifests at a time as possible, as we need all resource |
+ // URLs in memory in order to rank them. |
+ while (!top_hosts_to_fetch_.empty() && pool_.IsAvailable()) { |
+ ManifestHostInfo& top_host = top_hosts_to_fetch_.front(); |
+ VLOG(3) << "Fetching " << top_host.manifest_url; |
bengr
2016/10/14 21:52:19
Do we need the VLOG (here and below)?
twifkak
2016/10/14 22:41:45
This VLOG is not new. The ones I added are all --v
|
+ pool_.Add(base::MakeUnique<Fetcher>( |
+ request_context_.get(), top_host.manifest_url, top_host.hostname, |
+ base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr(), |
+ top_host.visits), |
+ false /* is_resource_request */, std::numeric_limits<int32_t>::max())); |
+ top_hosts_fetching_.push_back(std::move(top_host)); |
+ top_hosts_to_fetch_.pop_front(); |
+ } |
} |
void PrecacheFetcher::NotifyDone( |
size_t remaining_manifest_urls_to_fetch, |
size_t remaining_resource_urls_to_fetch) { |
+ VLOG(9) << "NotifyDone"; |
RecordCompletionStatistics(*unfinished_work_, |
remaining_manifest_urls_to_fetch, |
remaining_resource_urls_to_fetch); |
@@ -539,29 +544,21 @@ void PrecacheFetcher::NotifyDone( |
} |
void PrecacheFetcher::StartNextFetch() { |
+ VLOG(9) << "StartNextFetch"; |
DCHECK(unfinished_work_->has_config_settings()); |
// If over the precache total size cap or daily quota, then stop prefetching. |
if ((unfinished_work_->total_bytes() > |
unfinished_work_->config_settings().max_bytes_total()) || |
quota_.remaining() == 0) { |
- size_t pending_manifests_in_pool = 0; |
- size_t pending_resources_in_pool = 0; |
- for (const auto& element_pair : pool_.elements()) { |
- const Fetcher* fetcher = element_pair.first; |
- if (fetcher->is_resource_request()) |
- pending_resources_in_pool++; |
- else if (fetcher->url() != config_url_) |
- pending_manifests_in_pool++; |
- } |
pool_.DeleteAll(); |
- NotifyDone(top_hosts_to_fetch_.size() + pending_manifests_in_pool, |
- resources_to_fetch_.size() + pending_resources_in_pool); |
+ NotifyDone(top_hosts_to_fetch_.size() + top_hosts_fetching_.size(), |
+ resources_to_fetch_.size() + resources_fetching_.size()); |
return; |
} |
StartNextResourceFetch(); |
- StartNextManifestFetch(); |
+ StartNextManifestFetches(); |
if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() && |
pool_.IsEmpty()) { |
// There are no more URLs to fetch, so end the precache cycle. |
@@ -572,6 +569,7 @@ void PrecacheFetcher::StartNextFetch() { |
} |
void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { |
+ VLOG(9) << "OnConfigFetchComplete"; |
UpdateStats(source.response_bytes(), source.network_response_bytes()); |
if (source.network_url_fetcher() == nullptr) { |
pool_.DeleteAll(); // Cancel any other ongoing request. |
@@ -589,9 +587,7 @@ void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { |
void PrecacheFetcher::DetermineManifests() { |
DCHECK(unfinished_work_->has_config_settings()); |
- std::vector<std::string> top_hosts_to_fetch; |
- std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info( |
- new std::deque<ManifestHostInfo>); |
+ std::vector<std::pair<std::string, int64_t>> top_hosts_to_fetch; |
// Keep track of manifest URLs that are being fetched, in order to elide |
// duplicates. |
std::set<base::StringPiece> seen_top_hosts; |
@@ -602,7 +598,7 @@ void PrecacheFetcher::DetermineManifests() { |
if (rank > unfinished_work_->config_settings().top_sites_count()) |
break; |
if (seen_top_hosts.insert(host.hostname()).second) |
- top_hosts_to_fetch.push_back(host.hostname()); |
+ top_hosts_to_fetch.emplace_back(host.hostname(), host.visits()); |
} |
// Attempt to fetch manifests for starting hosts up to the maximum top sites |
@@ -613,12 +609,15 @@ void PrecacheFetcher::DetermineManifests() { |
if (resources_to_fetch_.empty()) { |
for (const std::string& host : |
unfinished_work_->config_settings().forced_site()) { |
+ // We add a forced site with visits == 0, which means its resources will |
+ // be downloaded last. TODO(twifkak): Consider removing support for |
+ // forced_site. |
if (seen_top_hosts.insert(host).second) |
- top_hosts_to_fetch.push_back(host); |
+ top_hosts_to_fetch.emplace_back(host, 0); |
} |
} |
- // We only fetch one manifest at a time to keep the size of |
- // resources_to_fetch_ as small as possible. |
+ // We retrieve manifest usage and quota info from the local database before |
+ // fetching the manifests. |
PostTaskAndReplyWithResult( |
db_task_runner_.get(), FROM_HERE, |
base::Bind(&RetrieveManifestInfo, precache_database_, |
@@ -628,6 +627,7 @@ void PrecacheFetcher::DetermineManifests() { |
void PrecacheFetcher::OnManifestInfoRetrieved( |
std::deque<ManifestHostInfo> manifests_info) { |
+ VLOG(9) << "OnManifestInfoRetrieved"; |
const std::string prefix = manifest_url_prefix_.empty() |
? GetDefaultManifestURLPrefix() |
: manifest_url_prefix_; |
@@ -636,7 +636,7 @@ void PrecacheFetcher::OnManifestInfoRetrieved( |
// is invalid. |
top_hosts_to_fetch_.clear(); |
unfinished_work_->set_num_manifest_urls(manifests_info.size()); |
- NotifyDone(manifests_info.size(), resources_to_fetch_.size()); |
+ NotifyDone(manifests_info.size(), resources_to_rank_.size()); |
return; |
} |
@@ -666,6 +666,7 @@ void PrecacheFetcher::OnManifestInfoRetrieved( |
} |
void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) { |
+ VLOG(9) << "OnQuotaInfoRetrieved"; |
quota_ = quota; |
base::Time time_now = base::Time::Now(); |
if (IsQuotaTimeExpired(quota_, time_now)) { |
@@ -683,10 +684,12 @@ void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) { |
ManifestHostInfo::ManifestHostInfo(int64_t manifest_id, |
const std::string& hostname, |
+ int64_t visits, |
const std::string& used_url_hash, |
const std::string& unused_url_hash) |
: manifest_id(manifest_id), |
hostname(hostname), |
+ visits(visits), |
used_url_hash(used_url_hash), |
unused_url_hash(unused_url_hash) {} |
@@ -696,7 +699,20 @@ ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default; |
ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default; |
-void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { |
+ResourceInfo::ResourceInfo(const GURL& url, |
+ const std::string& referrer, |
+ double weight) |
+ : url(url), referrer(referrer), weight(weight) {} |
+ |
+ResourceInfo::~ResourceInfo() {} |
+ |
+ResourceInfo::ResourceInfo(ResourceInfo&&) = default; |
+ |
+ResourceInfo& ResourceInfo::operator=(ResourceInfo&&) = default; |
+ |
+void PrecacheFetcher::OnManifestFetchComplete(int64_t host_visits, |
+ const Fetcher& source) { |
+ VLOG(9) << "OnManifestFetchComplete " << source.referrer(); |
DCHECK(unfinished_work_->has_config_settings()); |
UpdateStats(source.response_bytes(), source.network_response_bytes()); |
if (source.network_url_fetcher() == nullptr) { |
@@ -705,9 +721,10 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { |
PrecacheManifest manifest; |
if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) { |
- const int32_t len = |
- std::min(manifest.resource_size(), |
- unfinished_work_->config_settings().top_resources_count()); |
+ int32_t len = manifest.resource_size(); |
+ if (!global_ranking_) |
bengr
2016/10/14 21:52:19
Add curly braces.
twifkak
2016/10/14 22:41:45
Done.
|
+ len = std::min( |
+ len, unfinished_work_->config_settings().top_resources_count()); |
const uint64_t resource_bitset = |
GetResourceBitset(manifest, experiment_id_); |
for (int i = 0; i < len; ++i) { |
@@ -715,7 +732,10 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { |
manifest.resource(i).has_url()) { |
GURL url(manifest.resource(i).url()); |
if (url.is_valid()) { |
- resources_to_fetch_.emplace_back(url, source.referrer()); |
+ VLOG(9) << "Adding resource " << url.spec(); |
+ double weight = manifest.resource(i).weight_ratio() * host_visits; |
+ if (weight >= unfinished_work_->config_settings().min_weight()) |
+ resources_to_rank_.emplace_back(url, source.referrer(), weight); |
} |
} |
} |
@@ -726,7 +746,36 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { |
} |
} |
+ top_hosts_fetching_.remove_if([&source](const ManifestHostInfo& top_host) { |
+ return top_host.manifest_url == source.url(); |
+ }); |
+ |
pool_.Delete(source); |
+ |
+ if (top_hosts_to_fetch_.empty() && top_hosts_fetching_.empty()) { |
+ VLOG(9) << "Ranking resources."; |
+ // Done fetching manifests. Now sort resources_to_rank_ into |
+ // resources_to_fetch_, by descending weight. When StartNextFetch runs, it |
+ // will begin fetching resources. |
+ resources_to_fetch_ = std::move(resources_to_rank_); |
+ if (global_ranking_) { |
+ std::stable_sort( |
+ resources_to_fetch_.begin(), resources_to_fetch_.end(), |
+ [](const ResourceInfo& first, const ResourceInfo& second) { |
+ return first.weight > second.weight; |
+ }); |
+ } |
+ // Truncate to size |total_resources_count|. |
+ const size_t num_resources = std::min( |
+ resources_to_fetch_.size(), |
+ static_cast<size_t>( |
+ unfinished_work_->config_settings().total_resources_count())); |
+ resources_to_fetch_.erase(resources_to_fetch_.begin() + num_resources, |
+ resources_to_fetch_.end()); |
+ // Save denominator for PercentCompleted UMA. |
+ unfinished_work_->set_num_resource_urls(resources_to_fetch_.size()); |
+ } |
+ |
StartNextFetch(); |
} |
@@ -739,6 +788,10 @@ void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { |
source.url(), source.referrer(), base::Time::Now(), |
source.was_cached(), source.response_bytes())); |
+ resources_fetching_.remove_if([&source](const ResourceInfo& resource) { |
+ return resource.url == source.url(); |
+ }); |
+ |
pool_.Delete(source); |
// The resource has already been put in the cache during the fetch process, so |