| Index: components/precache/core/precache_fetcher.cc
|
| diff --git a/components/precache/core/precache_fetcher.cc b/components/precache/core/precache_fetcher.cc
|
| index 8f45bff3136821d26dcf92c8a563c76179d90e90..77766e1588e61f4af7fcaaf351b457915a93311c 100644
|
| --- a/components/precache/core/precache_fetcher.cc
|
| +++ b/components/precache/core/precache_fetcher.cc
|
| @@ -6,6 +6,7 @@
|
|
|
| #include <algorithm>
|
| #include <limits>
|
| +#include <set>
|
| #include <utility>
|
| #include <vector>
|
|
|
| @@ -53,11 +54,14 @@ const int kNoTracking =
|
| net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
|
| net::LOAD_DO_NOT_SEND_AUTH_DATA;
|
|
|
| -namespace {
|
| -
|
| -// The maximum number of URLFetcher requests that can be on flight in parallel.
|
| +// The maximum number of URLFetcher requests that can be in flight in parallel.
|
| +// Note that OnManifestFetchComplete and OnResourceFetchComplete perform
|
| +// remove_if operations which are O(kMaxParallelFetches). Those should be
|
| +// optimized before increasing this value significantly.
|
| const int kMaxParallelFetches = 10;
|
|
|
| +namespace {
|
| +
|
| // The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to
|
| // a number we expect to be in the 99th percentile for the histogram, give or
|
| // take.
|
| @@ -183,25 +187,25 @@ std::string GetResourceURLBase64Hash(const std::vector<GURL>& urls) {
|
| // hosts in |hosts_to_fetch|, is added to |hosts_info|.
|
| std::deque<ManifestHostInfo> RetrieveManifestInfo(
|
| const base::WeakPtr<PrecacheDatabase>& precache_database,
|
| - std::vector<std::string> hosts_to_fetch) {
|
| + std::vector<std::pair<std::string, int64_t>> hosts_to_fetch) {
|
| std::deque<ManifestHostInfo> hosts_info;
|
| if (!precache_database)
|
| return hosts_info;
|
|
|
| for (const auto& host : hosts_to_fetch) {
|
| - auto referrer_host_info = precache_database->GetReferrerHost(host);
|
| + auto referrer_host_info = precache_database->GetReferrerHost(host.first);
|
| if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) {
|
| std::vector<GURL> used_urls, unused_urls;
|
| precache_database->GetURLListForReferrerHost(referrer_host_info.id,
|
| &used_urls, &unused_urls);
|
| hosts_info.push_back(
|
| - ManifestHostInfo(referrer_host_info.manifest_id, host,
|
| - GetResourceURLBase64Hash(used_urls),
|
| + ManifestHostInfo(referrer_host_info.manifest_id, host.first,
|
| + host.second, GetResourceURLBase64Hash(used_urls),
|
| GetResourceURLBase64Hash(unused_urls)));
|
| } else {
|
| hosts_info.push_back(
|
| - ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host,
|
| - std::string(), std::string()));
|
| + ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host.first,
|
| + host.second, std::string(), std::string()));
|
| }
|
| }
|
| return hosts_info;
|
| @@ -225,6 +229,10 @@ bool IsQuotaTimeExpired(const PrecacheQuota& quota,
|
| start_time + base::TimeDelta::FromDays(1) < time_now;
|
| }
|
|
|
| +double ResourceWeight(const PrecacheResource& resource, int64_t host_visits) {
|
| + return resource.weight_ratio() * host_visits;
|
| +}
|
| +
|
| } // namespace
|
|
|
| PrecacheFetcher::Fetcher::Fetcher(
|
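The ResourceWeight() helper added above multiplies a resource's manifest weight_ratio() by the visit count of its referring host; OnManifestFetchComplete (further down in this patch) then drops any resource whose weight falls below the configured min_weight() before ranking. The following standalone sketch, which is not part of the patch, illustrates that interaction; the simplified signature, URLs, ratios, visit counts, and min_weight value are all invented for illustration.

    // Hypothetical illustration of ResourceWeight() plus the min_weight filter.
    // Real inputs come from the manifest proto and the precache database.
    #include <cstdint>
    #include <iostream>

    double ResourceWeight(double weight_ratio, int64_t host_visits) {
      return weight_ratio * host_visits;
    }

    int main() {
      const double kMinWeight = 1.0;  // Assumed stand-in for min_weight().
      struct Sample {
        const char* url;
        double weight_ratio;  // From the host's manifest.
        int64_t host_visits;  // From the local history/precache database.
      } samples[] = {
          {"https://news.example/app.js", 0.8, 50},      // weight 40  -> kept
          {"https://news.example/promo.gif", 0.02, 50},  // weight 1   -> kept
          {"https://blog.example/rare.css", 0.3, 2},     // weight 0.6 -> dropped
      };
      for (const Sample& s : samples) {
        const double weight = ResourceWeight(s.weight_ratio, s.host_visits);
        std::cout << s.url << " weight=" << weight
                  << (weight >= kMinWeight ? " (queued for ranking)"
                                           : " (skipped)")
                  << "\n";
      }
      return 0;
    }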
| @@ -301,9 +309,6 @@ void PrecacheFetcher::Fetcher::OnURLFetchDownloadProgress(
|
| // |current_network_bytes| is guaranteed to be non-negative, so this cast
|
| // is safe.
|
| static_cast<size_t>(current_network_bytes) > max_bytes_) {
|
| - VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total
|
| - << ") is over " << max_bytes_;
|
| -
|
| // Call the completion callback, to attempt the next download, or to trigger
|
| // cleanup in precache_delegate_->OnDone().
|
| response_bytes_ = current;
|
| @@ -363,30 +368,22 @@ void PrecacheFetcher::RecordCompletionStatistics(
|
| base::TimeDelta::FromSeconds(1),
|
| base::TimeDelta::FromHours(4), 50);
|
|
|
| - // Number of manifests for which we have downloaded all resources.
|
| - int manifests_completed =
|
| - unfinished_work.num_manifest_urls() - remaining_manifest_urls_to_fetch;
|
| -
|
| - // If there are resource URLs left to fetch, the last manifest is not yet
|
| - // completed.
|
| - if (remaining_resource_urls_to_fetch > 0)
|
| - --manifests_completed;
|
| -
|
| - DCHECK_GE(manifests_completed, 0);
|
| - int percent_completed = unfinished_work.num_manifest_urls() == 0
|
| - ? 0
|
| - : (static_cast<double>(manifests_completed) /
|
| - unfinished_work.num_manifest_urls() * 100);
|
| + int num_total_resources = unfinished_work.num_resource_urls();
|
| + int percent_completed =
|
| + num_total_resources == 0
|
| + ? 101 // Overflow bucket.
|
| + : (100 * (static_cast<double>(num_total_resources -
|
| + remaining_resource_urls_to_fetch) /
|
| + num_total_resources));
|
|
|
| UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted",
|
| percent_completed);
|
| - UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
|
| - unfinished_work.total_bytes(),
|
| - 1, kMaxResponseBytes, 100);
|
| + UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
|
| + unfinished_work.total_bytes(), 1,
|
| + kMaxResponseBytes, 100);
|
| UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network",
|
| - unfinished_work.network_bytes(),
|
| - 1, kMaxResponseBytes,
|
| - 100);
|
| + unfinished_work.network_bytes(), 1,
|
| + kMaxResponseBytes, 100);
|
| }
|
|
|
| // static
|
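Worked example for the PercentCompleted change above: with num_resource_urls() == 200 and 50 resource URLs still left to fetch, the reported value is 100 * (200 - 50) / 200 = 75; when num_resource_urls() is 0, the sentinel 101 lands in the histogram's overflow bucket rather than reporting 0% as the old manifest-based computation did.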
| @@ -426,8 +423,10 @@ PrecacheFetcher::PrecacheFetcher(
|
| // keeping track of the current resource index.
|
| for (const auto& resource : unfinished_work->resource()) {
|
| if (resource.has_url() && resource.has_top_host_name()) {
|
| + // Weight doesn't matter, as the resources have already been sorted by
|
| + // this point.
|
| resources_to_fetch_.emplace_back(GURL(resource.url()),
|
| - resource.top_host_name());
|
| + resource.top_host_name(), 0);
|
| }
|
| }
|
| unfinished_work_ = std::move(unfinished_work);
|
| @@ -446,28 +445,24 @@ std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() {
|
| // If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and
|
| // top hosts should be left as is in |unfinished_work_|.
|
| unfinished_work_->clear_top_host();
|
| - for (const auto& top_host : top_hosts_to_fetch_) {
|
| + for (const auto& top_host : top_hosts_fetching_)
|
| unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
|
| - }
|
| + for (const auto& top_host : top_hosts_to_fetch_)
|
| + unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
|
| + }
|
| + for (const auto& resource : resources_fetching_) {
|
| + auto new_resource = unfinished_work_->add_resource();
|
| + new_resource->set_url(resource.url.spec());
|
| + new_resource->set_top_host_name(resource.referrer);
|
| }
|
| for (const auto& resource : resources_to_fetch_) {
|
| auto new_resource = unfinished_work_->add_resource();
|
| - new_resource->set_url(resource.first.spec());
|
| - new_resource->set_top_host_name(resource.second);
|
| - }
|
| - for (const auto& it : pool_.elements()) {
|
| - const Fetcher* fetcher = it.first;
|
| - GURL config_url =
|
| - config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;
|
| - if (fetcher->is_resource_request()) {
|
| - auto resource = unfinished_work_->add_resource();
|
| - resource->set_url(fetcher->url().spec());
|
| - resource->set_top_host_name(fetcher->referrer());
|
| - } else if (fetcher->url() != config_url) {
|
| - unfinished_work_->add_top_host()->set_hostname(fetcher->referrer());
|
| - }
|
| + new_resource->set_url(resource.url.spec());
|
| + new_resource->set_top_host_name(resource.referrer);
|
| }
|
| + top_hosts_fetching_.clear();
|
| top_hosts_to_fetch_.clear();
|
| + resources_fetching_.clear();
|
| resources_to_fetch_.clear();
|
| pool_.DeleteAll();
|
| return std::move(unfinished_work_);
|
| @@ -488,7 +483,6 @@ void PrecacheFetcher::Start() {
|
|
|
| // Fetch the precache configuration settings from the server.
|
| DCHECK(pool_.IsEmpty()) << "All parallel requests should be available";
|
| - VLOG(3) << "Fetching " << config_url;
|
| pool_.Add(base::MakeUnique<Fetcher>(
|
| request_context_.get(), config_url, std::string(),
|
| base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()),
|
| @@ -498,35 +492,35 @@ void PrecacheFetcher::Start() {
|
| void PrecacheFetcher::StartNextResourceFetch() {
|
| DCHECK(unfinished_work_->has_config_settings());
|
| while (!resources_to_fetch_.empty() && pool_.IsAvailable()) {
|
| - const auto& resource = resources_to_fetch_.front();
|
| + ResourceInfo& resource = resources_to_fetch_.front();
|
| const size_t max_bytes = std::min(
|
| quota_.remaining(),
|
| std::min(unfinished_work_->config_settings().max_bytes_per_resource(),
|
| unfinished_work_->config_settings().max_bytes_total() -
|
| unfinished_work_->total_bytes()));
|
| - VLOG(3) << "Fetching " << resource.first << " " << resource.second;
|
| pool_.Add(base::MakeUnique<Fetcher>(
|
| - request_context_.get(), resource.first, resource.second,
|
| + request_context_.get(), resource.url, resource.referrer,
|
| base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()),
|
| true /* is_resource_request */, max_bytes));
|
|
|
| + resources_fetching_.push_back(std::move(resource));
|
| resources_to_fetch_.pop_front();
|
| }
|
| }
|
|
|
| -void PrecacheFetcher::StartNextManifestFetch() {
|
| - if (top_hosts_to_fetch_.empty() || !pool_.IsAvailable())
|
| - return;
|
| -
|
| - // We only fetch one manifest at a time to keep the size of
|
| - // resources_to_fetch_ as small as possible.
|
| - VLOG(3) << "Fetching " << top_hosts_to_fetch_.front().manifest_url;
|
| - pool_.Add(base::MakeUnique<Fetcher>(
|
| - request_context_.get(), top_hosts_to_fetch_.front().manifest_url,
|
| - top_hosts_to_fetch_.front().hostname,
|
| - base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()),
|
| - false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
|
| - top_hosts_to_fetch_.pop_front();
|
| +void PrecacheFetcher::StartNextManifestFetches() {
|
| + // We fetch as many manifests at a time as possible, as we need all resource
|
| + // URLs in memory in order to rank them.
|
| + while (!top_hosts_to_fetch_.empty() && pool_.IsAvailable()) {
|
| + ManifestHostInfo& top_host = top_hosts_to_fetch_.front();
|
| + pool_.Add(base::MakeUnique<Fetcher>(
|
| + request_context_.get(), top_host.manifest_url, top_host.hostname,
|
| + base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr(),
|
| + top_host.visits),
|
| + false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
|
| + top_hosts_fetching_.push_back(std::move(top_host));
|
| + top_hosts_to_fetch_.pop_front();
|
| + }
|
| }
|
|
|
| void PrecacheFetcher::NotifyDone(
|
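The two fetch-start loops above share the same bookkeeping pattern: an entry moves from its *_to_fetch_ deque into the corresponding *_fetching_ list when its Fetcher is created, and the completion callbacks later erase it from the in-flight list by URL with remove_if. A minimal standalone sketch of that pattern follows; the types, container choices, and URLs here are assumptions for illustration, not the patch's own declarations.

    // Sketch of the pending/in-flight bookkeeping used for top hosts and
    // resources: start requests from the deque, drop them from the list by
    // URL when they complete.
    #include <deque>
    #include <iostream>
    #include <list>
    #include <string>

    struct Item {
      std::string url;
    };

    int main() {
      std::deque<Item> to_fetch = {{"https://a.test/m"}, {"https://b.test/m"}};
      std::list<Item> fetching;

      // "Start" every request the pool has room for (here: all of them).
      while (!to_fetch.empty()) {
        fetching.push_back(std::move(to_fetch.front()));
        to_fetch.pop_front();
      }

      // "Complete" one request: remove it from the in-flight list by URL,
      // mirroring the remove_if calls in the completion callbacks.
      const std::string completed_url = "https://a.test/m";
      fetching.remove_if([&completed_url](const Item& item) {
        return item.url == completed_url;
      });

      std::cout << "still fetching: " << fetching.size() << "\n";  // Prints 1.
    }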
| @@ -545,23 +539,14 @@ void PrecacheFetcher::StartNextFetch() {
|
| if ((unfinished_work_->total_bytes() >
|
| unfinished_work_->config_settings().max_bytes_total()) ||
|
| quota_.remaining() == 0) {
|
| - size_t pending_manifests_in_pool = 0;
|
| - size_t pending_resources_in_pool = 0;
|
| - for (const auto& element_pair : pool_.elements()) {
|
| - const Fetcher* fetcher = element_pair.first;
|
| - if (fetcher->is_resource_request())
|
| - pending_resources_in_pool++;
|
| - else if (fetcher->url() != config_url_)
|
| - pending_manifests_in_pool++;
|
| - }
|
| pool_.DeleteAll();
|
| - NotifyDone(top_hosts_to_fetch_.size() + pending_manifests_in_pool,
|
| - resources_to_fetch_.size() + pending_resources_in_pool);
|
| + NotifyDone(top_hosts_to_fetch_.size() + top_hosts_fetching_.size(),
|
| + resources_to_fetch_.size() + resources_fetching_.size());
|
| return;
|
| }
|
|
|
| StartNextResourceFetch();
|
| - StartNextManifestFetch();
|
| + StartNextManifestFetches();
|
| if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() &&
|
| pool_.IsEmpty()) {
|
| // There are no more URLs to fetch, so end the precache cycle.
|
| @@ -589,9 +574,7 @@ void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) {
|
| void PrecacheFetcher::DetermineManifests() {
|
| DCHECK(unfinished_work_->has_config_settings());
|
|
|
| - std::vector<std::string> top_hosts_to_fetch;
|
| - std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info(
|
| - new std::deque<ManifestHostInfo>);
|
| + std::vector<std::pair<std::string, int64_t>> top_hosts_to_fetch;
|
| // Keep track of manifest URLs that are being fetched, in order to elide
|
| // duplicates.
|
| std::set<base::StringPiece> seen_top_hosts;
|
| @@ -602,7 +585,7 @@ void PrecacheFetcher::DetermineManifests() {
|
| if (rank > unfinished_work_->config_settings().top_sites_count())
|
| break;
|
| if (seen_top_hosts.insert(host.hostname()).second)
|
| - top_hosts_to_fetch.push_back(host.hostname());
|
| + top_hosts_to_fetch.emplace_back(host.hostname(), host.visits());
|
| }
|
|
|
| // Attempt to fetch manifests for starting hosts up to the maximum top sites
|
| @@ -613,12 +596,15 @@ void PrecacheFetcher::DetermineManifests() {
|
| if (resources_to_fetch_.empty()) {
|
| for (const std::string& host :
|
| unfinished_work_->config_settings().forced_site()) {
|
| + // We add a forced site with visits == 0, which means its resources will
|
| + // be downloaded last. TODO(twifkak): Consider removing support for
|
| + // forced_site.
|
| if (seen_top_hosts.insert(host).second)
|
| - top_hosts_to_fetch.push_back(host);
|
| + top_hosts_to_fetch.emplace_back(host, 0);
|
| }
|
| }
|
| - // We only fetch one manifest at a time to keep the size of
|
| - // resources_to_fetch_ as small as possible.
|
| + // We retrieve manifest usage and quota info from the local database before
|
| + // fetching the manifests.
|
| PostTaskAndReplyWithResult(
|
| db_task_runner_.get(), FROM_HERE,
|
| base::Bind(&RetrieveManifestInfo, precache_database_,
|
| @@ -636,7 +622,7 @@ void PrecacheFetcher::OnManifestInfoRetrieved(
|
| // is invalid.
|
| top_hosts_to_fetch_.clear();
|
| unfinished_work_->set_num_manifest_urls(manifests_info.size());
|
| - NotifyDone(manifests_info.size(), resources_to_fetch_.size());
|
| + NotifyDone(manifests_info.size(), resources_to_rank_.size());
|
| return;
|
| }
|
|
|
| @@ -683,10 +669,12 @@ void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) {
|
|
|
| ManifestHostInfo::ManifestHostInfo(int64_t manifest_id,
|
| const std::string& hostname,
|
| + int64_t visits,
|
| const std::string& used_url_hash,
|
| const std::string& unused_url_hash)
|
| : manifest_id(manifest_id),
|
| hostname(hostname),
|
| + visits(visits),
|
| used_url_hash(used_url_hash),
|
| unused_url_hash(unused_url_hash) {}
|
|
|
| @@ -696,7 +684,19 @@ ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default;
|
|
|
| ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default;
|
|
|
| -void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
|
| +ResourceInfo::ResourceInfo(const GURL& url,
|
| + const std::string& referrer,
|
| + double weight)
|
| + : url(url), referrer(referrer), weight(weight) {}
|
| +
|
| +ResourceInfo::~ResourceInfo() {}
|
| +
|
| +ResourceInfo::ResourceInfo(ResourceInfo&&) = default;
|
| +
|
| +ResourceInfo& ResourceInfo::operator=(ResourceInfo&&) = default;
|
| +
|
| +void PrecacheFetcher::OnManifestFetchComplete(int64_t host_visits,
|
| + const Fetcher& source) {
|
| DCHECK(unfinished_work_->has_config_settings());
|
| UpdateStats(source.response_bytes(), source.network_response_bytes());
|
| if (source.network_url_fetcher() == nullptr) {
|
| @@ -715,7 +715,9 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
|
| manifest.resource(i).has_url()) {
|
| GURL url(manifest.resource(i).url());
|
| if (url.is_valid()) {
|
| - resources_to_fetch_.emplace_back(url, source.referrer());
|
| + double weight = ResourceWeight(manifest.resource(i), host_visits);
|
| + if (weight >= unfinished_work_->config_settings().min_weight())
|
| + resources_to_rank_.emplace_back(url, source.referrer(), weight);
|
| }
|
| }
|
| }
|
| @@ -726,10 +728,43 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
|
| }
|
| }
|
|
|
| + top_hosts_fetching_.remove_if([&source](const ManifestHostInfo& top_host) {
|
| + return top_host.manifest_url == source.url();
|
| + });
|
| +
|
| pool_.Delete(source);
|
| +
|
| + if (top_hosts_to_fetch_.empty() && top_hosts_fetching_.empty())
|
| + QueueResourcesForFetch();
|
| +
|
| StartNextFetch();
|
| }
|
|
|
| +void PrecacheFetcher::QueueResourcesForFetch() {
|
| + // Done fetching manifests. Now move resources_to_rank_ into
|
| + // resources_to_fetch_, so that StartNextFetch will begin fetching resources.
|
| + resources_to_fetch_ = std::move(resources_to_rank_);
|
| +
|
| + if (unfinished_work_->config_settings().global_ranking()) {
|
| + // Sort resources_to_fetch_ by descending weight.
|
| + std::stable_sort(resources_to_fetch_.begin(), resources_to_fetch_.end(),
|
| + [](const ResourceInfo& first, const ResourceInfo& second) {
|
| + return first.weight > second.weight;
|
| + });
|
| + }
|
| +
|
| + // Truncate to size |total_resources_count|.
|
| + const size_t num_resources = std::min(
|
| + resources_to_fetch_.size(),
|
| + static_cast<size_t>(
|
| + unfinished_work_->config_settings().total_resources_count()));
|
| + resources_to_fetch_.erase(resources_to_fetch_.begin() + num_resources,
|
| + resources_to_fetch_.end());
|
| +
|
| + // Save denominator for PercentCompleted UMA.
|
| + unfinished_work_->set_num_resource_urls(resources_to_fetch_.size());
|
| +}
|
| +
|
| void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) {
|
| UpdateStats(source.response_bytes(), source.network_response_bytes());
|
|
|
| @@ -739,6 +774,10 @@ void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) {
|
| source.url(), source.referrer(), base::Time::Now(),
|
| source.was_cached(), source.response_bytes()));
|
|
|
| + resources_fetching_.remove_if([&source](const ResourceInfo& resource) {
|
| + return resource.url == source.url();
|
| + });
|
| +
|
| pool_.Delete(source);
|
|
|
| // The resource has already been put in the cache during the fetch process, so
|
|
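Finally, a small self-contained sketch of the sort-then-truncate step performed by QueueResourcesForFetch() above: resources are stable-sorted by descending weight (so ties keep their manifest order) and the queue is cut down to total_resources_count entries. The ResourceInfo fields are reduced to what the comparison needs, and the URLs, weights, and count below are invented.

    // Illustration of the ranking-and-truncation step; not part of the patch.
    #include <algorithm>
    #include <deque>
    #include <iostream>
    #include <string>

    struct ResourceInfo {
      std::string url;
      double weight;
    };

    int main() {
      std::deque<ResourceInfo> resources = {
          {"https://a.test/1.js", 2.5},
          {"https://b.test/2.css", 7.0},
          {"https://c.test/3.png", 7.0},  // Ties keep manifest order (stable).
          {"https://d.test/4.js", 0.4},
      };
      const size_t kTotalResourcesCount = 3;  // Assumed config value.

      // Sort by descending weight, then keep only the first
      // kTotalResourcesCount entries.
      std::stable_sort(resources.begin(), resources.end(),
                       [](const ResourceInfo& a, const ResourceInfo& b) {
                         return a.weight > b.weight;
                       });
      const size_t num = std::min(resources.size(), kTotalResourcesCount);
      resources.erase(resources.begin() + num, resources.end());

      for (const ResourceInfo& r : resources)
        std::cout << r.url << " (" << r.weight << ")\n";
      // Prints b.test/2.css, c.test/3.png, a.test/1.js; d.test/4.js is cut.
    }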
|