Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(114)

Side by Side Diff: chrome/browser/predictors/resource_prefetch_predictor.cc

Issue 10416002: Speculative resource prefetching for URLs CL. (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/predictors/resource_prefetch_predictor.h"
6
7 #include "base/command_line.h"
8 #include "base/metrics/histogram.h"
9 #include "base/stl_util.h"
10 #include "base/time.h"
11 #include "chrome/browser/history/history.h"
12 #include "chrome/browser/history/history_notifications.h"
13 #include "chrome/browser/history/in_memory_database.h"
14 #include "chrome/browser/history/url_database.h"
15 #include "chrome/browser/predictors/predictor_database.h"
16 #include "chrome/browser/predictors/predictor_database_factory.h"
17 #include "chrome/browser/profiles/profile.h"
18 #include "chrome/common/chrome_notification_types.h"
19 #include "chrome/common/chrome_switches.h"
20 #include "chrome/common/url_constants.h"
21 #include "content/browser/load_from_memory_cache_details.h"
22 #include "content/public/browser/browser_thread.h"
23 #include "content/public/browser/navigation_controller.h"
24 #include "content/public/browser/notification_service.h"
25 #include "content/public/browser/notification_source.h"
26 #include "content/public/browser/notification_types.h"
27 #include "content/public/browser/resource_request_info.h"
28 #include "content/public/browser/web_contents.h"
29 #include "net/base/mime_util.h"
30 #include "net/http/http_response_headers.h"
31 #include "net/url_request/url_request.h"
32 #include "net/url_request/url_request_context_getter.h"
33
34 using content::BrowserThread;
35
36 namespace {
37
38 // If a navigation hasn't seen a load complete event in this much time, it is
dominich 2012/05/21 16:16:53 How much work has there been to tune these numbers
Shishir 2012/05/23 01:46:46 Some of these numbers do not need experimentation
39 // considered abandoned.
40 static const int kMaxNavigationLifetimeSeconds = 60;
41
42 // Size of LRU caches for the Url data.
43 static const size_t kMaxNumUrlsToTrack = 500;
44
45 // The number of times we should have seen a visit to this Url in history
46 // to start tracking it. This ensures we don't bother with one-off entries.
47 static const int kMinUrlVisitCount = 3;
48
49 // The maximum number of resources to store per entry. This is about double
50 // the 25 resources we expect to prefetch.
51 static const int kMaxResourcesPerEntry = 50;
52
53 // Dont store subresources whose Urls are longer than this.
dominich 2012/05/21 16:16:53 nit: Don't
Shishir 2012/05/23 01:46:46 Done.
54 static const size_t kMaxSubresourceUrlLengthBytes = 1000;
55
56 // The number of consecutive misses after which we stop tracking a resource Url.
57 static const int kMaxConsecutiveMisses = 3;
58
59 // The number of resources we should report accuracy stats on.
60 static const int kNumResourcesAssumedPrefetched = 25;
61
62 ResourceType::Type GetResourceTypeFromMimeType(std::string mime_type,
dominich 2012/05/21 16:16:53 const std::string& mime_type to save the copy.
Shishir 2012/05/23 01:46:46 Done.
63 ResourceType::Type fallback) {
64 if (net::IsSupportedImageMimeType(mime_type.c_str()))
65 return ResourceType::IMAGE;
66 else if (net::IsSupportedJavascriptMimeType(mime_type.c_str()))
67 return ResourceType::SCRIPT;
68 else if (net::MatchesMimeType("text/css", mime_type))
69 return ResourceType::STYLESHEET;
70 else
71 return fallback;
72 }
73
74 } // namespace
75
76 namespace predictors {
77
78 ResourcePrefetchPredictor::URLRequestSummary::URLRequestSummary()
79 : was_cached_(false) {
80 }
81
82 ResourcePrefetchPredictor::URLRequestSummary::URLRequestSummary(
83 const URLRequestSummary& other)
84 : navigation_id_(other.navigation_id_),
85 resource_url_(other.resource_url_),
86 resource_type_(other.resource_type_),
87 mime_type_(other.mime_type_),
88 was_cached_(other.was_cached_) {
89 }
90
91 ResourcePrefetchPredictor::URLRequestSummary::~URLRequestSummary() {
92 }
93
94 bool ResourcePrefetchPredictor::URLRequestSummary::InitFromURLRequest(
dominich 2012/05/21 16:16:53 TODO: check if this return value is used.
Shishir 2012/05/23 01:46:46 It is used in the interceptor(now the network dele
95 net::URLRequest* request,
96 bool is_response) {
97 const content::ResourceRequestInfo* info =
98 content::ResourceRequestInfo::ForRequest(request);
99 if (!info) {
100 LOG(ERROR) << "No ResourceRequestInfo in request";
dominich 2012/05/21 16:16:53 should this be a CHECK/DCHECK?
Shishir 2012/05/23 01:46:46 No, as explained before.
101 return false;
102 }
103
104 int render_process_id, render_view_id;
105 if (!info->GetAssociatedRenderView(&render_process_id, &render_view_id)) {
106 LOG(ERROR) << "Could not get RenderViewId from request info.";
107 return false;
108 }
109
110 navigation_id_.render_process_id_ = render_process_id;
111 navigation_id_.render_view_id_ = render_view_id;
112 navigation_id_.main_frame_url_ = request->first_party_for_cookies();
113 navigation_id_.creation_time_ = request->creation_time();
114 resource_url_ = request->original_url();
115 resource_type_ = info->GetResourceType();
116 if (is_response) {
117 request->GetMimeType(&mime_type_);
118 was_cached_ = request->was_cached();
119 // We want to rely on the mime_type for the resource type.
120 resource_type_ = GetResourceTypeFromMimeType(mime_type_, resource_type_);
121 }
122
123 return true;
124 }
125
126 ResourcePrefetchPredictor::ResourcePrefetchPredictor(Profile* profile)
127 : profile_(profile),
128 initialized_(false),
129 tables_(PredictorDatabaseFactory::GetForProfile(
130 profile)->resource_prefetch_tables()),
131 notification_registrar_(new content::NotificationRegistrar()) {
132 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
133
134 // Request the in-memory database from the history to force it to load so it's
135 // available as soon as possible.
136 HistoryService* history_service =
137 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
138 if (history_service)
139 history_service->InMemoryDatabase();
dominich 2012/05/21 16:16:53 You're not using the result of this call - this mi
Shishir 2012/05/23 01:46:46 Should not. There are examples of this in the code
140
141 // Create local caches using the database as loaded.
142 std::vector<UrlTableRow>* url_rows = new std::vector<UrlTableRow>();
143 BrowserThread::PostTaskAndReply(
144 BrowserThread::DB, FROM_HERE,
145 base::Bind(&ResourcePrefetchPredictorTables::GetAllRows,
146 tables_, url_rows),
147 base::Bind(&ResourcePrefetchPredictor::CreateCaches, this,
148 base::Owned(url_rows)));
149 }
150
151 ResourcePrefetchPredictor::~ResourcePrefetchPredictor() {
152 }
153
154 bool ResourcePrefetchPredictor::IsEnabled() {
dominich 2012/05/21 16:16:53 These methods are not ordered as in the header - p
Shishir 2012/05/23 01:46:46 Added //static. I will reorder the function before
155 CommandLine* command_line = CommandLine::ForCurrentProcess();
156 return command_line->HasSwitch(
157 switches::kEnableSpeculativeResourcePrefetching);
158 }
159
160 void ResourcePrefetchPredictor::CreateCaches(
161 std::vector<UrlTableRow>* url_rows) {
162 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
163
164 DCHECK(!initialized_);
165 DCHECK(url_table_cache_.empty());
166 DCHECK(inflight_navigations_.empty());
167
168 // Copy the data to local caches.
169 for (UrlTableRowVector::iterator it = url_rows->begin();
170 it != url_rows->end(); ++it) {
171 url_table_cache_[it->main_frame_url_].rows_.push_back(*it);
172 }
173
174 // Score and sort the database caches.
175 // TODO(shishir): The following would be much more efficient if we used
dominich 2012/05/21 16:16:53 Can you use insertion sort to sort them as they're
Shishir 2012/05/23 01:46:46 Will that be more efficient? They would still have
176 // pointers.
177 for (UrlTableCacheMap::iterator it = url_table_cache_.begin();
178 it != url_table_cache_.end(); ++it) {
179 std::sort(it->second.rows_.begin(),
180 it->second.rows_.end(),
181 ResourcePrefetchPredictorTables::UrlTableRowSorter());
182 }
183
184 // Add notifications for history loading if it is not ready.
185 if (!profile_->GetHistoryService(Profile::EXPLICIT_ACCESS)) {
186 notification_registrar_->Add(this, chrome::NOTIFICATION_HISTORY_LOADED,
187 content::Source<Profile>(profile_));
188 } else {
189 OnHistoryAndCacheLoaded();
190 }
191 }
192
193 bool ResourcePrefetchPredictor::ShouldInterceptRequest(
194 net::URLRequest* request) {
195 const content::ResourceRequestInfo* request_info =
196 content::ResourceRequestInfo::ForRequest(request);
197 if (!request_info)
198 return false;
199
200 switch (request_info->GetResourceType()) {
dominich 2012/05/21 16:16:53 could be: return request_info->GetResourceType()
Shishir 2012/05/23 01:46:46 Done.
201 case ResourceType::MAIN_FRAME:
202 return IsHandledMainPage(request);
203 default:
204 return false;
205 }
206 }
207
208 bool ResourcePrefetchPredictor::ShouldInterceptResponse(
209 net::URLRequest* response) {
210 const content::ResourceRequestInfo* request_info =
211 content::ResourceRequestInfo::ForRequest(response);
212 if (!request_info)
213 return false;
214
215 switch (request_info->GetResourceType()) {
dominich 2012/05/21 16:16:53 could be: return request_info->GetResourceType()
Shishir 2012/05/23 01:46:46 Done.
216 case ResourceType::MAIN_FRAME:
217 return IsHandledMainPage(response);
218
219 // We discard request type here and look for mime type.
220 default:
221 return IsHandledSubresource(response);
222 }
223 }
224
225 bool ResourcePrefetchPredictor::ShouldInterceptRedirect(
226 net::URLRequest* response) {
227 const content::ResourceRequestInfo* request_info =
228 content::ResourceRequestInfo::ForRequest(response);
229 if (!request_info)
230 return false;
231
232 switch (request_info->GetResourceType()) {
dominich 2012/05/21 16:16:53 As above
Shishir 2012/05/23 01:46:46 Done.
233 case ResourceType::MAIN_FRAME:
234 return IsHandledMainPage(response);
235 default:
236 return false;
237 }
238 }
239
240 bool ResourcePrefetchPredictor::IsHandledMainPage(net::URLRequest* request) {
241 if (request->original_url().scheme() != chrome::kHttpScheme)
dominich 2012/05/21 16:16:53 there's too many negatives here. How about: retur
Shishir 2012/05/23 01:46:46 Done.
242 return false;
243 return true;
244 }
245
246 bool ResourcePrefetchPredictor::IsHandledSubresource(
247 net::URLRequest* response) {
248 // If the embedding main page is not HTTP, we dont care.
249 if (response->first_party_for_cookies().scheme() != chrome::kHttpScheme)
250 return false;
251
252 // Check the scheme of the orign. We only do http.
dominich 2012/05/21 16:16:53 nit: origin. Also, consider if these comments are
Shishir 2012/05/23 01:46:46 Removed trivial comments.
253 if (response->original_url().scheme() != chrome::kHttpScheme)
254 return false;
255
256 // We can only deal with a few mime types.
257 std::string mime_type;
258 response->GetMimeType(&mime_type);
259 if (!mime_type.empty() &&
260 !net::IsSupportedImageMimeType(mime_type.c_str()) &&
261 !net::IsSupportedJavascriptMimeType(mime_type.c_str()) &&
262 !net::MatchesMimeType("text/css", mime_type)) {
263 return false;
264 }
265
266 // Only lookup get requests.
267 if (response->method() != "GET")
268 return false;
269
270 if (response->original_url().spec().length() > kMaxSubresourceUrlLengthBytes)
271 return false;
272
273 bool is_cacheable = IsCacheable(response);
274 UMA_HISTOGRAM_BOOLEAN("ResourcePrefetchPredictor.IsCacheableResource",
275 is_cacheable);
276 if (!is_cacheable)
dominich 2012/05/21 16:16:53 return is_cacheable;
Shishir 2012/05/23 01:46:46 Done.
277 return false;
278
279 return true;
280 }
281
282 bool ResourcePrefetchPredictor::IsCacheable(net::URLRequest* response) {
dominich 2012/05/21 16:16:53 const net::URLRequest* response?
Shishir 2012/05/23 01:46:46 Done.
283 // If this was serverd from cache, we are good.
dominich 2012/05/21 16:16:53 nit: served
Shishir 2012/05/23 01:46:46 Removed comment.
284 if (response->was_cached())
285 return true;
286
287 // For non cached responses, we will ensure that the freshness lifetime is
288 // some sane value.
289 const net::HttpResponseInfo& response_info = response->response_info();
290 base::Time response_time(response_info.response_time);
291 response_time += base::TimeDelta::FromSeconds(1);
292 base::TimeDelta freshness = response_info.headers->GetFreshnessLifetime(
293 response_time);
294 return freshness > base::TimeDelta();
295 }
296
297 void ResourcePrefetchPredictor::RecordURLRequest(
298 const URLRequestSummary& request) {
299 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
300
301 switch (request.resource_type_) {
dominich 2012/05/21 16:16:53 I find 'if' much more readable in the case of a si
Shishir 2012/05/23 01:46:46 Done.
302 case ResourceType::MAIN_FRAME:
303 OnMainFrameRequest(request);
304 break;
305 default:
306 NOTREACHED() << "Unhandled RecordURLRequest";
307 }
308 }
309
310 void ResourcePrefetchPredictor::RecordUrlResponse(
311 const URLRequestSummary& response) {
312 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
313
314 switch (response.resource_type_) {
dominich 2012/05/21 16:16:53 if/else is more readable
Shishir 2012/05/23 01:46:46 Done.
315 case ResourceType::MAIN_FRAME:
316 OnMainFrameResponse(response);
317 break;
318 default:
319 OnSubresourceResponse(response);
320 }
321 }
322
323 void ResourcePrefetchPredictor::RecordUrlRedirect(
324 const URLRequestSummary& response) {
325 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
326
327 switch (response.resource_type_) {
328 case ResourceType::MAIN_FRAME:
dominich 2012/05/21 16:16:53 if rather than switch.
Shishir 2012/05/23 01:46:46 Done.
329 OnMainFrameRedirect(response);
330 break;
331 default:
332 NOTREACHED() << "Unhandled RecordUrlRedirect";
333 }
334 }
335
336 void ResourcePrefetchPredictor::OnMainFrameRequest(
337 const URLRequestSummary& request) {
338 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
339 if (!initialized_)
340 return;
341
342 // It is possible to see this multiple times for the exact same navigation.
343 // TODO(shishir): Maybe fix it.
dominich 2012/05/21 16:16:53 Enter a bug for this and reference it here.
Shishir 2012/05/23 01:46:46 This should not be an issue now that we dont use a
344 NavigationMap::const_iterator it =
345 inflight_navigations_.find(request.navigation_id_);
346 if (it != inflight_navigations_.end()) {
347 if (it->first.creation_time_ == request.navigation_id_.creation_time_) {
348 LOG(ERROR) << "Multiple OnMainFrameRequest for same navigation." <<
349 it->first.creation_time_.ToInternalValue();
350 return;
351 }
352 }
353
354 // Cleanup older navigations.
355 CleanupAbandonedNavigations(request.navigation_id_);
356
357 // New empty navigation entry.
358 inflight_navigations_[request.navigation_id_];
dominich 2012/05/21 16:16:53 Please use insert here with a default constructed
Shishir 2012/05/23 01:46:46 Done.
359 }
360
361 void ResourcePrefetchPredictor::OnMainFrameResponse(
362 const URLRequestSummary& response) {
363 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
364 if (!initialized_)
365 return;
366
367 // TODO(shishir): The prefreshing will be stopped here.
dominich 2012/05/21 16:16:53 Can you add this as part of this CL?
Shishir 2012/05/23 01:46:46 The entire prefreshing is missing from this CL. To
368 }
369
370 void ResourcePrefetchPredictor::OnMainFrameRedirect(
371 const URLRequestSummary& response) {
372 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
373 if (!initialized_)
374 return;
375
376 inflight_navigations_.erase(response.navigation_id_);
dominich 2012/05/21 16:16:53 can you add a comment explaining why we're not sto
Shishir 2012/05/23 01:46:46 Because we are not actually doing any prefreshing
377 }
378
379 void ResourcePrefetchPredictor::OnSubresourceResponse(
380 const URLRequestSummary& response) {
381 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
382
383 if (inflight_navigations_.find(response.navigation_id_) ==
384 inflight_navigations_.end())
385 return;
386
387 inflight_navigations_[response.navigation_id_].push_back(response);
388 }
389
390 void ResourcePrefetchPredictor::OnSubresourceLoadedFromMemory(
391 const NavigationID& navigation_id,
392 const GURL& resource_url) {
393 if (inflight_navigations_.find(navigation_id) == inflight_navigations_.end())
394 return;
395
396 URLRequestSummary summary;
397 summary.navigation_id_ = navigation_id;
398 summary.resource_url_ = resource_url;
399 summary.resource_type_ = ResourceType::LAST_TYPE; // Dont have type here.
dominich 2012/05/21 16:16:53 You could add it - WebContentsImpl::OnDidLoadResou
Shishir 2012/05/23 01:46:46 The resource_type is not very accurate. I am addin
400 summary.was_cached_ = true;
401 inflight_navigations_[navigation_id].push_back(summary);
402 }
403
404 void ResourcePrefetchPredictor::CleanupAbandonedNavigations(
405 const NavigationID& navigation_id) {
406 static const base::TimeDelta max_navigation_age =
407 base::TimeDelta::FromSeconds(kMaxNavigationLifetimeSeconds);
408
409 base::TimeTicks time_now = base::TimeTicks::Now();
410 for (NavigationMap::iterator it = inflight_navigations_.begin();
411 it != inflight_navigations_.end();) {
dominich 2012/05/21 16:16:53 You can increment |it| in the for loop as map::era
Shishir 2012/05/23 01:46:46 That actually shouldn't work because 'it' itself s
412 if (it->first.IsSameRenderer(navigation_id) ||
413 (time_now - it->first.creation_time_ > max_navigation_age)) {
414 inflight_navigations_.erase(it++);
415 UMA_HISTOGRAM_BOOLEAN("ResourcePrefetchPredictor.DidNavigationComplete",
416 false);
417 } else {
418 ++it;
419 }
420 }
421 }
422
423 void ResourcePrefetchPredictor::ShutdownOnUIThread() {
424 notification_registrar_.reset(NULL);
425 }
426
427 void ResourcePrefetchPredictor::Observe(
428 int type,
429 const content::NotificationSource& source,
430 const content::NotificationDetails& details) {
431 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
dominich 2012/05/21 16:16:53 DCHECK instead of CHECK.
Shishir 2012/05/23 01:46:46 I have all the thread checks as CHECKS and DCHECKs
432
433 switch (type) {
434 case content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME: {
435 const content::WebContents* web_contents =
436 content::Source<content::WebContents>(source).ptr();
437 NavigationID navigation_id(*web_contents);
438 OnNavigationComplete(navigation_id);
439 break;
440 }
441
442 case content::NOTIFICATION_LOAD_FROM_MEMORY_CACHE: {
443 const LoadFromMemoryCacheDetails* load_details =
444 content::Details<LoadFromMemoryCacheDetails>(details).ptr();
445 const content::WebContents* web_contents =
446 content::Source<content::NavigationController>(
447 source).ptr()->GetWebContents();
448
449 NavigationID navigation_id(*web_contents);
450 OnSubresourceLoadedFromMemory(navigation_id, load_details->url());
451 break;
452 }
453
454 case chrome::NOTIFICATION_HISTORY_LOADED: {
455 DCHECK(!initialized_);
456 notification_registrar_->Remove(this,
457 chrome::NOTIFICATION_HISTORY_LOADED,
458 content::Source<Profile>(profile_));
459 OnHistoryAndCacheLoaded();
460 break;
461 }
462
463 default:
464 NOTREACHED() << "Unexpected notification observed.";
465 break;
466 }
467 }
468
469 void ResourcePrefetchPredictor::OnHistoryAndCacheLoaded() {
470 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
471 CHECK(!initialized_);
472
473 // Update the data with last visit info from in memory history db.
474 HistoryService* history_service =
475 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
476 if (history_service && history_service->InMemoryDatabase()) {
dominich 2012/05/21 16:16:53 I think you can DCHECK on history_service, or even
Shishir 2012/05/23 01:46:46 Done.
477 history::URLDatabase* url_db = history_service->InMemoryDatabase();
dominich 2012/05/21 16:16:53 Then you can store this in the local var outside t
Shishir 2012/05/23 01:46:46 Done.
478
479 std::vector<GURL> urls_to_delete;
480 for (UrlTableCacheMap::iterator it = url_table_cache_.begin();
481 it != url_table_cache_.end();) {
dominich 2012/05/21 16:16:53 Increment |it| in the for loop. map::erase doesn't
Shishir 2012/05/23 01:46:46 As above the after the erase call, the 'it' itself
482 history::URLRow url_row;
483 if (url_db->GetRowForURL(it->first, &url_row) == 0) {
484 urls_to_delete.push_back(it->first);
485 url_table_cache_.erase(it++);
486 } else {
487 it->second.last_visit_ = url_row.last_visit();
488 ++it;
489 }
490 }
491 if (!urls_to_delete.empty())
492 BrowserThread::PostTask(BrowserThread::DB, FROM_HERE,
493 base::Bind(&ResourcePrefetchPredictorTables::DeleteUrlRows,
494 tables_,
495 urls_to_delete));
496 }
497
498 notification_registrar_->Add(
499 this,
500 content::NOTIFICATION_LOAD_FROM_MEMORY_CACHE,
501 content::NotificationService::AllSources());
502
503 notification_registrar_->Add(
504 this,
505 content::NOTIFICATION_LOAD_COMPLETED_MAIN_FRAME,
506 content::NotificationService::AllSources());
507
508 // TODO(shishir): Maybe listen for notifications for navigation being
509 // abandoned and cleanup the inflight_navigations_.
510
511 initialized_ = true;
512 }
513
514 bool ResourcePrefetchPredictor::ShouldTrackUrl(const GURL& url) {
515 HistoryService* history_service =
516 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
517 if (!history_service)
518 return false;
519 history::URLDatabase* url_db = history_service->InMemoryDatabase();
520 if (!url_db)
521 return false;
522
523 history::URLRow url_row;
524 return url_db->GetRowForURL(url, &url_row) != 0 &&
525 url_row.visit_count() >= kMinUrlVisitCount;
526 }
527
528 void ResourcePrefetchPredictor::OnNavigationComplete(
529 const NavigationID& navigation_id) {
530 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
531
532 UMA_HISTOGRAM_BOOLEAN("ResourcePrefetchPredictor.DidNavigationComplete",
533 true);
534
535 // The following should probably be a DCHECK.
536 if (inflight_navigations_.find(navigation_id) == inflight_navigations_.end())
dominich 2012/05/21 16:16:53 So make it a DCHECK :)
Shishir 2012/05/23 01:46:46 Done.
537 return;
538
539 // Report any stats.
540 MaybeReportAccuracyStats(navigation_id);
541
542 // Update the URL table.
543 const GURL& main_frame_url = navigation_id.main_frame_url_;
544 if (url_table_cache_.find(main_frame_url) != url_table_cache_.end() ||
dominich 2012/05/21 16:16:53 Is it worth putting the cache_ check inside Should
Shishir 2012/05/23 01:46:46 Done.
545 ShouldTrackUrl(main_frame_url))
546 LearnUrlNavigation(main_frame_url, inflight_navigations_[navigation_id]);
547
548 // Remove the navigation.
549 inflight_navigations_.erase(navigation_id);
550 }
551
552 void ResourcePrefetchPredictor::LearnUrlNavigation(
553 const GURL& main_frame_url,
554 const std::vector<URLRequestSummary>& new_resources) {
555 CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
dominich 2012/05/21 16:16:53 Can any of this be done as a PostTask to avoid blo
Shishir 2012/05/23 01:46:46 We could potentially post this to the DB thread le
556
557 if (url_table_cache_.find(main_frame_url) == url_table_cache_.end()) {
558 if (url_table_cache_.size() >= kMaxNumUrlsToTrack)
559 RemoveAnEntryFromUrlDB();
560
561 url_table_cache_[main_frame_url].last_visit_ = base::Time::Now();
562 for (int i = 0; i < static_cast<int>(new_resources.size()); ++i) {
dominich 2012/05/21 16:16:53 cache the loop end variable outside the loop to av
Shishir 2012/05/23 01:46:46 Done.
563 UrlTableRow row_to_add;
564 row_to_add.main_frame_url_ = main_frame_url;
565 row_to_add.resource_url_ = new_resources[i].resource_url_;
566 row_to_add.resource_type_ = new_resources[i].resource_type_;
567 row_to_add.number_of_hits_ = 1;
568 row_to_add.average_position_ = i + 1;
569 url_table_cache_[main_frame_url].rows_.push_back(row_to_add);
570 }
571 } else {
572 UrlTableRowVector& old_resources = url_table_cache_[main_frame_url].rows_;
573 url_table_cache_[main_frame_url].last_visit_ = base::Time::Now();
574
575 // Build indices over the data.
576 std::map<GURL, int> new_index, old_index;
577 for (int i = 0; i < static_cast<int>(new_resources.size()); ++i) {
dominich 2012/05/21 16:16:53 cache the loop end var in a local variable.
Shishir 2012/05/23 01:46:46 Done.
578 const URLRequestSummary& summary = new_resources[i];
579 // Take the first occurence of every url.
580 if (new_index.find(summary.resource_url_) == new_index.end())
581 new_index[summary.resource_url_] = i;
582 }
583 for (int i = 0; i < static_cast<int>(old_resources.size()); ++i) {
dominich 2012/05/21 16:16:53 cache the loop end var in a local variable.
Shishir 2012/05/23 01:46:46 Done.
584 const UrlTableRow& row = old_resources[i];
585 DCHECK(old_index.find(row.resource_url_) == old_index.end());
586 old_index[row.resource_url_] = i;
587 }
588
589 // Go through the old urls and update their hit/miss counts.
590 for (int i = 0; i < static_cast<int>(old_resources.size()); ++i) {
dominich 2012/05/21 16:16:53 cache loop end var.
Shishir 2012/05/23 01:46:46 Done.
591 UrlTableRow& old_row = old_resources[i];
592 if (new_index.find(old_row.resource_url_) == new_index.end()) {
593 old_row.number_of_misses_++;
dominich 2012/05/21 16:16:53 prefer pre-increment.
Shishir 2012/05/23 01:46:46 Done.
594 old_row.consecutive_misses_++;
595 } else {
596 const URLRequestSummary& new_row =
597 new_resources[new_index[old_row.resource_url_]];
dominich 2012/05/21 16:16:53 you could remove it from new_resources here to mak
Shishir 2012/05/23 01:46:46 Wont removing the struct from the vector be more i
598
599 // Update the resource type if its missing.
dominich 2012/05/21 16:16:53 nit: it's
Shishir 2012/05/23 01:46:46 Done.
600 if (old_row.resource_type_ == ResourceType::LAST_TYPE)
601 old_row.resource_type_ = new_row.resource_type_;
dominich 2012/05/21 16:16:53 what if the resource_type has changed? unlikely, b
Shishir 2012/05/23 01:46:46 Fixed.
602
603 int position = new_index[old_row.resource_url_] + 1;
604 int total = old_row.number_of_hits_ + old_row.number_of_misses_;
605 old_row.average_position_ =
606 ((old_row.average_position_ * total) + position) / (total + 1);
607 old_row.number_of_hits_++;
dominich 2012/05/21 16:16:53 prefer pre-increment.
Shishir 2012/05/23 01:46:46 Done.
608 old_row.consecutive_misses_ = 0;
609 }
610 }
611
612 // Add the new ones that we have not seen before.
613 for (int i = 0; i < static_cast<int>(new_resources.size()); ++i) {
dominich 2012/05/21 16:16:53 cache loop var.
Shishir 2012/05/23 01:46:46 Done.
614 const URLRequestSummary& summary = new_resources[i];
615 if (old_index.find(summary.resource_url_) != old_index.end())
dominich 2012/05/21 16:16:53 This would be a DCHECK if you remove from new_reso
Shishir 2012/05/23 01:46:46 Pending reply on the above comment.
616 continue;
617
618 // Only need to add new stuff.
619 UrlTableRow row_to_add;
620 row_to_add.main_frame_url_ = main_frame_url;
621 row_to_add.resource_url_ = summary.resource_url_;
622 row_to_add.resource_type_ = summary.resource_type_;
623 row_to_add.number_of_hits_ = 1;
624 row_to_add.average_position_ = i + 1;
625 old_resources.push_back(row_to_add);
626
627 // To ensure we dont add the same url twice.
628 old_index[summary.resource_url_] = 0;
629 }
630 }
631
632 // Trim and sort the rows after the update.
633 UrlTableRowVector& rows = url_table_cache_[main_frame_url].rows_;
634 for (int i = rows.size() - 1; i >= 0; --i) {
dominich 2012/05/21 16:16:53 use iterator loop here.
Shishir 2012/05/23 01:46:46 Done.
635 UrlTableRow& row = rows[i];
636 row.UpdateScore();
637 if (row.consecutive_misses_ >= kMaxConsecutiveMisses)
638 rows.erase(rows.begin() + i);
639 }
640 std::sort(rows.begin(), rows.end(),
641 ResourcePrefetchPredictorTables::UrlTableRowSorter());
642
643 BrowserThread::PostTask(
644 BrowserThread::DB, FROM_HERE,
645 base::Bind(&ResourcePrefetchPredictorTables::UpdateRowsForUrl,
646 tables_,
647 main_frame_url,
648 rows));
649 }
650
651 void ResourcePrefetchPredictor::RemoveAnEntryFromUrlDB() {
652 if (url_table_cache_.empty())
653 return;
654
655 // TODO(shishir): Maybe use a heap to do this more efficiently.
656 base::Time oldest_time;
657 GURL url_to_erase;
658 for (UrlTableCacheMap::iterator it = url_table_cache_.begin();
659 it != url_table_cache_.end(); ++it) {
660 if (url_to_erase.is_empty() || it->second.last_visit_ < oldest_time) {
661 url_to_erase = it->first;
662 oldest_time = it->second.last_visit_;
663 }
664 }
665 url_table_cache_.erase(url_to_erase);
666
667 std::vector<GURL> urls_to_delete(1, url_to_erase);
668 BrowserThread::PostTask(BrowserThread::DB, FROM_HERE,
669 base::Bind(&ResourcePrefetchPredictorTables::DeleteUrlRows,
670 tables_,
671 urls_to_delete));
672 }
673
674 void ResourcePrefetchPredictor::MaybeReportAccuracyStats(
675 const NavigationID& navigation_id) {
676 const GURL& main_frame_url = navigation_id.main_frame_url_;
677 DCHECK(inflight_navigations_.find(navigation_id) !=
678 inflight_navigations_.end());
679
680 bool have_predictions_for_url =
681 url_table_cache_.find(main_frame_url) != url_table_cache_.end();
682 UMA_HISTOGRAM_BOOLEAN("ResourcePrefetchPredictor.HavePredictionsForUrl",
683 have_predictions_for_url);
684 if (!have_predictions_for_url)
685 return;
686
687 const std::vector<URLRequestSummary>& actual =
688 inflight_navigations_[navigation_id];
689 const UrlTableRowVector& predicted = url_table_cache_[main_frame_url].rows_;
690
691 std::map<GURL, bool> actual_resources;
692 for (int i = 0; i < static_cast<int>(actual.size()); ++i) {
dominich 2012/05/21 16:16:53 use iterator loop.
Shishir 2012/05/23 01:46:46 Done.
693 actual_resources[actual[i].resource_url_] = actual[i].was_cached_;
694 }
695
696 int prefetch_cached = 0, prefetch_network = 0, prefetch_missed = 0;
697 int num_assumed_prefetched = std::min(static_cast<int>(predicted.size()),
698 kNumResourcesAssumedPrefetched);
699 for (int i = 0; i < num_assumed_prefetched; ++i) {
700 const UrlTableRow& row = predicted[i];
701 if (actual_resources.find(row.resource_url_) == actual_resources.end()) {
702 prefetch_missed++;
dominich 2012/05/21 16:16:53 prefer pre-increment
Shishir 2012/05/23 01:46:46 Done.
703 } else if (actual_resources[row.resource_url_]) {
704 prefetch_cached++;
dominich 2012/05/21 16:16:53 prefer pre-increment
Shishir 2012/05/23 01:46:46 Done.
705 } else {
706 prefetch_network++;
dominich 2012/05/21 16:16:53 prefer pre-increment
Shishir 2012/05/23 01:46:46 Done.
707 }
708 }
709
710 UMA_HISTOGRAM_PERCENTAGE(
711 "ResourcePrefetchPredictor.PredictedPrefetchMisses",
712 prefetch_missed * 100.0 / num_assumed_prefetched);
713 UMA_HISTOGRAM_PERCENTAGE(
714 "ResourcePrefetchPredictor.PredictedPrefetchFromCache",
715 prefetch_cached * 100.0 / num_assumed_prefetched);
716 UMA_HISTOGRAM_PERCENTAGE(
717 "ResourcePrefetchPredictor.PredictedPrefetchFromNetwork",
718 prefetch_network * 100.0 / num_assumed_prefetched);
719 }
720
721 } // namespace predictors
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698