OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/autocomplete/network_action_predictor.h" | |
6 | |
7 #include <math.h> | |
8 | |
9 #include <vector> | |
10 | |
11 #include "base/bind.h" | |
12 #include "base/i18n/case_conversion.h" | |
13 #include "base/metrics/histogram.h" | |
14 #include "base/string_util.h" | |
15 #include "base/stringprintf.h" | |
16 #include "base/utf_string_conversions.h" | |
17 #include "chrome/browser/autocomplete/autocomplete.h" | |
18 #include "chrome/browser/autocomplete/autocomplete_match.h" | |
19 #include "chrome/browser/autocomplete/network_action_predictor_database.h" | |
20 #include "chrome/browser/history/history.h" | |
21 #include "chrome/browser/history/history_notifications.h" | |
22 #include "chrome/browser/history/in_memory_database.h" | |
23 #include "chrome/browser/prerender/prerender_field_trial.h" | |
24 #include "chrome/browser/prerender/prerender_manager.h" | |
25 #include "chrome/browser/prerender/prerender_manager_factory.h" | |
26 #include "chrome/browser/profiles/profile.h" | |
27 #include "chrome/common/chrome_notification_types.h" | |
28 #include "chrome/common/guid.h" | |
29 #include "content/public/browser/browser_thread.h" | |
30 #include "content/public/browser/notification_details.h" | |
31 #include "content/public/browser/notification_service.h" | |
32 #include "content/public/browser/notification_source.h" | |
33 | |
34 namespace { | |
35 | |
36 const float kConfidenceCutoff[] = { | |
37 0.8f, | |
38 0.5f | |
39 }; | |
40 | |
41 const size_t kMinimumUserTextLength = 1; | |
42 const int kMinimumNumberOfHits = 3; | |
43 | |
44 COMPILE_ASSERT(arraysize(kConfidenceCutoff) == | |
45 NetworkActionPredictor::LAST_PREDICT_ACTION, | |
46 ConfidenceCutoff_count_mismatch); | |
47 | |
48 enum DatabaseAction { | |
49 DATABASE_ACTION_ADD, | |
50 DATABASE_ACTION_UPDATE, | |
51 DATABASE_ACTION_DELETE_SOME, | |
52 DATABASE_ACTION_DELETE_ALL, | |
53 DATABASE_ACTION_COUNT | |
54 }; | |
55 | |
56 bool IsAutocompleteMatchSearchType(const AutocompleteMatch& match) { | |
57 switch (match.type) { | |
58 // Matches using the user's default search engine. | |
59 case AutocompleteMatch::SEARCH_WHAT_YOU_TYPED: | |
60 case AutocompleteMatch::SEARCH_HISTORY: | |
61 case AutocompleteMatch::SEARCH_SUGGEST: | |
62 // A match that uses a non-default search engine (e.g. for tab-to-search). | |
63 case AutocompleteMatch::SEARCH_OTHER_ENGINE: | |
64 return true; | |
65 | |
66 default: | |
67 return false; | |
68 } | |
69 } | |
70 | |
71 } | |
72 | |
73 const int NetworkActionPredictor::kMaximumDaysToKeepEntry = 14; | |
74 | |
75 double NetworkActionPredictor::hit_weight_ = 1.0; | |
76 | |
77 NetworkActionPredictor::NetworkActionPredictor(Profile* profile) | |
78 : profile_(profile), | |
79 db_(new NetworkActionPredictorDatabase(profile)), | |
80 initialized_(false) { | |
81 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, | |
82 base::Bind(&NetworkActionPredictorDatabase::Initialize, db_)); | |
83 | |
84 // Request the in-memory database from the history to force it to load so it's | |
85 // available as soon as possible. | |
86 HistoryService* history_service = | |
87 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); | |
88 if (history_service) | |
89 history_service->InMemoryDatabase(); | |
90 | |
91 // Create local caches using the database as loaded. We will garbage collect | |
92 // rows from the caches and the database once the history service is | |
93 // available. | |
94 std::vector<NetworkActionPredictorDatabase::Row>* rows = | |
95 new std::vector<NetworkActionPredictorDatabase::Row>(); | |
96 content::BrowserThread::PostTaskAndReply( | |
97 content::BrowserThread::DB, FROM_HERE, | |
98 base::Bind(&NetworkActionPredictorDatabase::GetAllRows, db_, rows), | |
99 base::Bind(&NetworkActionPredictor::CreateCaches, AsWeakPtr(), | |
100 base::Owned(rows))); | |
101 | |
102 } | |
103 | |
104 NetworkActionPredictor::~NetworkActionPredictor() { | |
105 } | |
106 | |
107 void NetworkActionPredictor::RegisterTransitionalMatches( | |
108 const string16& user_text, | |
109 const AutocompleteResult& result) { | |
110 if (user_text.length() < kMinimumUserTextLength) | |
111 return; | |
112 const string16 lower_user_text(base::i18n::ToLower(user_text)); | |
113 | |
114 // Merge this in to an existing match if we already saw |user_text| | |
115 std::vector<TransitionalMatch>::iterator match_it = | |
116 std::find(transitional_matches_.begin(), transitional_matches_.end(), | |
117 lower_user_text); | |
118 | |
119 if (match_it == transitional_matches_.end()) { | |
120 TransitionalMatch transitional_match; | |
121 transitional_match.user_text = lower_user_text; | |
122 match_it = transitional_matches_.insert(transitional_matches_.end(), | |
123 transitional_match); | |
124 } | |
125 | |
126 for (AutocompleteResult::const_iterator it = result.begin(); | |
127 it != result.end(); ++it) { | |
128 if (std::find(match_it->urls.begin(), match_it->urls.end(), | |
129 it->destination_url) == match_it->urls.end()) { | |
130 match_it->urls.push_back(it->destination_url); | |
131 } | |
132 } | |
133 } | |
134 | |
135 void NetworkActionPredictor::ClearTransitionalMatches() { | |
136 transitional_matches_.clear(); | |
137 } | |
138 | |
139 // Given a match, return a recommended action. | |
140 NetworkActionPredictor::Action NetworkActionPredictor::RecommendAction( | |
141 const string16& user_text, | |
142 const AutocompleteMatch& match) const { | |
143 bool is_in_db = false; | |
144 const double confidence = CalculateConfidence(user_text, match, &is_in_db); | |
145 DCHECK(confidence >= 0.0 && confidence <= 1.0); | |
146 | |
147 UMA_HISTOGRAM_BOOLEAN("NetworkActionPredictor.MatchIsInDb", is_in_db); | |
148 | |
149 if (is_in_db) { | |
150 // Multiple enties with the same URL are fine as the confidence may be | |
151 // different. | |
152 tracked_urls_.push_back(std::make_pair(match.destination_url, confidence)); | |
153 UMA_HISTOGRAM_COUNTS_100("NetworkActionPredictor.Confidence", | |
154 confidence * 100); | |
155 } | |
156 | |
157 // Map the confidence to an action. | |
158 Action action = ACTION_NONE; | |
159 for (int i = 0; i < LAST_PREDICT_ACTION; ++i) { | |
160 if (confidence >= kConfidenceCutoff[i]) { | |
161 action = static_cast<Action>(i); | |
162 break; | |
163 } | |
164 } | |
165 | |
166 // Downgrade prerender to preconnect if this is a search match or if omnibox | |
167 // prerendering is disabled. There are cases when Instant will not handle a | |
168 // search suggestion and in those cases it would be good to prerender the | |
169 // search results, however search engines have not been set up to correctly | |
170 // handle being prerendered and until they are we should avoid it. | |
171 // http://crbug.com/117495 | |
172 if (action == ACTION_PRERENDER && | |
173 (IsAutocompleteMatchSearchType(match) || | |
174 !prerender::IsOmniboxEnabled(profile_))) { | |
175 action = ACTION_PRECONNECT; | |
176 } | |
177 | |
178 return action; | |
179 } | |
180 | |
181 // Return true if the suggestion type warrants a TCP/IP preconnection. | |
182 // i.e., it is now quite likely that the user will select the related domain. | |
183 // static | |
184 bool NetworkActionPredictor::IsPreconnectable(const AutocompleteMatch& match) { | |
185 return IsAutocompleteMatchSearchType(match); | |
186 } | |
187 | |
188 void NetworkActionPredictor::Shutdown() { | |
189 db_->OnPredictorDestroyed(); | |
190 } | |
191 | |
192 void NetworkActionPredictor::Observe( | |
193 int type, | |
194 const content::NotificationSource& source, | |
195 const content::NotificationDetails& details) { | |
196 switch (type) { | |
197 case chrome::NOTIFICATION_HISTORY_URLS_DELETED: { | |
198 DCHECK(initialized_); | |
199 const content::Details<const history::URLsDeletedDetails> | |
200 urls_deleted_details = | |
201 content::Details<const history::URLsDeletedDetails>(details); | |
202 if (urls_deleted_details->all_history) | |
203 DeleteAllRows(); | |
204 else | |
205 DeleteRowsWithURLs(urls_deleted_details->rows); | |
206 break; | |
207 } | |
208 | |
209 // This notification does not catch all instances of the user navigating | |
210 // from the Omnibox, but it does catch the cases where the dropdown is open | |
211 // and those are the events we're most interested in. | |
212 case chrome::NOTIFICATION_OMNIBOX_OPENED_URL: { | |
213 DCHECK(initialized_); | |
214 | |
215 // TODO(dominich): This doesn't need to be synchronous. Investigate | |
216 // posting it as a task to be run later. | |
217 OnOmniboxOpenedUrl(*content::Details<AutocompleteLog>(details).ptr()); | |
218 break; | |
219 } | |
220 | |
221 case chrome::NOTIFICATION_HISTORY_LOADED: { | |
222 DCHECK(!initialized_); | |
223 TryDeleteOldEntries(content::Details<HistoryService>(details).ptr()); | |
224 | |
225 notification_registrar_.Remove(this, | |
226 chrome::NOTIFICATION_HISTORY_LOADED, | |
227 content::Source<Profile>(profile_)); | |
228 break; | |
229 } | |
230 | |
231 default: | |
232 NOTREACHED() << "Unexpected notification observed."; | |
233 break; | |
234 } | |
235 } | |
236 | |
237 void NetworkActionPredictor::OnOmniboxOpenedUrl(const AutocompleteLog& log) { | |
238 if (log.text.length() < kMinimumUserTextLength) | |
239 return; | |
240 | |
241 const AutocompleteMatch& match = log.result.match_at(log.selected_index); | |
242 | |
243 UMA_HISTOGRAM_BOOLEAN( | |
244 StringPrintf("Prerender.OmniboxNavigationsCouldPrerender_%.1f%s", | |
245 get_hit_weight(), | |
246 prerender::PrerenderManager::GetModeString()).c_str(), | |
247 prerender::IsOmniboxEnabled(profile_)); | |
248 | |
249 const GURL& opened_url = match.destination_url; | |
250 | |
251 // If the Omnibox triggered a prerender but the URL doesn't match the one the | |
252 // user is navigating to, cancel the prerender. | |
253 prerender::PrerenderManager* prerender_manager = | |
254 prerender::PrerenderManagerFactory::GetForProfile(profile_); | |
255 // |prerender_manager| can be NULL in incognito mode or if prerendering is | |
256 // otherwise disabled. | |
257 if (prerender_manager && !prerender_manager->IsPrerendering(opened_url)) | |
258 prerender_manager->CancelOmniboxPrerenders(); | |
259 | |
260 const string16 lower_user_text(base::i18n::ToLower(log.text)); | |
261 | |
262 BeginTransaction(); | |
263 // Traverse transitional matches for those that have a user_text that is a | |
264 // prefix of |lower_user_text|. | |
265 for (std::vector<TransitionalMatch>::const_iterator it = | |
266 transitional_matches_.begin(); it != transitional_matches_.end(); | |
267 ++it) { | |
268 if (!StartsWith(lower_user_text, it->user_text, true)) | |
269 continue; | |
270 | |
271 // Add entries to the database for those matches. | |
272 for (std::vector<GURL>::const_iterator url_it = it->urls.begin(); | |
273 url_it != it->urls.end(); ++url_it) { | |
274 DCHECK(it->user_text.length() >= kMinimumUserTextLength); | |
275 const DBCacheKey key = { it->user_text, *url_it }; | |
276 const bool is_hit = (*url_it == opened_url); | |
277 | |
278 NetworkActionPredictorDatabase::Row row; | |
279 row.user_text = key.user_text; | |
280 row.url = key.url; | |
281 | |
282 DBCacheMap::iterator it = db_cache_.find(key); | |
283 if (it == db_cache_.end()) { | |
284 row.id = guid::GenerateGUID(); | |
285 row.number_of_hits = is_hit ? 1 : 0; | |
286 row.number_of_misses = is_hit ? 0 : 1; | |
287 | |
288 AddRow(key, row); | |
289 } else { | |
290 DCHECK(db_id_cache_.find(key) != db_id_cache_.end()); | |
291 row.id = db_id_cache_.find(key)->second; | |
292 row.number_of_hits = it->second.number_of_hits + (is_hit ? 1 : 0); | |
293 row.number_of_misses = it->second.number_of_misses + (is_hit ? 0 : 1); | |
294 | |
295 UpdateRow(it, row); | |
296 } | |
297 } | |
298 } | |
299 CommitTransaction(); | |
300 | |
301 ClearTransitionalMatches(); | |
302 | |
303 // Check against tracked urls and log accuracy for the confidence we | |
304 // predicted. | |
305 for (std::vector<std::pair<GURL, double> >::const_iterator it = | |
306 tracked_urls_.begin(); it != tracked_urls_.end(); | |
307 ++it) { | |
308 if (opened_url == it->first) { | |
309 UMA_HISTOGRAM_COUNTS_100("NetworkActionPredictor.AccurateCount", | |
310 it->second * 100); | |
311 } | |
312 } | |
313 tracked_urls_.clear(); | |
314 } | |
315 | |
316 void NetworkActionPredictor::DeleteOldIdsFromCaches( | |
317 history::URLDatabase* url_db, | |
318 std::vector<NetworkActionPredictorDatabase::Row::Id>* id_list) { | |
319 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); | |
320 DCHECK(url_db); | |
321 DCHECK(id_list); | |
322 id_list->clear(); | |
323 for (DBCacheMap::iterator it = db_cache_.begin(); it != db_cache_.end();) { | |
324 history::URLRow url_row; | |
325 | |
326 if ((url_db->GetRowForURL(it->first.url, &url_row) == 0) || | |
327 ((base::Time::Now() - url_row.last_visit()).InDays() > | |
328 kMaximumDaysToKeepEntry)) { | |
329 const DBIdCacheMap::iterator id_it = db_id_cache_.find(it->first); | |
330 DCHECK(id_it != db_id_cache_.end()); | |
331 id_list->push_back(id_it->second); | |
332 db_id_cache_.erase(id_it); | |
333 db_cache_.erase(it++); | |
334 } else { | |
335 ++it; | |
336 } | |
337 } | |
338 } | |
339 | |
340 void NetworkActionPredictor::DeleteOldEntries(history::URLDatabase* url_db) { | |
341 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); | |
342 DCHECK(!initialized_); | |
343 | |
344 std::vector<NetworkActionPredictorDatabase::Row::Id> ids_to_delete; | |
345 DeleteOldIdsFromCaches(url_db, &ids_to_delete); | |
346 | |
347 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, | |
348 base::Bind(&NetworkActionPredictorDatabase::DeleteRows, db_, | |
349 ids_to_delete)); | |
350 | |
351 // Register for notifications and set the |initialized_| flag. | |
352 notification_registrar_.Add(this, chrome::NOTIFICATION_OMNIBOX_OPENED_URL, | |
353 content::Source<Profile>(profile_)); | |
354 notification_registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, | |
355 content::Source<Profile>(profile_)); | |
356 initialized_ = true; | |
357 } | |
358 | |
359 void NetworkActionPredictor::CreateCaches( | |
360 std::vector<NetworkActionPredictorDatabase::Row>* rows) { | |
361 CHECK(content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); | |
362 DCHECK(!initialized_); | |
363 DCHECK(db_cache_.empty()); | |
364 DCHECK(db_id_cache_.empty()); | |
365 | |
366 for (std::vector<NetworkActionPredictorDatabase::Row>::const_iterator it = | |
367 rows->begin(); it != rows->end(); ++it) { | |
368 const DBCacheKey key = { it->user_text, it->url }; | |
369 const DBCacheValue value = { it->number_of_hits, it->number_of_misses }; | |
370 db_cache_[key] = value; | |
371 db_id_cache_[key] = it->id; | |
372 } | |
373 | |
374 // If the history service is ready, delete any old or invalid entries. | |
375 HistoryService* history_service = | |
376 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); | |
377 if (!TryDeleteOldEntries(history_service)) { | |
378 // Wait for the notification that the history service is ready and the URL | |
379 // DB is loaded. | |
380 notification_registrar_.Add(this, chrome::NOTIFICATION_HISTORY_LOADED, | |
381 content::Source<Profile>(profile_)); | |
382 } | |
383 } | |
384 | |
385 bool NetworkActionPredictor::TryDeleteOldEntries(HistoryService* service) { | |
386 if (!service) | |
387 return false; | |
388 | |
389 history::URLDatabase* url_db = service->InMemoryDatabase(); | |
390 if (!url_db) | |
391 return false; | |
392 | |
393 DeleteOldEntries(url_db); | |
394 return true; | |
395 } | |
396 | |
397 double NetworkActionPredictor::CalculateConfidence( | |
398 const string16& user_text, | |
399 const AutocompleteMatch& match, | |
400 bool* is_in_db) const { | |
401 const DBCacheKey key = { user_text, match.destination_url }; | |
402 | |
403 *is_in_db = false; | |
404 if (user_text.length() < kMinimumUserTextLength) | |
405 return 0.0; | |
406 | |
407 const DBCacheMap::const_iterator iter = db_cache_.find(key); | |
408 if (iter == db_cache_.end()) | |
409 return 0.0; | |
410 | |
411 *is_in_db = true; | |
412 return CalculateConfidenceForDbEntry(iter); | |
413 } | |
414 | |
415 double NetworkActionPredictor::CalculateConfidenceForDbEntry( | |
416 DBCacheMap::const_iterator iter) const { | |
417 const DBCacheValue& value = iter->second; | |
418 if (value.number_of_hits < kMinimumNumberOfHits) | |
419 return 0.0; | |
420 | |
421 const double number_of_hits = value.number_of_hits * hit_weight_; | |
422 return number_of_hits / (number_of_hits + value.number_of_misses); | |
423 } | |
424 | |
425 void NetworkActionPredictor::AddRow( | |
426 const DBCacheKey& key, | |
427 const NetworkActionPredictorDatabase::Row& row) { | |
428 if (!initialized_) | |
429 return; | |
430 | |
431 DBCacheValue value = { row.number_of_hits, row.number_of_misses }; | |
432 db_cache_[key] = value; | |
433 db_id_cache_[key] = row.id; | |
434 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, | |
435 base::Bind(&NetworkActionPredictorDatabase::AddRow, db_, row)); | |
436 | |
437 UMA_HISTOGRAM_ENUMERATION("NetworkActionPredictor.DatabaseAction", | |
438 DATABASE_ACTION_ADD, DATABASE_ACTION_COUNT); | |
439 } | |
440 | |
441 void NetworkActionPredictor::UpdateRow( | |
442 DBCacheMap::iterator it, | |
443 const NetworkActionPredictorDatabase::Row& row) { | |
444 if (!initialized_) | |
445 return; | |
446 | |
447 DCHECK(it != db_cache_.end()); | |
448 it->second.number_of_hits = row.number_of_hits; | |
449 it->second.number_of_misses = row.number_of_misses; | |
450 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, | |
451 base::Bind(&NetworkActionPredictorDatabase::UpdateRow, db_, row)); | |
452 UMA_HISTOGRAM_ENUMERATION("NetworkActionPredictor.DatabaseAction", | |
453 DATABASE_ACTION_UPDATE, DATABASE_ACTION_COUNT); | |
454 } | |
455 | |
456 void NetworkActionPredictor::DeleteAllRows() { | |
457 if (!initialized_) | |
458 return; | |
459 | |
460 db_cache_.clear(); | |
461 db_id_cache_.clear(); | |
462 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, | |
463 base::Bind(&NetworkActionPredictorDatabase::DeleteAllRows, db_)); | |
464 UMA_HISTOGRAM_ENUMERATION("NetworkActionPredictor.DatabaseAction", | |
465 DATABASE_ACTION_DELETE_ALL, DATABASE_ACTION_COUNT); | |
466 } | |
467 | |
468 void NetworkActionPredictor::DeleteRowsWithURLs(const history::URLRows& rows) { | |
469 if (!initialized_) | |
470 return; | |
471 | |
472 std::vector<NetworkActionPredictorDatabase::Row::Id> id_list; | |
473 | |
474 for (DBCacheMap::iterator it = db_cache_.begin(); it != db_cache_.end();) { | |
475 if (std::find_if(rows.begin(), rows.end(), | |
476 history::URLRow::URLRowHasURL(it->first.url)) != rows.end()) { | |
477 const DBIdCacheMap::iterator id_it = db_id_cache_.find(it->first); | |
478 DCHECK(id_it != db_id_cache_.end()); | |
479 id_list.push_back(id_it->second); | |
480 db_id_cache_.erase(id_it); | |
481 db_cache_.erase(it++); | |
482 } else { | |
483 ++it; | |
484 } | |
485 } | |
486 | |
487 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, | |
488 base::Bind(&NetworkActionPredictorDatabase::DeleteRows, db_, id_list)); | |
489 UMA_HISTOGRAM_ENUMERATION("NetworkActionPredictor.DatabaseAction", | |
490 DATABASE_ACTION_DELETE_SOME, DATABASE_ACTION_COUNT); | |
491 } | |
492 | |
493 void NetworkActionPredictor::BeginTransaction() { | |
494 if (!initialized_) | |
495 return; | |
496 | |
497 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, | |
498 base::Bind(&NetworkActionPredictorDatabase::BeginTransaction, db_)); | |
499 } | |
500 | |
501 void NetworkActionPredictor::CommitTransaction() { | |
502 if (!initialized_) | |
503 return; | |
504 | |
505 content::BrowserThread::PostTask(content::BrowserThread::DB, FROM_HERE, | |
506 base::Bind(&NetworkActionPredictorDatabase::CommitTransaction, db_)); | |
507 } | |
508 | |
509 NetworkActionPredictor::TransitionalMatch::TransitionalMatch() { | |
510 } | |
511 | |
512 NetworkActionPredictor::TransitionalMatch::~TransitionalMatch() { | |
513 } | |
OLD | NEW |