OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/scored_history_match.h" | 5 #include "chrome/browser/history/scored_history_match.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <numeric> | 10 #include <numeric> |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
49 // counts this score, which then gets modified slightly. This number | 49 // counts this score, which then gets modified slightly. This number |
50 // is derived from the score of 900 + kMaxMatches used in HistoryURL | 50 // is derived from the score of 900 + kMaxMatches used in HistoryURL |
51 // provider's CalculateRelevance() function for matches that are not | 51 // provider's CalculateRelevance() function for matches that are not |
52 // worthy of being inline autopleted. | 52 // worthy of being inline autopleted. |
53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900; | 53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900; |
54 | 54 |
55 // ScoredHistoryMatch ---------------------------------------------------------- | 55 // ScoredHistoryMatch ---------------------------------------------------------- |
56 | 56 |
57 bool ScoredHistoryMatch::initialized_ = false; | 57 bool ScoredHistoryMatch::initialized_ = false; |
58 bool ScoredHistoryMatch::use_new_scoring = false; | 58 bool ScoredHistoryMatch::use_new_scoring = false; |
| 59 bool ScoredHistoryMatch::only_count_matches_at_word_boundaries = false; |
59 bool ScoredHistoryMatch::also_do_hup_like_scoring = false; | 60 bool ScoredHistoryMatch::also_do_hup_like_scoring = false; |
60 | 61 |
61 ScoredHistoryMatch::ScoredHistoryMatch() | 62 ScoredHistoryMatch::ScoredHistoryMatch() |
62 : raw_score(0), | 63 : raw_score(0), |
63 can_inline(false) { | 64 can_inline(false) { |
64 if (!initialized_) { | 65 if (!initialized_) { |
65 InitializeNewScoringField(); | 66 InitializeNewScoringField(); |
| 67 InitializeOnlyCountMatchesAtWordBoundariesField(); |
66 InitializeAlsoDoHUPLikeScoringField(); | 68 InitializeAlsoDoHUPLikeScoringField(); |
67 initialized_ = true; | 69 initialized_ = true; |
68 } | 70 } |
69 } | 71 } |
70 | 72 |
71 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, | 73 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, |
72 const string16& lower_string, | 74 const string16& lower_string, |
73 const String16Vector& terms, | 75 const String16Vector& terms, |
74 const RowWordStarts& word_starts, | 76 const RowWordStarts& word_starts, |
75 const base::Time now, | 77 const base::Time now, |
76 BookmarkService* bookmark_service) | 78 BookmarkService* bookmark_service) |
77 : HistoryMatch(row, 0, false, false), | 79 : HistoryMatch(row, 0, false, false), |
78 raw_score(0), | 80 raw_score(0), |
79 can_inline(false) { | 81 can_inline(false) { |
80 if (!initialized_) { | 82 if (!initialized_) { |
81 InitializeNewScoringField(); | 83 InitializeNewScoringField(); |
| 84 InitializeOnlyCountMatchesAtWordBoundariesField(); |
82 InitializeAlsoDoHUPLikeScoringField(); | 85 InitializeAlsoDoHUPLikeScoringField(); |
83 initialized_ = true; | 86 initialized_ = true; |
84 } | 87 } |
85 | 88 |
86 GURL gurl = row.url(); | 89 GURL gurl = row.url(); |
87 if (!gurl.is_valid()) | 90 if (!gurl.is_valid()) |
88 return; | 91 return; |
89 | 92 |
90 // Figure out where each search term appears in the URL and/or page title | 93 // Figure out where each search term appears in the URL and/or page title |
91 // so that we can score as well as provide autocomplete highlighting. | 94 // so that we can score as well as provide autocomplete highlighting. |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
154 // the most recent being within a day and the omnibox input term | 157 // the most recent being within a day and the omnibox input term |
155 // has a single URL hostname hit at a word boundary. Then this | 158 // has a single URL hostname hit at a word boundary. Then this |
156 // URL will score 1200 ( = 30 * 40.0). | 159 // URL will score 1200 ( = 30 * 40.0). |
157 raw_score = 40.0 * topicality_score * recency_score * popularity_score; | 160 raw_score = 40.0 * topicality_score * recency_score * popularity_score; |
158 raw_score = | 161 raw_score = |
159 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; | 162 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; |
160 } else { // "old" scoring | 163 } else { // "old" scoring |
161 // Get partial scores based on term matching. Note that the score for | 164 // Get partial scores based on term matching. Note that the score for |
162 // each of the URL and title are adjusted by the fraction of the | 165 // each of the URL and title are adjusted by the fraction of the |
163 // terms appearing in each. | 166 // terms appearing in each. |
164 int url_score = ScoreComponentForMatches(url_matches, url.length()) * | 167 int url_score = |
| 168 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_, |
| 169 url.length()) * |
165 std::min(url_matches.size(), terms.size()) / terms.size(); | 170 std::min(url_matches.size(), terms.size()) / terms.size(); |
166 int title_score = | 171 int title_score = |
167 ScoreComponentForMatches(title_matches, title.length()) * | 172 ScoreComponentForMatches(title_matches, word_starts.title_word_starts_, |
| 173 title.length()) * |
168 std::min(title_matches.size(), terms.size()) / terms.size(); | 174 std::min(title_matches.size(), terms.size()) / terms.size(); |
169 // Arbitrarily pick the best. | 175 // Arbitrarily pick the best. |
170 // TODO(mrossetti): It might make sense that a term which appears in both | 176 // TODO(mrossetti): It might make sense that a term which appears in both |
171 // the URL and the Title should boost the score a bit. | 177 // the URL and the Title should boost the score a bit. |
172 int term_score = std::max(url_score, title_score); | 178 int term_score = std::max(url_score, title_score); |
173 if (term_score == 0) | 179 if (term_score == 0) |
174 return; | 180 return; |
175 | 181 |
176 // Determine scoring factors for the recency of visit, visit count and typed | 182 // Determine scoring factors for the recency of visit, visit count and typed |
177 // count attributes of the URLRow. | 183 // count attributes of the URLRow. |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
280 | 286 |
281 ScoredHistoryMatch::~ScoredHistoryMatch() {} | 287 ScoredHistoryMatch::~ScoredHistoryMatch() {} |
282 | 288 |
283 // std::accumulate helper function to add up TermMatches' lengths as used in | 289 // std::accumulate helper function to add up TermMatches' lengths as used in |
284 // ScoreComponentForMatches | 290 // ScoreComponentForMatches |
285 int AccumulateMatchLength(int total, const TermMatch& match) { | 291 int AccumulateMatchLength(int total, const TermMatch& match) { |
286 return total + match.length; | 292 return total + match.length; |
287 } | 293 } |
288 | 294 |
289 // static | 295 // static |
290 int ScoredHistoryMatch::ScoreComponentForMatches(const TermMatches& matches, | 296 int ScoredHistoryMatch::ScoreComponentForMatches( |
291 size_t max_length) { | 297 const TermMatches& provided_matches, |
| 298 const WordStarts& word_starts, |
| 299 size_t max_length) { |
| 300 if (provided_matches.empty()) |
| 301 return 0; |
| 302 |
| 303 TermMatches matches_at_word_boundaries; |
| 304 if (only_count_matches_at_word_boundaries) { |
| 305 MakeTermMatchesOnlyAtWordBoundaries(provided_matches, word_starts, |
| 306 &matches_at_word_boundaries); |
| 307 } |
| 308 // The actual matches we'll use for matching. This is |provided_matches| |
| 309 // with all the matches not at a word boundary removed (if told to do so). |
| 310 const TermMatches& matches = only_count_matches_at_word_boundaries ? |
| 311 matches_at_word_boundaries : provided_matches; |
| 312 |
292 if (matches.empty()) | 313 if (matches.empty()) |
293 return 0; | 314 return 0; |
294 | 315 |
295 // Score component for whether the input terms (if more than one) were found | 316 // Score component for whether the input terms (if more than one) were found |
296 // in the same order in the match. Start with kOrderMaxValue points divided | 317 // in the same order in the match. Start with kOrderMaxValue points divided |
297 // equally among (number of terms - 1); then discount each of those terms that | 318 // equally among (number of terms - 1); then discount each of those terms that |
298 // is out-of-order in the match. | 319 // is out-of-order in the match. |
299 const int kOrderMaxValue = 1000; | 320 const int kOrderMaxValue = 1000; |
300 int order_value = kOrderMaxValue; | 321 int order_value = kOrderMaxValue; |
301 if (matches.size() > 1) { | 322 if (matches.size() > 1) { |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
339 complete_value * kCompleteRelevance; | 360 complete_value * kCompleteRelevance; |
340 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); | 361 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); |
341 | 362 |
342 // Scale the raw score into a single score component in the same manner as | 363 // Scale the raw score into a single score component in the same manner as |
343 // used in ScoredMatchForURL(). | 364 // used in ScoredMatchForURL(). |
344 const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; | 365 const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; |
345 return ScoreForValue(raw_score, kTermScoreLevel); | 366 return ScoreForValue(raw_score, kTermScoreLevel); |
346 } | 367 } |
347 | 368 |
348 // static | 369 // static |
| 370 void ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( |
| 371 const TermMatches& provided_matches, |
| 372 const WordStarts& word_starts, |
| 373 TermMatches* matches_at_word_boundaries) { |
| 374 matches_at_word_boundaries->clear(); |
| 375 // Resize it to an upper-bound estimate of the correct size. |
| 376 matches_at_word_boundaries->reserve(provided_matches.size()); |
| 377 WordStarts::const_iterator next_word_starts = word_starts.begin(); |
| 378 for (TermMatches::const_iterator iter = provided_matches.begin(); |
| 379 iter != provided_matches.end(); ++iter) { |
| 380 // Advance next_word_starts until it's >= the position of the term |
| 381 // we're considering. |
| 382 while ((next_word_starts != word_starts.end()) && |
| 383 (*next_word_starts < iter->offset)) { |
| 384 ++next_word_starts; |
| 385 } |
| 386 if ((next_word_starts != word_starts.end()) && |
| 387 (*next_word_starts == iter->offset)) { |
| 388 // At word boundary: copy this element into |matches_at_word_boundaries|. |
| 389 matches_at_word_boundaries->push_back(*iter); |
| 390 } |
| 391 } |
| 392 } |
| 393 |
| 394 // static |
349 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) { | 395 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) { |
350 int i = 0; | 396 int i = 0; |
351 int rank_count = arraysize(kScoreRank); | 397 int rank_count = arraysize(kScoreRank); |
352 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? | 398 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? |
353 (value > value_ranks[i]) : (value < value_ranks[i]))) | 399 (value > value_ranks[i]) : (value < value_ranks[i]))) |
354 ++i; | 400 ++i; |
355 if (i >= rank_count) | 401 if (i >= rank_count) |
356 return 0; | 402 return 0; |
357 int score = kScoreRank[i]; | 403 int score = kScoreRank[i]; |
358 if (i > 0) { | 404 if (i > 0) { |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
650 } | 696 } |
651 | 697 |
652 // Add a beacon to the logs that'll allow us to identify later what | 698 // Add a beacon to the logs that'll allow us to identify later what |
653 // new scoring state a user is in. Do this by incrementing a bucket in | 699 // new scoring state a user is in. Do this by incrementing a bucket in |
654 // a histogram, where the bucket represents the user's new scoring state. | 700 // a histogram, where the bucket represents the user's new scoring state. |
655 UMA_HISTOGRAM_ENUMERATION( | 701 UMA_HISTOGRAM_ENUMERATION( |
656 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon", | 702 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon", |
657 new_scoring_option, NUM_OPTIONS); | 703 new_scoring_option, NUM_OPTIONS); |
658 } | 704 } |
659 | 705 |
| 706 void ScoredHistoryMatch::InitializeOnlyCountMatchesAtWordBoundariesField() { |
| 707 only_count_matches_at_word_boundaries = |
| 708 AutocompleteFieldTrial:: |
| 709 InHQPOnlyCountMatchesAtWordBoundariesFieldTrial() && |
| 710 AutocompleteFieldTrial:: |
| 711 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup(); |
| 712 } |
| 713 |
660 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { | 714 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { |
661 also_do_hup_like_scoring = | 715 also_do_hup_like_scoring = |
662 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && | 716 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && |
663 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); | 717 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); |
664 } | 718 } |
665 | 719 |
666 } // namespace history | 720 } // namespace history |
OLD | NEW |