Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: chrome/browser/history/scored_history_match.cc

Issue 11416285: Omnibox: Tweak HQP New Scoring and Re-Enable Field Trial (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/scored_history_match.h" 5 #include "chrome/browser/history/scored_history_match.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <numeric> 10 #include <numeric>
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
143 float bookmark_boost = 143 float bookmark_boost =
144 (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0; 144 (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0;
145 145
146 if (use_new_scoring) { 146 if (use_new_scoring) {
147 const float topicality_score = GetTopicalityScore( 147 const float topicality_score = GetTopicalityScore(
148 terms.size(), url, url_matches, title_matches, word_starts); 148 terms.size(), url, url_matches, title_matches, word_starts);
149 const float recency_score = GetRecencyScore( 149 const float recency_score = GetRecencyScore(
150 (now - row.last_visit()).InDays()); 150 (now - row.last_visit()).InDays());
151 const float popularity_score = GetPopularityScore( 151 const float popularity_score = GetPopularityScore(
152 row.typed_count() + bookmark_boost, row.visit_count()); 152 row.typed_count() + bookmark_boost, row.visit_count());
153 153 raw_score = GetFinalRelevancyScore(
154 // Combine recency, popularity, and topicality scores into one. 154 topicality_score, recency_score, popularity_score);
155 // Example of how this functions: Suppose the omnibox has one
156 // input term. Suppose we have a URL that has 30 typed visits with
157 // the most recent being within a day and the omnibox input term
158 // has a single URL hostname hit at a word boundary. Then this
159 // URL will score 1200 ( = 30 * 40.0).
160 raw_score = 40.0 * topicality_score * recency_score * popularity_score;
161 raw_score = 155 raw_score =
162 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; 156 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max;
163 } else { // "old" scoring 157 } else { // "old" scoring
164 // Get partial scores based on term matching. Note that the score for 158 // Get partial scores based on term matching. Note that the score for
165 // each of the URL and title are adjusted by the fraction of the 159 // each of the URL and title are adjusted by the fraction of the
166 // terms appearing in each. 160 // terms appearing in each.
167 int url_score = 161 int url_score =
168 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_, 162 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_,
169 url.length()) * 163 url.length()) *
170 std::min(url_matches.size(), terms.size()) / terms.size(); 164 std::min(url_matches.size(), terms.size()) / terms.size();
(...skipping 353 matching lines...) Expand 10 before | Expand all | Expand 10 after
524 // Advance next_word_starts until it's >= the position of the term 518 // Advance next_word_starts until it's >= the position of the term
525 // we're considering. 519 // we're considering.
526 while ((next_word_starts != end_word_starts) && 520 while ((next_word_starts != end_word_starts) &&
527 (*next_word_starts < iter->offset)) { 521 (*next_word_starts < iter->offset)) {
528 ++next_word_starts; 522 ++next_word_starts;
529 ++word_num; 523 ++word_num;
530 } 524 }
531 if (word_num >= 10) break; // only count the first ten words 525 if (word_num >= 10) break; // only count the first ten words
532 const bool at_word_boundary = (next_word_starts != end_word_starts) && 526 const bool at_word_boundary = (next_word_starts != end_word_starts) &&
533 (*next_word_starts == iter->offset); 527 (*next_word_starts == iter->offset);
534 term_scores[iter->term_num] += at_word_boundary ? 8 : 2; 528 term_scores[iter->term_num] += at_word_boundary ? 8 : 0;
535 } 529 }
536 // TODO(mpearson): Restore logic for penalizing out-of-order matches. 530 // TODO(mpearson): Restore logic for penalizing out-of-order matches.
537 // (Perhaps discount them by 0.8?) 531 // (Perhaps discount them by 0.8?)
538 // TODO(mpearson): Consider: if the earliest match occurs late in the string, 532 // TODO(mpearson): Consider: if the earliest match occurs late in the string,
539 // should we discount it? 533 // should we discount it?
540 // TODO(mpearson): Consider: do we want to score based on how much of the 534 // TODO(mpearson): Consider: do we want to score based on how much of the
541 // input string the input covers? (I'm leaning toward no.) 535 // input string the input covers? (I'm leaning toward no.)
542 536
543 // Compute the topicality_score as the sum of transformed term_scores. 537 // Compute the topicality_score as the sum of transformed term_scores.
544 float topicality_score = 0; 538 float topicality_score = 0;
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
637 } 631 }
638 632
639 // static 633 // static
640 float ScoredHistoryMatch::GetPopularityScore(int typed_count, 634 float ScoredHistoryMatch::GetPopularityScore(int typed_count,
641 int visit_count) { 635 int visit_count) {
642 // The max()s are to guard against database corruption. 636 // The max()s are to guard against database corruption.
643 return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) / 637 return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) /
644 (5.0 + 3.0); 638 (5.0 + 3.0);
645 } 639 }
646 640
641 // static
642 float ScoredHistoryMatch::GetFinalRelevancyScore(
643 float topicality_score, float recency_score, float popularity_score) {
644 // Here's how to interpret intermediate_score: Suppose the omnibox
645 // has one input term. Suppose we have a URL that has 5 typed
646 // visits with the most recent being within a day and the omnibox
647 // input term has a single URL hostname hit at a word boundary.
648 // This URL will have an intermediate_score of 5.0 (= 1 topicality *
649 // 1 recency * 5 popularity).
650 float intermediate_score =
651 topicality_score * recency_score * popularity_score;
652 // The below code takes intermediate_score from [0, infinity) to
653 // relevancy scores in the range [0, 1400).
654 float attenuating_factor = 1.0;
655 if (intermediate_score < 4) {
656 // The formula in the final return line in this function only works if
657 // intermediate_score > 4. For lower scores, we linearly interpolate
658 // between 0 and the formula when intermediate_score = 4.0.
659 attenuating_factor = intermediate_score / 4.0;
660 intermediate_score = 4.0;
661 }
662 DCHECK_GE(intermediate_score, 4.0);
663 return attenuating_factor * 1400.0 * (2.0 - exp(2.0 / intermediate_score));
664 }
665
647 void ScoredHistoryMatch::InitializeNewScoringField() { 666 void ScoredHistoryMatch::InitializeNewScoringField() {
648 enum NewScoringOption { 667 enum NewScoringOption {
649 OLD_SCORING = 0, 668 OLD_SCORING = 0,
650 NEW_SCORING = 1, 669 NEW_SCORING = 1,
651 NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2, 670 NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2,
652 NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3, 671 NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3,
653 NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4, 672 NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4,
654 NUM_OPTIONS = 5 673 NUM_OPTIONS = 5
655 }; 674 };
656 // should always be overwritten 675 // should always be overwritten
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
711 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup(); 730 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup();
712 } 731 }
713 732
714 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { 733 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() {
715 also_do_hup_like_scoring = 734 also_do_hup_like_scoring =
716 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && 735 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() &&
717 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); 736 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup();
718 } 737 }
719 738
720 } // namespace history 739 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/history/scored_history_match.h ('k') | chrome/browser/history/scored_history_match_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698