chrome/browser/history/scored_history_match.cc - Issue 11416285: Omnibox: Tweak HQP New Scoring and Re-Enable Field Trial

Side by Side Diff: chrome/browser/history/scored_history_match.cc

Issue 11416285: Omnibox: Tweak HQP New Scoring and Re-Enable Field Trial (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: rebase Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/scored_history_match.h"	5 #include "chrome/browser/history/scored_history_match.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <numeric>	10 #include <numeric>

(...skipping 132 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
143 float bookmark_boost =	143 float bookmark_boost =

144 (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0;	144 (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0;

145	145

146 if (use_new_scoring) {	146 if (use_new_scoring) {

147 const float topicality_score = GetTopicalityScore(	147 const float topicality_score = GetTopicalityScore(

148 terms.size(), url, url_matches, title_matches, word_starts);	148 terms.size(), url, url_matches, title_matches, word_starts);

149 const float recency_score = GetRecencyScore(	149 const float recency_score = GetRecencyScore(

150 (now - row.last_visit()).InDays());	150 (now - row.last_visit()).InDays());

151 const float popularity_score = GetPopularityScore(	151 const float popularity_score = GetPopularityScore(

152 row.typed_count() + bookmark_boost, row.visit_count());	152 row.typed_count() + bookmark_boost, row.visit_count());

153	153 raw_score = GetFinalRelevancyScore(

154 // Combine recency, popularity, and topicality scores into one.	154 topicality_score, recency_score, popularity_score);

155 // Example of how this functions: Suppose the omnibox has one

156 // input term. Suppose we have a URL that has 30 typed visits with

157 // the most recent being within a day and the omnibox input term

158 // has a single URL hostname hit at a word boundary. Then this

159 // URL will score 1200 ( = 30 * 40.0).

160 raw_score = 40.0 * topicality_score * recency_score * popularity_score;

161 raw_score =	155 raw_score =

162 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max;	156 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max;

163 } else { // "old" scoring	157 } else { // "old" scoring

164 // Get partial scores based on term matching. Note that the score for	158 // Get partial scores based on term matching. Note that the score for

165 // each of the URL and title are adjusted by the fraction of the	159 // each of the URL and title are adjusted by the fraction of the

166 // terms appearing in each.	160 // terms appearing in each.

167 int url_score =	161 int url_score =

168 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_,	162 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_,

169 url.length()) *	163 url.length()) *

170 std::min(url_matches.size(), terms.size()) / terms.size();	164 std::min(url_matches.size(), terms.size()) / terms.size();

(...skipping 353 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
524 // Advance next_word_starts until it's >= the position of the term	518 // Advance next_word_starts until it's >= the position of the term

525 // we're considering.	519 // we're considering.

526 while ((next_word_starts != end_word_starts) &&	520 while ((next_word_starts != end_word_starts) &&

527 (*next_word_starts < iter->offset)) {	521 (*next_word_starts < iter->offset)) {

528 ++next_word_starts;	522 ++next_word_starts;

529 ++word_num;	523 ++word_num;

530 }	524 }

531 if (word_num >= 10) break; // only count the first ten words	525 if (word_num >= 10) break; // only count the first ten words

532 const bool at_word_boundary = (next_word_starts != end_word_starts) &&	526 const bool at_word_boundary = (next_word_starts != end_word_starts) &&

533 (*next_word_starts == iter->offset);	527 (*next_word_starts == iter->offset);

534 term_scores[iter->term_num] += at_word_boundary ? 8 : 2;	528 term_scores[iter->term_num] += at_word_boundary ? 8 : 0;

535 }	529 }

536 // TODO(mpearson): Restore logic for penalizing out-of-order matches.	530 // TODO(mpearson): Restore logic for penalizing out-of-order matches.

537 // (Perhaps discount them by 0.8?)	531 // (Perhaps discount them by 0.8?)

538 // TODO(mpearson): Consider: if the earliest match occurs late in the string,	532 // TODO(mpearson): Consider: if the earliest match occurs late in the string,

539 // should we discount it?	533 // should we discount it?

540 // TODO(mpearson): Consider: do we want to score based on how much of the	534 // TODO(mpearson): Consider: do we want to score based on how much of the

541 // input string the input covers? (I'm leaning toward no.)	535 // input string the input covers? (I'm leaning toward no.)

542	536

543 // Compute the topicality_score as the sum of transformed term_scores.	537 // Compute the topicality_score as the sum of transformed term_scores.

544 float topicality_score = 0;	538 float topicality_score = 0;

(...skipping 92 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
637 }	631 }

638	632

639 // static	633 // static

640 float ScoredHistoryMatch::GetPopularityScore(int typed_count,	634 float ScoredHistoryMatch::GetPopularityScore(int typed_count,

641 int visit_count) {	635 int visit_count) {

642 // The max()s are to guard against database corruption.	636 // The max()s are to guard against database corruption.

643 return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) /	637 return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) /

644 (5.0 + 3.0);	638 (5.0 + 3.0);

645 }	639 }

646	640

	641 // static

	642 float ScoredHistoryMatch::GetFinalRelevancyScore(

	643 float topicality_score, float recency_score, float popularity_score) {

	644 // Here's how to interpret intermediate_score: Suppose the omnibox

	645 // has one input term. Suppose we have a URL that has 5 typed

	646 // visits with the most recent being within a day and the omnibox

	647 // input term has a single URL hostname hit at a word boundary.

	648 // This URL will have an intermediate_score of 5.0 (= 1 topicality *

	649 // 1 recency * 5 popularity).

	650 float intermediate_score =

	651 topicality_score * recency_score * popularity_score;

	652 // The below code takes intermediate_score from [0, infinity) to

	653 // relevancy scores in the range [0, 1400).

	654 float attenuating_factor = 1.0;

	655 if (intermediate_score < 4) {

	656 // The formula in the final return line in this function only works if

	657 // intermediate_score >= 4. For lower scores, we linearly interpolate

	658 // between 0 and the formula when intermediate_score = 4.0.

	659 attenuating_factor = intermediate_score / 4.0;

	660 intermediate_score = 4.0;

	661 }

	662 DCHECK_GE(intermediate_score, 4.0);

	663 return attenuating_factor * 1400.0 * (2.0 - exp(2.0 / intermediate_score));

	664 }

	665

647 void ScoredHistoryMatch::InitializeNewScoringField() {	666 void ScoredHistoryMatch::InitializeNewScoringField() {

648 enum NewScoringOption {	667 enum NewScoringOption {

649 OLD_SCORING = 0,	668 OLD_SCORING = 0,

650 NEW_SCORING = 1,	669 NEW_SCORING = 1,

651 NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2,	670 NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2,

652 NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3,	671 NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3,

653 NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4,	672 NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4,

654 NUM_OPTIONS = 5	673 NUM_OPTIONS = 5

655 };	674 };

656 // should always be overwritten	675 // should always be overwritten

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
711 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup();	730 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup();

712 }	731 }

713	732

714 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() {	733 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() {

715 also_do_hup_like_scoring =	734 also_do_hup_like_scoring =

716 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() &&	735 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() &&

717 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup();	736 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup();

718 }	737 }

719	738

720 } // namespace history	739 } // namespace history

OLD	NEW

« no previous file with comments | « chrome/browser/history/scored_history_match.h ('k') | chrome/browser/history/scored_history_match_unittest.cc » ('j') | no next file with comments »