components/omnibox/browser/scored_history_match.cc - Issue 2421373003: Omnibox: Improve HQP Scoring for Terms that Start with Punctuation

Unified Diff: components/omnibox/browser/scored_history_match.cc

Issue 2421373003: Omnibox: Improve HQP Scoring for Terms that Start with Punctuation (Closed)

Patch Set: Peter's comments, plus running `git cl format` on the rest of the changelist. Created 4 years, 2 months ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: components/omnibox/browser/scored_history_match.cc

diff --git a/components/omnibox/browser/scored_history_match.cc b/components/omnibox/browser/scored_history_match.cc

index 6d1b0eac25d2fbf3488bab6e4e0e8ec1e4f5bf1b..a42e984e62422feb6e2518aec2f2a86165299632 100644

--- a/components/omnibox/browser/scored_history_match.cc

+++ b/components/omnibox/browser/scored_history_match.cc

@@ -473,31 +473,34 @@ float ScoredHistoryMatch::GetTopicalityScore(

0, colon_pos);

}

for (const auto& url_match : url_matches) {

- const size_t term_offset = terms_to_word_starts_offsets[url_match.term_num];

+ // Calculate the offset in the URL string where the meaningful (word) part

+ // of the term starts. This takes into account times when a term starts

+ // with punctuation such as "/foo".

+ const size_t term_word_offset =

+ url_match.offset + terms_to_word_starts_offsets[url_match.term_num];

// Advance next_word_starts until it's >= the position of the term we're

// considering (adjusted for where the word begins within the term).

while ((next_word_starts != end_word_starts) &&

- (*next_word_starts < (url_match.offset + term_offset))) {

+ (*next_word_starts < term_word_offset)) {

++next_word_starts;

}

- const bool at_word_boundary =

- (next_word_starts != end_word_starts) &&

- (*next_word_starts == url_match.offset + term_offset);

+ const bool at_word_boundary = (next_word_starts != end_word_starts) &&

+ (*next_word_starts == term_word_offset);

if ((question_mark_pos != std::string::npos) &&

- (url_match.offset > question_mark_pos)) {

+ (term_word_offset >= question_mark_pos)) {

// The match is in a CGI ?... fragment.

DCHECK(at_word_boundary);

term_scores[url_match.term_num] += 5;

} else if ((end_of_hostname_pos != std::string::npos) &&

- (url_match.offset > end_of_hostname_pos)) {

+ (term_word_offset >= end_of_hostname_pos)) {

// The match is in the path.

DCHECK(at_word_boundary);

term_scores[url_match.term_num] += 8;

} else if ((colon_pos == std::string::npos) ||

- (url_match.offset > colon_pos)) {

+ (term_word_offset >= colon_pos)) {

// The match is in the hostname.

if ((last_part_of_hostname_pos == std::string::npos) ||

- (url_match.offset < last_part_of_hostname_pos)) {

+ (term_word_offset < last_part_of_hostname_pos)) {

// Either there are no dots in the hostname or this match isn't

// the last dotted component.

term_scores[url_match.term_num] += at_word_boundary ? 10 : 2;

@@ -524,20 +527,22 @@ float ScoredHistoryMatch::GetTopicalityScore(

title_matches, terms_to_word_starts_offsets,

word_starts.title_word_starts_, 0, std::string::npos);

for (const auto& title_match : title_matches) {

- const size_t term_offset =

- terms_to_word_starts_offsets[title_match.term_num];

+ // Calculate the offset in the title string where the meaningful (word) part

+ // of the term starts. This takes into account times when a term starts

+ // with punctuation such as "/foo".

+ const size_t term_word_offset =

+ title_match.offset + terms_to_word_starts_offsets[title_match.term_num];

// Advance next_word_starts until it's >= the position of the term we're

// considering (adjusted for where the word begins within the term).

while ((next_word_starts != end_word_starts) &&

- (*next_word_starts < (title_match.offset + term_offset))) {

+ (*next_word_starts < term_word_offset)) {

++next_word_starts;

++word_num;

}

if (word_num >= num_title_words_to_allow_)

break; // only count the first ten words

DCHECK(next_word_starts != end_word_starts);

- DCHECK_EQ(*next_word_starts, title_match.offset + term_offset)

- << "not at word boundary";

+ DCHECK_EQ(*next_word_starts, term_word_offset) << "not at word boundary";

term_scores[title_match.term_num] += 8;

}

// TODO(mpearson): Restore logic for penalizing out-of-order matches.

« no previous file with comments | « no previous file | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | no next file with comments »