Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(239)

Unified Diff: components/omnibox/browser/scored_history_match.cc

Issue 2421373003: Omnibox: Improve HQP Scoring for Terms that Start with Punctuation (Closed)
Patch Set: Peter's comments, plus "git cl format" applied to the rest of the changelist. Created 4 years, 2 months ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: components/omnibox/browser/scored_history_match.cc
diff --git a/components/omnibox/browser/scored_history_match.cc b/components/omnibox/browser/scored_history_match.cc
index 6d1b0eac25d2fbf3488bab6e4e0e8ec1e4f5bf1b..a42e984e62422feb6e2518aec2f2a86165299632 100644
--- a/components/omnibox/browser/scored_history_match.cc
+++ b/components/omnibox/browser/scored_history_match.cc
@@ -473,31 +473,34 @@ float ScoredHistoryMatch::GetTopicalityScore(
0, colon_pos);
}
for (const auto& url_match : url_matches) {
- const size_t term_offset = terms_to_word_starts_offsets[url_match.term_num];
+ // Calculate the offset in the URL string where the meaningful (word) part
+ // of the term starts. This takes into account times when a term starts
+ // with punctuation such as "/foo".
+ const size_t term_word_offset =
+ url_match.offset + terms_to_word_starts_offsets[url_match.term_num];
// Advance next_word_starts until it's >= the position of the term we're
// considering (adjusted for where the word begins within the term).
while ((next_word_starts != end_word_starts) &&
- (*next_word_starts < (url_match.offset + term_offset))) {
+ (*next_word_starts < term_word_offset)) {
++next_word_starts;
}
- const bool at_word_boundary =
- (next_word_starts != end_word_starts) &&
- (*next_word_starts == url_match.offset + term_offset);
+ const bool at_word_boundary = (next_word_starts != end_word_starts) &&
+ (*next_word_starts == term_word_offset);
if ((question_mark_pos != std::string::npos) &&
- (url_match.offset > question_mark_pos)) {
+ (term_word_offset >= question_mark_pos)) {
// The match is in a CGI ?... fragment.
DCHECK(at_word_boundary);
term_scores[url_match.term_num] += 5;
} else if ((end_of_hostname_pos != std::string::npos) &&
- (url_match.offset > end_of_hostname_pos)) {
+ (term_word_offset >= end_of_hostname_pos)) {
// The match is in the path.
DCHECK(at_word_boundary);
term_scores[url_match.term_num] += 8;
} else if ((colon_pos == std::string::npos) ||
- (url_match.offset > colon_pos)) {
+ (term_word_offset >= colon_pos)) {
// The match is in the hostname.
if ((last_part_of_hostname_pos == std::string::npos) ||
- (url_match.offset < last_part_of_hostname_pos)) {
+ (term_word_offset < last_part_of_hostname_pos)) {
// Either there are no dots in the hostname or this match isn't
// the last dotted component.
term_scores[url_match.term_num] += at_word_boundary ? 10 : 2;
@@ -524,20 +527,22 @@ float ScoredHistoryMatch::GetTopicalityScore(
title_matches, terms_to_word_starts_offsets,
word_starts.title_word_starts_, 0, std::string::npos);
for (const auto& title_match : title_matches) {
- const size_t term_offset =
- terms_to_word_starts_offsets[title_match.term_num];
+ // Calculate the offset in the title string where the meaningful (word) part
+ // of the term starts. This takes into account times when a term starts
+ // with punctuation such as "/foo".
+ const size_t term_word_offset =
+ title_match.offset + terms_to_word_starts_offsets[title_match.term_num];
// Advance next_word_starts until it's >= the position of the term we're
// considering (adjusted for where the word begins within the term).
while ((next_word_starts != end_word_starts) &&
- (*next_word_starts < (title_match.offset + term_offset))) {
+ (*next_word_starts < term_word_offset)) {
++next_word_starts;
++word_num;
}
if (word_num >= num_title_words_to_allow_)
break; // only count the first ten words
DCHECK(next_word_starts != end_word_starts);
- DCHECK_EQ(*next_word_starts, title_match.offset + term_offset)
- << "not at word boundary";
+ DCHECK_EQ(*next_word_starts, term_word_offset) << "not at word boundary";
term_scores[title_match.term_num] += 8;
}
// TODO(mpearson): Restore logic for penalizing out-of-order matches.
« no previous file with comments | « no previous file | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698