Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: components/omnibox/browser/scored_history_match_unittest.cc

Issue 2421373003: Omnibox: Improve HQP Scoring for Terms that Start with Punctuation (Closed)
Patch Set: Peter's comments, plus `git cl format` on the rest of the changelist. Created 4 years, 2 months ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/omnibox/browser/scored_history_match.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/scored_history_match.h" 5 #include "components/omnibox/browser/scored_history_match.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <memory> 8 #include <memory>
9 9
10 #include "base/auto_reset.h" 10 #include "base/auto_reset.h"
11 #include "base/bind.h" 11 #include "base/bind.h"
12 #include "base/i18n/break_iterator.h"
12 #include "base/memory/ptr_util.h" 13 #include "base/memory/ptr_util.h"
13 #include "base/strings/string16.h" 14 #include "base/strings/string16.h"
14 #include "base/strings/utf_string_conversions.h" 15 #include "base/strings/utf_string_conversions.h"
15 #include "components/search_engines/search_terms_data.h" 16 #include "components/search_engines/search_terms_data.h"
16 #include "components/search_engines/template_url.h" 17 #include "components/search_engines/template_url.h"
17 #include "components/search_engines/template_url_service.h" 18 #include "components/search_engines/template_url_service.h"
18 #include "components/search_engines/template_url_service_client.h" 19 #include "components/search_engines/template_url_service_client.h"
19 #include "testing/gmock/include/gmock/gmock.h" 20 #include "testing/gmock/include/gmock/gmock.h"
20 #include "testing/gtest/include/gtest/gtest.h" 21 #include "testing/gtest/include/gtest/gtest.h"
21 22
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
112 const base::string16& title) { 113 const base::string16& title) {
113 // Make an empty match and simply populate the fields we need in order 114 // Make an empty match and simply populate the fields we need in order
114 // to call GetTopicalityScore(). 115 // to call GetTopicalityScore().
115 ScoredHistoryMatch scored_match; 116 ScoredHistoryMatch scored_match;
116 scored_match.url_matches = MatchTermInString(term, url, 0); 117 scored_match.url_matches = MatchTermInString(term, url, 0);
117 scored_match.title_matches = MatchTermInString(term, title, 0); 118 scored_match.title_matches = MatchTermInString(term, title, 0);
118 scored_match.topicality_threshold_ = -1; 119 scored_match.topicality_threshold_ = -1;
119 RowWordStarts word_starts; 120 RowWordStarts word_starts;
120 String16SetFromString16(url, &word_starts.url_word_starts_); 121 String16SetFromString16(url, &word_starts.url_word_starts_);
121 String16SetFromString16(title, &word_starts.title_word_starts_); 122 String16SetFromString16(title, &word_starts.title_word_starts_);
122 WordStarts one_word_no_offset(1, 0u); 123 WordStarts term_word_starts(1, 0u);
123 return scored_match.GetTopicalityScore(1, url, one_word_no_offset, 124 base::i18n::BreakIterator iter(term, base::i18n::BreakIterator::BREAK_WORD);
124 word_starts); 125 if (iter.Init()) {
126 // Find the first word start.
127 while (iter.Advance() && !iter.IsWord()) {
128 }
129 term_word_starts[0] = iter.prev();
130 }
131 return scored_match.GetTopicalityScore(1, url, term_word_starts, word_starts);
125 } 132 }
126 133
127 TEST_F(ScoredHistoryMatchTest, Scoring) { 134 TEST_F(ScoredHistoryMatchTest, Scoring) {
128 // We use NowFromSystemTime() because MakeURLRow uses the same function 135 // We use NowFromSystemTime() because MakeURLRow uses the same function
129 // to calculate last visit time when building a row. 136 // to calculate last visit time when building a row.
130 base::Time now = base::Time::NowFromSystemTime(); 137 base::Time now = base::Time::NowFromSystemTime();
131 138
132 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1)); 139 history::URLRow row_a(MakeURLRow("http://fedcba", "abcd bcd", 3, 30, 1));
133 RowWordStarts word_starts_a; 140 RowWordStarts word_starts_a;
134 PopulateWordStarts(row_a, &word_starts_a); 141 PopulateWordStarts(row_a, &word_starts_a);
(...skipping 407 matching lines...) Expand 10 before | Expand all | Expand 10 after
542 EXPECT_EQ(27u, filtered_term_matches[0].offset); 549 EXPECT_EQ(27u, filtered_term_matches[0].offset);
543 } 550 }
544 551
545 // This function only tests scoring of single terms that match exactly 552 // This function only tests scoring of single terms that match exactly
546 // once somewhere in the URL or title. 553 // once somewhere in the URL or title.
547 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) { 554 TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) {
548 base::string16 url = ASCIIToUTF16( 555 base::string16 url = ASCIIToUTF16(
549 "http://abc.def.com/path1/path2?" 556 "http://abc.def.com/path1/path2?"
550 "arg1=val1&arg2=val2#hash_component"); 557 "arg1=val1&arg2=val2#hash_component");
551 base::string16 title = ASCIIToUTF16("here is a title"); 558 base::string16 title = ASCIIToUTF16("here is a title");
552 const float hostname_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 559 auto Score = [&](const char* term) {
553 ASCIIToUTF16("abc"), url, title); 560 return GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16(term), url,
554 const float hostname_mid_word_score = 561 title);
555 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("bc"), url, 562 };
556 title); 563 const float hostname_score = Score("abc");
557 const float domain_name_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 564 const float hostname_mid_word_score = Score("bc");
558 ASCIIToUTF16("def"), url, title); 565 const float hostname_score_preceeding_punctuation = Score("://abc");
559 const float domain_name_mid_word_score = 566 const float domain_name_score = Score("def");
560 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("ef"), url, 567 const float domain_name_mid_word_score = Score("ef");
561 title); 568 const float domain_name_score_preceeding_dot = Score(".def");
562 const float tld_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 569 const float tld_score = Score("com");
563 ASCIIToUTF16("com"), url, title); 570 const float tld_mid_word_score = Score("om");
564 const float tld_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 571 const float tld_score_preceeding_dot = Score(".com");
565 ASCIIToUTF16("om"), url, title); 572 const float path_score = Score("path1");
566 const float path_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 573 const float path_mid_word_score = Score("ath1");
567 ASCIIToUTF16("path1"), url, title); 574 const float path_score_preceeding_slash = Score("/path1");
568 const float path_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 575 const float arg_score = Score("arg1");
569 ASCIIToUTF16("ath1"), url, title); 576 const float arg_mid_word_score = Score("rg1");
570 const float arg_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 577 const float arg_score_preceeding_question_mark = Score("?arg1");
571 ASCIIToUTF16("arg2"), url, title); 578 const float protocol_score = Score("htt");
572 const float arg_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 579 const float protocol_mid_word_score = Score("tt");
573 ASCIIToUTF16("rg2"), url, title); 580 const float title_score = Score("her");
574 const float protocol_score = GetTopicalityScoreOfTermAgainstURLAndTitle( 581 const float title_mid_word_score = Score("er");
575 ASCIIToUTF16("htt"), url, title);
576 const float protocol_mid_word_score =
577 GetTopicalityScoreOfTermAgainstURLAndTitle(ASCIIToUTF16("tt"), url,
578 title);
579 const float title_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
580 ASCIIToUTF16("her"), url, title);
581 const float title_mid_word_score = GetTopicalityScoreOfTermAgainstURLAndTitle(
582 ASCIIToUTF16("er"), url, title);
583 // Verify hostname and domain name > path > arg. 582 // Verify hostname and domain name > path > arg.
584 EXPECT_GT(hostname_score, path_score); 583 EXPECT_GT(hostname_score, path_score);
585 EXPECT_GT(domain_name_score, path_score); 584 EXPECT_GT(domain_name_score, path_score);
586 EXPECT_GT(path_score, arg_score); 585 EXPECT_GT(path_score, arg_score);
586 // Verify leading punctuation doesn't confuse scoring.
587 EXPECT_EQ(hostname_score, hostname_score_preceeding_punctuation);
588 EXPECT_EQ(domain_name_score, domain_name_score_preceeding_dot);
589 EXPECT_EQ(tld_score, tld_score_preceeding_dot);
590 EXPECT_EQ(path_score, path_score_preceeding_slash);
591 EXPECT_EQ(arg_score, arg_score_preceeding_question_mark);
587 // Verify that domain name > path and domain name > arg for non-word 592 // Verify that domain name > path and domain name > arg for non-word
588 // boundaries. 593 // boundaries.
589 EXPECT_GT(hostname_mid_word_score, path_mid_word_score); 594 EXPECT_GT(hostname_mid_word_score, path_mid_word_score);
590 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score); 595 EXPECT_GT(domain_name_mid_word_score, path_mid_word_score);
591 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score); 596 EXPECT_GT(domain_name_mid_word_score, arg_mid_word_score);
592 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score); 597 EXPECT_GT(hostname_mid_word_score, arg_mid_word_score);
593 // Also verify that the matches at non-word-boundaries all score 598 // Also verify that the matches at non-word-boundaries all score
594 // worse than the matches at word boundaries. These three sets suffice. 599 // worse than the matches at word boundaries. These three sets suffice.
595 EXPECT_GT(arg_score, hostname_mid_word_score); 600 EXPECT_GT(arg_score, hostname_mid_word_score);
596 EXPECT_GT(arg_score, domain_name_mid_word_score); 601 EXPECT_GT(arg_score, domain_name_mid_word_score);
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
654 659
655 EXPECT_TRUE( 660 EXPECT_TRUE(
656 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); 661 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets));
657 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600), 662 EXPECT_THAT(hqp_buckets, ElementsAre(Pair(0.0, 400), Pair(1.5, 600),
658 Pair(12.0, 1300), Pair(20.0, 1399))); 663 Pair(12.0, 1300), Pair(20.0, 1399)));
659 // invalid string. 664 // invalid string.
660 buckets_str = "0.0,400,1.5,600"; 665 buckets_str = "0.0,400,1.5,600";
661 EXPECT_FALSE( 666 EXPECT_FALSE(
662 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets)); 667 ScoredHistoryMatch::GetHQPBucketsFromString(buckets_str, &hqp_buckets));
663 } 668 }
OLD | NEW
« no previous file with comments | « components/omnibox/browser/scored_history_match.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698