Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Unified Diff: chrome/browser/history/in_memory_url_index_types.cc

Issue 9655003: Gather word-start Information to Aid in Scoring. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/history/in_memory_url_index_types.cc
===================================================================
--- chrome/browser/history/in_memory_url_index_types.cc (revision 125621)
+++ chrome/browser/history/in_memory_url_index_types.cc (working copy)
@@ -96,18 +96,22 @@
// Utility Functions -----------------------------------------------------------
-String16Set String16SetFromString16(const string16& uni_string) {
- const size_t kMaxWordLength = 64;
- String16Vector words = String16VectorFromString16(uni_string, false);
+String16Set String16SetFromString16(const string16& uni_string,
+ std::vector<int>* word_starts) {
+ String16Vector words =
+ String16VectorFromString16(uni_string, false, word_starts);
String16Set word_set;
for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
++iter)
- word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength));
+ word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxSignificantChars));
return word_set;
}
String16Vector String16VectorFromString16(const string16& uni_string,
- bool break_on_space) {
+ bool break_on_space,
+ std::vector<int>* word_starts) {
+ if (word_starts)
+ word_starts->clear();
base::i18n::BreakIterator iter(uni_string,
break_on_space ? base::i18n::BreakIterator::BREAK_SPACE :
base::i18n::BreakIterator::BREAK_WORD);
@@ -116,11 +120,23 @@
return words;
while (iter.Advance()) {
if (break_on_space || iter.IsWord()) {
- string16 word = iter.GetString();
+ string16 word(iter.GetString());
+ TrimPositions trimmed = TRIM_NONE;
if (break_on_space)
- TrimWhitespace(word, TRIM_ALL, &word);
- if (!word.empty())
- words.push_back(word);
+ trimmed = TrimWhitespace(word, TRIM_ALL, &word);
+ if (word.empty())
+ continue;
+ words.push_back(word);
+ if (!word_starts)
+ continue;
+ size_t word_start = iter.prev();
+ if (word_start >= kMaxSignificantChars)
+ continue;
+ if (trimmed & TRIM_LEADING) {
+ string16 original_word(iter.GetString());
Peter Kasting 2012/03/09 02:37:43 Nit: Shorter: if (trimmed & TRIM_LEADING) w…
mrossetti 2012/03/14 23:23:49 Done! That's a great approach. On 2012/03/09 02:3…
+ word_start += original_word.find(word);
+ }
+ word_starts->push_back(word_start);
}
}
return words;
@@ -147,4 +163,9 @@
return prefixes.count(prefix) != 0;
}
+// WordStarts ------------------------------------------------------------------
+
+WordStarts::WordStarts() {}
+WordStarts::~WordStarts() {}
+
} // namespace history

Powered by Google App Engine
This is Rietveld 408576698