Index: chrome/browser/history/in_memory_url_index_types.cc |
=================================================================== |
--- chrome/browser/history/in_memory_url_index_types.cc (revision 125621) |
+++ chrome/browser/history/in_memory_url_index_types.cc (working copy) |
@@ -96,18 +96,22 @@ |
// Utility Functions ----------------------------------------------------------- |
-String16Set String16SetFromString16(const string16& uni_string) { |
- const size_t kMaxWordLength = 64; |
- String16Vector words = String16VectorFromString16(uni_string, false); |
+String16Set String16SetFromString16(const string16& uni_string, |
+ std::vector<int>* word_starts) { |
+ String16Vector words = |
+ String16VectorFromString16(uni_string, false, word_starts); |
String16Set word_set; |
for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); |
++iter) |
- word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); |
+ word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxSignificantChars)); |
return word_set; |
} |
String16Vector String16VectorFromString16(const string16& uni_string, |
- bool break_on_space) { |
+ bool break_on_space, |
+ std::vector<int>* word_starts) { |
+ if (word_starts) |
+ word_starts->clear(); |
base::i18n::BreakIterator iter(uni_string, |
break_on_space ? base::i18n::BreakIterator::BREAK_SPACE : |
base::i18n::BreakIterator::BREAK_WORD); |
@@ -116,11 +120,23 @@ |
return words; |
while (iter.Advance()) { |
if (break_on_space || iter.IsWord()) { |
- string16 word = iter.GetString(); |
+ string16 word(iter.GetString()); |
+ TrimPositions trimmed = TRIM_NONE; |
if (break_on_space) |
- TrimWhitespace(word, TRIM_ALL, &word); |
- if (!word.empty()) |
- words.push_back(word); |
+ trimmed = TrimWhitespace(word, TRIM_ALL, &word); |
+ if (word.empty()) |
+ continue; |
+ words.push_back(word); |
+ if (!word_starts) |
+ continue; |
+ size_t word_start = iter.prev(); |
+ if (word_start >= kMaxSignificantChars) |
+ continue; |
+ if (trimmed & TRIM_LEADING) { |
+ string16 original_word(iter.GetString()); |
Peter Kasting
2012/03/09 02:37:43
Nit: Shorter:
if (trimmed & TRIM_LEADING)
word_start += iter.GetString().find(word);
mrossetti
2012/03/14 23:23:49
Done! That's a great approach.
On 2012/03/09 02:37:43, Peter Kasting wrote:
|
+ word_start += original_word.find(word); |
+ } |
+ word_starts->push_back(word_start); |
} |
} |
return words; |
@@ -147,4 +163,9 @@ |
return prefixes.count(prefix) != 0; |
} |
+// WordStarts ------------------------------------------------------------------ |
+ |
+WordStarts::WordStarts() {} |
+WordStarts::~WordStarts() {} |
+ |
} // namespace history |