Index: chrome/browser/history/url_index_private_data.cc
===================================================================
--- chrome/browser/history/url_index_private_data.cc (revision 125621)
+++ chrome/browser/history/url_index_private_data.cc (working copy)
@@ -42,6 +42,9 @@
typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;
typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
    HistoryInfoMapEntry;
+typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;
+typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry
+    WordStartsMapEntry;

// The maximum score any candidate result can achieve.
const int kMaxTotalScore = 1425;
@@ -115,7 +118,9 @@
// InMemoryURLIndex's Private Data ---------------------------------------------

URLIndexPrivateData::URLIndexPrivateData()
-    : pre_filter_item_count_(0),
+    : restored_cache_version_(0),
+      saved_cache_version_(kCurrentCacheFileVersion),
+      pre_filter_item_count_(0),
      post_filter_item_count_(0),
      post_scoring_item_count_(0) {
  URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);
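kCurrentCacheFileVersion itself does not appear in this patch. Given the saved_cache_version_ < 1 guard in SaveWordStartsMap() further down, version 1 is presumably the first cache format that records word starts, so a declaration along these lines is assumed here purely for illustration, not copied from the tree:

// Assumed for illustration -- the real constant is declared elsewhere in this
// file or its header; the code below only implies that "version >= 1 carries
// word starts".
static const int kCurrentCacheFileVersion = 1;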
@@ -131,6 +136,7 @@
  word_id_history_map_.clear();
  history_id_word_map_.clear();
  history_info_map_.clear();
+  word_starts_map_.clear();
}

// Cache Updating --------------------------------------------------------------
@@ -161,11 +167,14 @@
  history_info_map_[history_id] = new_row;

  // Index the words contained in the URL and title of the row.
-  AddRowWordsToIndex(new_row);
+  RowWordStarts word_starts;
+  AddRowWordsToIndex(new_row, &word_starts);
+  word_starts_map_[history_id] = word_starts;
  return true;
}

-void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) {
+void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,
+                                             RowWordStarts* word_starts) {
  HistoryID history_id = static_cast<HistoryID>(row.id());
  // Split URL into individual, unique words then add in the title words.
  const GURL& gurl(row.url());
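RowWordStarts itself is declared outside this patch (in the in-memory URL index types header). From its use here and in the save/restore code below, it is presumably a small struct along these lines; this is an inferred sketch, not the actual declaration:

// Inferred sketch only -- WordStarts is assumed to be a vector of character
// offsets at which words begin.
struct RowWordStarts {
  WordStarts url_word_starts_;    // word-start offsets within the formatted URL
  WordStarts title_word_starts_;  // word-start offsets within the page title
};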
@@ -174,8 +183,10 @@
      net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
      NULL, NULL, NULL));
  url = base::i18n::ToLower(url);
-  String16Set url_words = String16SetFromString16(url);
-  String16Set title_words = String16SetFromString16(row.title());
+  String16Set url_words = String16SetFromString16(url,
+      word_starts ? &word_starts->url_word_starts_ : NULL);
+  String16Set title_words = String16SetFromString16(row.title(),
+      word_starts ? &word_starts->title_word_starts_ : NULL);
  String16Set words;
  std::set_union(url_words.begin(), url_words.end(),
                 title_words.begin(), title_words.end(),
@@ -246,6 +257,7 @@
  RemoveRowWordsFromIndex(row);
  HistoryID history_id = static_cast<HistoryID>(row.id());
  history_info_map_.erase(history_id);
+  word_starts_map_.erase(history_id);
}

void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {
@@ -328,7 +340,9 @@
      // URL and title.
      RemoveRowWordsFromIndex(row_to_update);
      row_to_update.set_title(row.title());
-      AddRowWordsToIndex(row_to_update);
+      RowWordStarts word_starts;
+      AddRowWordsToIndex(row_to_update, &word_starts);
+      word_starts_map_[row_id] = word_starts;
    }
    row_was_updated = true;
  }
@@ -424,7 +438,7 @@
  // search string. When the user types "colspec=ID%20Mstone Release" we get
  // four 'words': "colspec", "id", "mstone" and "release".
  String16Vector lower_words(
-      history::String16VectorFromString16(lower_unescaped_string, false));
+      history::String16VectorFromString16(lower_unescaped_string, false, NULL));
  ScoredHistoryMatches scored_items;

  // Do nothing if we have indexed no words (probably because we've not been
@@ -536,8 +550,12 @@
  // deleted by the user or the item no longer qualifies as a quick result.
  if (hist_pos != private_data_.history_info_map_.end()) {
    const URLRow& hist_item = hist_pos->second;
-    ScoredHistoryMatch match(
-        ScoredMatchForURL(hist_item, lower_string_, lower_terms_));
+    WordStartsMap::const_iterator starts_pos =
+        private_data_.word_starts_map_.find(history_id);
+    DCHECK(starts_pos != private_data_.word_starts_map_.end());
+    ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_string_,
+                                               lower_terms_,
+                                               starts_pos->second));
    if (match.raw_score > 0)
      scored_matches_.push_back(match);
  }
@@ -548,7 +566,8 @@
ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL(
    const URLRow& row,
    const string16& lower_string,
-    const String16Vector& terms) {
+    const String16Vector& terms,
+    const RowWordStarts& word_starts) {
  ScoredHistoryMatch match(row);
  GURL gurl = row.url();
  if (!gurl.is_valid())
@@ -664,14 +683,13 @@
  // Score component for how early in the match string the first search term
  // appears. Start with kStartMaxValue points and discount by
-  // kStartMaxValue/kMaxSignificantStart points for each character later than
+  // kStartMaxValue/kMaxSignificantChars points for each character later than
  // the first at which the term begins. No points are earned if the start of
-  // the match occurs at or after kMaxSignificantStart.
-  const size_t kMaxSignificantStart = 50;
+  // the match occurs at or after kMaxSignificantChars.
  const int kStartMaxValue = 1000;
-  int start_value = (kMaxSignificantStart -
-      std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /
-      kMaxSignificantStart;
+  int start_value = (kMaxSignificantChars -
+      std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue /
+      kMaxSignificantChars;

  // Score component for how much of the matched string the input terms cover.
  // kCompleteMaxValue points times the fraction of the URL/page title string
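To make the start_value formula above concrete, here is a worked example, assuming kMaxSignificantChars is 50 (that constant is defined outside this patch):

// Illustration only, assuming kMaxSignificantChars == 50:
//   first match at offset 0   -> (50 - 0)  * 1000 / 50 == 1000  (full kStartMaxValue)
//   first match at offset 10  -> (50 - 10) * 1000 / 50 ==  800
//   first match at offset 50+ -> (50 - 50) * 1000 / 50 ==    0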
@@ -928,6 +946,7 @@
    InMemoryURLIndexCacheItem* cache) const {
  DCHECK(cache);
  cache->set_timestamp(base::Time::Now().ToInternalValue());
+  cache->set_version(saved_cache_version_);
  // history_item_count_ is no longer used but rather than change the protobuf
  // definition use a placeholder. This will go away with the switch to SQLite.
  cache->set_history_item_count(0);
@@ -936,6 +955,7 @@
  SaveCharWordMap(cache);
  SaveWordIDHistoryMap(cache);
  SaveHistoryInfoMap(cache);
+  SaveWordStartsMap(cache);
}

void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
@@ -1020,6 +1040,34 @@
  }
}

+void URLIndexPrivateData::SaveWordStartsMap(
+    InMemoryURLIndexCacheItem* cache) const {
+  if (word_starts_map_.empty())
+    return;
+  // For unit testing: Enable saving of the cache as an earlier version to
+  // allow testing of cache file upgrading in ReadFromFile().
+  // TODO(mrossetti): Instead of intruding on production code with this kind of
+  // test harness, save a copy of an older version cache with known results.
+  // Implement this when switching the caching over to SQLite.
+  if (saved_cache_version_ < 1)
+    return;
+
+  WordStartsMapItem* map_item = cache->mutable_word_starts_map();
+  map_item->set_item_count(word_starts_map_.size());
+  for (WordStartsMap::const_iterator iter = word_starts_map_.begin();
+       iter != word_starts_map_.end(); ++iter) {
+    WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
+    map_entry->set_history_id(iter->first);
+    const RowWordStarts& word_starts(iter->second);
+    for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();
+         i != word_starts.url_word_starts_.end(); ++i)
+      map_entry->add_url_word_starts(*i);
+    for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();
+         i != word_starts.title_word_starts_.end(); ++i)
+      map_entry->add_title_word_starts(*i);
+  }
+}
+
// Cache Restoring -------------------------------------------------------------

bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {
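The companion change to the cache protocol buffer is not part of this file. From the generated accessors used in SaveWordStartsMap() above and RestoreWordStartsMap() below (mutable_word_starts_map(), set_item_count(), add_word_starts_map_entry(), set_history_id(), add_url_word_starts(), add_title_word_starts(), has_version(), version()), the additions to in_memory_url_index_cache.proto are presumably along these lines; field labels, types, and numbers here are guesses, not the actual schema:

// Sketch of the assumed additions, nested inside InMemoryURLIndexCacheItem:
//   optional int32 version = ...;
//   message WordStartsMapItem {
//     message WordStartsMapEntry {
//       optional int64 history_id = 1;
//       repeated int32 url_word_starts = 2;
//       repeated int32 title_word_starts = 3;
//     }
//     optional uint32 item_count = 1;
//     repeated WordStartsMapEntry word_starts_map_entry = 2;
//   }
//   optional WordStartsMapItem word_starts_map = ...;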
@@ -1090,9 +1138,11 @@
bool URLIndexPrivateData::RestorePrivateData(
    const InMemoryURLIndexCacheItem& cache) {
+  if (cache.has_version())
+    restored_cache_version_ = cache.version();
  return RestoreWordList(cache) && RestoreWordMap(cache) &&
      RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
-      RestoreHistoryInfoMap(cache);
+      RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);
}

bool URLIndexPrivateData::RestoreWordList(
@@ -1213,4 +1263,54 @@
  return true;
}

+bool URLIndexPrivateData::RestoreWordStartsMap(
+    const InMemoryURLIndexCacheItem& cache) {
+  // Note that this function must be called after RestoreHistoryInfoMap() has
+  // been run, as the word starts may have to be recalculated from the URLs
+  // and page titles.
+  if (cache.has_word_starts_map()) {
+    const WordStartsMapItem& list_item(cache.word_starts_map());
+    uint32 expected_item_count = list_item.item_count();
+    uint32 actual_item_count = list_item.word_starts_map_entry_size();
+    if (actual_item_count == 0 || actual_item_count != expected_item_count)
+      return false;
+    const RepeatedPtrField<WordStartsMapEntry>&
+        entries(list_item.word_starts_map_entry());
+    for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
+         entries.begin(); iter != entries.end(); ++iter) {
+      HistoryID history_id = iter->history_id();
+      RowWordStarts word_starts;
+      // Restore the URL word starts.
+      const RepeatedField<int32>& url_starts(iter->url_word_starts());
+      for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();
+           jiter != url_starts.end(); ++jiter)
+        word_starts.url_word_starts_.push_back(*jiter);
+      // Restore the page title word starts.
+      const RepeatedField<int32>& title_starts(iter->title_word_starts());
+      for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();
+           jiter != title_starts.end(); ++jiter)
+        word_starts.title_word_starts_.push_back(*jiter);
+      word_starts_map_[history_id] = word_starts;
+    }
+  } else {
+    // Since the cache did not contain any word starts we must rebuild them
+    // from the URLs and page titles.
+    for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
+         iter != history_info_map_.end(); ++iter) {
+      RowWordStarts word_starts;
+      const URLRow& row(iter->second);
+      string16 url(net::FormatUrl(row.url(), languages_,
+          net::kFormatUrlOmitUsernamePassword,
+          net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
+          NULL, NULL, NULL));
+      url = base::i18n::ToLower(url);
+      String16VectorFromString16(url, false, &word_starts.url_word_starts_);
+      String16VectorFromString16(
+          row.title(), false, &word_starts.title_word_starts_);
+      word_starts_map_[iter->first] = word_starts;
+    }
+  }
+  return true;
+}
+
}  // namespace history
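For a sense of what the rebuilt (or restored) entries hold, here is a hypothetical example; the exact offsets depend on how String16VectorFromString16() segments words, assumed here to be runs of alphanumeric characters:

// Illustration only (made-up row):
//   formatted, lowercased URL: "http://www.example.com/foo"
//   url_word_starts_:          { 0, 7, 11, 19, 23 }   // "http", "www", "example", "com", "foo"
//   page title:                "Example Site - Foo"
//   title_word_starts_:        { 0, 8, 15 }           // "Example", "Site", "Foo"

These offsets are what ScoredMatchForURL() now receives via the word_starts parameter added in this patch.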