OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/url_index_private_data.h" | 5 #include "chrome/browser/history/url_index_private_data.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
(...skipping 24 matching lines...) Expand all Loading... | |
35 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry | 35 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry |
36 CharWordMapEntry; | 36 CharWordMapEntry; |
37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem | 37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem |
38 WordIDHistoryMapItem; | 38 WordIDHistoryMapItem; |
39 typedef imui:: | 39 typedef imui:: |
40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry | 40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry |
41 WordIDHistoryMapEntry; | 41 WordIDHistoryMapEntry; |
42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; | 42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; |
43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry | 43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry |
44 HistoryInfoMapEntry; | 44 HistoryInfoMapEntry; |
45 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem; | |
46 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry | |
47 WordStartsMapEntry; | |
45 | 48 |
46 // The maximum score any candidate result can achieve. | 49 // The maximum score any candidate result can achieve. |
47 const int kMaxTotalScore = 1425; | 50 const int kMaxTotalScore = 1425; |
48 | 51 |
49 // Score ranges used to get a 'base' score for each of the scoring factors | 52 // Score ranges used to get a 'base' score for each of the scoring factors |
50 // (such as recency of last visit, times visited, times the URL was typed, | 53 // (such as recency of last visit, times visited, times the URL was typed, |
51 // and the quality of the string match). There is a matching value range for | 54 // and the quality of the string match). There is a matching value range for |
52 // each of these scores for each factor. Note that the top score is greater | 55 // each of these scores for each factor. Note that the top score is greater |
53 // than |kMaxTotalScore|. The score for each candidate will be capped in the | 56 // than |kMaxTotalScore|. The score for each candidate will be capped in the |
54 // final calculation. | 57 // final calculation. |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
108 score += (value - value_ranks[i]) * | 111 score += (value - value_ranks[i]) * |
109 (kScoreRank[i - 1] - kScoreRank[i]) / | 112 (kScoreRank[i - 1] - kScoreRank[i]) / |
110 (value_ranks[i - 1] - value_ranks[i]); | 113 (value_ranks[i - 1] - value_ranks[i]); |
111 } | 114 } |
112 return score; | 115 return score; |
113 } | 116 } |
114 | 117 |
115 // InMemoryURLIndex's Private Data --------------------------------------------- | 118 // InMemoryURLIndex's Private Data --------------------------------------------- |
116 | 119 |
117 URLIndexPrivateData::URLIndexPrivateData() | 120 URLIndexPrivateData::URLIndexPrivateData() |
118 : pre_filter_item_count_(0), | 121 : restored_cache_version_(0), |
122 saved_cache_version_(kCurrentCacheFileVersion), | |
123 pre_filter_item_count_(0), | |
119 post_filter_item_count_(0), | 124 post_filter_item_count_(0), |
120 post_scoring_item_count_(0) { | 125 post_scoring_item_count_(0) { |
121 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_); | 126 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_); |
122 } | 127 } |
123 | 128 |
124 URLIndexPrivateData::~URLIndexPrivateData() {} | 129 URLIndexPrivateData::~URLIndexPrivateData() {} |
125 | 130 |
126 void URLIndexPrivateData::Clear() { | 131 void URLIndexPrivateData::Clear() { |
127 word_list_.clear(); | 132 word_list_.clear(); |
128 available_words_.clear(); | 133 available_words_.clear(); |
129 word_map_.clear(); | 134 word_map_.clear(); |
130 char_word_map_.clear(); | 135 char_word_map_.clear(); |
131 word_id_history_map_.clear(); | 136 word_id_history_map_.clear(); |
132 history_id_word_map_.clear(); | 137 history_id_word_map_.clear(); |
133 history_info_map_.clear(); | 138 history_info_map_.clear(); |
139 word_starts_map_.clear(); | |
134 } | 140 } |
135 | 141 |
136 // Cache Updating -------------------------------------------------------------- | 142 // Cache Updating -------------------------------------------------------------- |
137 | 143 |
138 bool URLIndexPrivateData::IndexRow(const URLRow& row) { | 144 bool URLIndexPrivateData::IndexRow(const URLRow& row) { |
139 const GURL& gurl(row.url()); | 145 const GURL& gurl(row.url()); |
140 | 146 |
141 // Index only URLs with a whitelisted scheme. | 147 // Index only URLs with a whitelisted scheme. |
142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) | 148 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) |
143 return false; | 149 return false; |
(...skipping 10 matching lines...) Expand all Loading... | |
154 | 160 |
155 // Add the row for quick lookup in the history info store. | 161 // Add the row for quick lookup in the history info store. |
156 URLRow new_row(GURL(url), row_id); | 162 URLRow new_row(GURL(url), row_id); |
157 new_row.set_visit_count(row.visit_count()); | 163 new_row.set_visit_count(row.visit_count()); |
158 new_row.set_typed_count(row.typed_count()); | 164 new_row.set_typed_count(row.typed_count()); |
159 new_row.set_last_visit(row.last_visit()); | 165 new_row.set_last_visit(row.last_visit()); |
160 new_row.set_title(row.title()); | 166 new_row.set_title(row.title()); |
161 history_info_map_[history_id] = new_row; | 167 history_info_map_[history_id] = new_row; |
162 | 168 |
163 // Index the words contained in the URL and title of the row. | 169 // Index the words contained in the URL and title of the row. |
164 AddRowWordsToIndex(new_row); | 170 WordStarts word_starts; |
171 AddRowWordsToIndex(new_row, &word_starts); | |
172 word_starts_map_[history_id] = word_starts; | |
165 return true; | 173 return true; |
166 } | 174 } |
167 | 175 |
168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) { | 176 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, |
177 WordStarts* word_starts) { | |
169 HistoryID history_id = static_cast<HistoryID>(row.id()); | 178 HistoryID history_id = static_cast<HistoryID>(row.id()); |
170 // Split URL into individual, unique words then add in the title words. | 179 // Split URL into individual, unique words then add in the title words. |
171 const GURL& gurl(row.url()); | 180 const GURL& gurl(row.url()); |
172 string16 url(net::FormatUrl(gurl, languages_, | 181 string16 url(net::FormatUrl(gurl, languages_, |
173 net::kFormatUrlOmitUsernamePassword, | 182 net::kFormatUrlOmitUsernamePassword, |
174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 183 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
175 NULL, NULL, NULL)); | 184 NULL, NULL, NULL)); |
176 url = base::i18n::ToLower(url); | 185 url = base::i18n::ToLower(url); |
177 String16Set url_words = String16SetFromString16(url); | 186 String16Set url_words = String16SetFromString16(url, |
178 String16Set title_words = String16SetFromString16(row.title()); | 187 word_starts ? &word_starts->url_word_starts_ : NULL); |
188 String16Set title_words = String16SetFromString16(row.title(), | |
189 word_starts ? &word_starts->title_word_starts_ : NULL); | |
179 String16Set words; | 190 String16Set words; |
180 std::set_union(url_words.begin(), url_words.end(), | 191 std::set_union(url_words.begin(), url_words.end(), |
181 title_words.begin(), title_words.end(), | 192 title_words.begin(), title_words.end(), |
182 std::insert_iterator<String16Set>(words, words.begin())); | 193 std::insert_iterator<String16Set>(words, words.begin())); |
183 for (String16Set::iterator word_iter = words.begin(); | 194 for (String16Set::iterator word_iter = words.begin(); |
184 word_iter != words.end(); ++word_iter) | 195 word_iter != words.end(); ++word_iter) |
185 AddWordToIndex(*word_iter, history_id); | 196 AddWordToIndex(*word_iter, history_id); |
186 | 197 |
187 search_term_cache_.clear(); // Invalidate the term cache. | 198 search_term_cache_.clear(); // Invalidate the term cache. |
188 } | 199 } |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
239 word_id_set.insert(word_id); | 250 word_id_set.insert(word_id); |
240 char_word_map_[uni_char] = word_id_set; | 251 char_word_map_[uni_char] = word_id_set; |
241 } | 252 } |
242 } | 253 } |
243 } | 254 } |
244 | 255 |
245 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) { | 256 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) { |
246 RemoveRowWordsFromIndex(row); | 257 RemoveRowWordsFromIndex(row); |
247 HistoryID history_id = static_cast<HistoryID>(row.id()); | 258 HistoryID history_id = static_cast<HistoryID>(row.id()); |
248 history_info_map_.erase(history_id); | 259 history_info_map_.erase(history_id); |
260 word_starts_map_.erase(history_id); | |
249 } | 261 } |
250 | 262 |
251 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) { | 263 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) { |
252 // Remove the entries in history_id_word_map_ and word_id_history_map_ for | 264 // Remove the entries in history_id_word_map_ and word_id_history_map_ for |
253 // this row. | 265 // this row. |
254 HistoryID history_id = static_cast<HistoryID>(row.id()); | 266 HistoryID history_id = static_cast<HistoryID>(row.id()); |
255 WordIDSet word_id_set = history_id_word_map_[history_id]; | 267 WordIDSet word_id_set = history_id_word_map_[history_id]; |
256 history_id_word_map_.erase(history_id); | 268 history_id_word_map_.erase(history_id); |
257 | 269 |
258 // Reconcile any changes to word usage. | 270 // Reconcile any changes to word usage. |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
321 row_to_update.set_visit_count(row.visit_count()); | 333 row_to_update.set_visit_count(row.visit_count()); |
322 row_to_update.set_typed_count(row.typed_count()); | 334 row_to_update.set_typed_count(row.typed_count()); |
323 row_to_update.set_last_visit(row.last_visit()); | 335 row_to_update.set_last_visit(row.last_visit()); |
324 // While the URL is guaranteed to remain stable, the title may have | 336 // While the URL is guaranteed to remain stable, the title may have |
325 // changed. If so, then update the index with the changed words. | 337 // changed. If so, then update the index with the changed words. |
326 if (title_updated) { | 338 if (title_updated) { |
327 // Clear all words associated with this row and re-index both the | 339 // Clear all words associated with this row and re-index both the |
328 // URL and title. | 340 // URL and title. |
329 RemoveRowWordsFromIndex(row_to_update); | 341 RemoveRowWordsFromIndex(row_to_update); |
330 row_to_update.set_title(row.title()); | 342 row_to_update.set_title(row.title()); |
331 AddRowWordsToIndex(row_to_update); | 343 WordStarts word_starts; |
344 AddRowWordsToIndex(row_to_update, &word_starts); | |
345 word_starts_map_[row_id] = word_starts; | |
332 } | 346 } |
333 row_was_updated = true; | 347 row_was_updated = true; |
334 } | 348 } |
335 } else { | 349 } else { |
336 // This indexed row no longer qualifies and will be de-indexed by | 350 // This indexed row no longer qualifies and will be de-indexed by |
337 // clearing all words associated with this row. | 351 // clearing all words associated with this row. |
338 RemoveRowFromIndex(row); | 352 RemoveRowFromIndex(row); |
339 row_was_updated = true; | 353 row_was_updated = true; |
340 } | 354 } |
341 if (row_was_updated) | 355 if (row_was_updated) |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
417 // the final filtering we need whitespace separated substrings possibly | 431 // the final filtering we need whitespace separated substrings possibly |
418 // containing escaped characters. | 432 // containing escaped characters. |
419 string16 lower_raw_string(base::i18n::ToLower(search_string)); | 433 string16 lower_raw_string(base::i18n::ToLower(search_string)); |
420 string16 lower_unescaped_string = | 434 string16 lower_unescaped_string = |
421 net::UnescapeURLComponent(lower_raw_string, | 435 net::UnescapeURLComponent(lower_raw_string, |
422 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); | 436 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
423 // Extract individual 'words' (as opposed to 'terms'; see below) from the | 437 // Extract individual 'words' (as opposed to 'terms'; see below) from the |
424 // search string. When the user types "colspec=ID%20Mstone Release" we get | 438 // search string. When the user types "colspec=ID%20Mstone Release" we get |
425 // four 'words': "colspec", "id", "mstone" and "release". | 439 // four 'words': "colspec", "id", "mstone" and "release". |
426 String16Vector lower_words( | 440 String16Vector lower_words( |
427 history::String16VectorFromString16(lower_unescaped_string, false)); | 441 history::String16VectorFromString16(lower_unescaped_string, false, NULL)); |
428 ScoredHistoryMatches scored_items; | 442 ScoredHistoryMatches scored_items; |
429 | 443 |
430 // Do nothing if we have indexed no words (probably because we've not been | 444 // Do nothing if we have indexed no words (probably because we've not been |
431 // initialized yet) or the search string has no words. | 445 // initialized yet) or the search string has no words. |
432 if (word_list_.empty() || lower_words.empty()) { | 446 if (word_list_.empty() || lower_words.empty()) { |
433 search_term_cache_.clear(); // Invalidate the term cache. | 447 search_term_cache_.clear(); // Invalidate the term cache. |
434 return scored_items; | 448 return scored_items; |
435 } | 449 } |
436 | 450 |
437 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep | 451 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
529 | 543 |
530 void URLIndexPrivateData::AddHistoryMatch::operator()( | 544 void URLIndexPrivateData::AddHistoryMatch::operator()( |
531 const HistoryID history_id) { | 545 const HistoryID history_id) { |
532 HistoryInfoMap::const_iterator hist_pos = | 546 HistoryInfoMap::const_iterator hist_pos = |
533 private_data_.history_info_map_.find(history_id); | 547 private_data_.history_info_map_.find(history_id); |
534 // Note that a history_id may be present in the word_id_history_map_ yet not | 548 // Note that a history_id may be present in the word_id_history_map_ yet not |
535 // be found in the history_info_map_. This occurs when an item has been | 549 // be found in the history_info_map_. This occurs when an item has been |
536 // deleted by the user or the item no longer qualifies as a quick result. | 550 // deleted by the user or the item no longer qualifies as a quick result. |
537 if (hist_pos != private_data_.history_info_map_.end()) { | 551 if (hist_pos != private_data_.history_info_map_.end()) { |
538 const URLRow& hist_item = hist_pos->second; | 552 const URLRow& hist_item = hist_pos->second; |
539 ScoredHistoryMatch match( | 553 WordStartsMap::const_iterator starts_pos = |
540 ScoredMatchForURL(hist_item, lower_string_, lower_terms_)); | 554 private_data_.word_starts_map_.find(history_id); |
555 DCHECK(starts_pos != private_data_.word_starts_map_.end()); | |
556 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_string_, | |
557 lower_terms_, | |
558 starts_pos->second)); | |
541 if (match.raw_score > 0) | 559 if (match.raw_score > 0) |
542 scored_matches_.push_back(match); | 560 scored_matches_.push_back(match); |
543 } | 561 } |
544 } | 562 } |
545 | 563 |
546 // static | 564 // static |
547 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. | 565 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. |
548 ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL( | 566 ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL( |
549 const URLRow& row, | 567 const URLRow& row, |
550 const string16& lower_string, | 568 const string16& lower_string, |
551 const String16Vector& terms) { | 569 const String16Vector& terms, |
570 const WordStarts& word_starts) { | |
552 ScoredHistoryMatch match(row); | 571 ScoredHistoryMatch match(row); |
553 GURL gurl = row.url(); | 572 GURL gurl = row.url(); |
554 if (!gurl.is_valid()) | 573 if (!gurl.is_valid()) |
555 return match; | 574 return match; |
556 | 575 |
557 // Figure out where each search term appears in the URL and/or page title | 576 // Figure out where each search term appears in the URL and/or page title |
558 // so that we can score as well as provide autocomplete highlighting. | 577 // so that we can score as well as provide autocomplete highlighting. |
559 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); | 578 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); |
560 string16 title = base::i18n::ToLower(row.title()); | 579 string16 title = base::i18n::ToLower(row.title()); |
561 int term_num = 0; | 580 int term_num = 0; |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
657 for (size_t i = 1; i < matches.size(); ++i) { | 676 for (size_t i = 1; i < matches.size(); ++i) { |
658 if (matches[i - 1].term_num > matches[i].term_num) | 677 if (matches[i - 1].term_num > matches[i].term_num) |
659 ++out_of_order; | 678 ++out_of_order; |
660 } | 679 } |
661 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / | 680 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / |
662 max_possible_out_of_order; | 681 max_possible_out_of_order; |
663 } | 682 } |
664 | 683 |
665 // Score component for how early in the match string the first search term | 684 // Score component for how early in the match string the first search term |
666 // appears. Start with kStartMaxValue points and discount by | 685 // appears. Start with kStartMaxValue points and discount by |
667 // kStartMaxValue/kMaxSignificantStart points for each character later than | 686 // kStartMaxValue/kMaxSignificantChars points for each character later than |
668 // the first at which the term begins. No points are earned if the start of | 687 // the first at which the term begins. No points are earned if the start of |
669 // the match occurs at or after kMaxSignificantStart. | 688 // the match occurs at or after kMaxSignificantChars. |
670 const size_t kMaxSignificantStart = 50; | |
671 const int kStartMaxValue = 1000; | 689 const int kStartMaxValue = 1000; |
672 int start_value = (kMaxSignificantStart - | 690 int start_value = (kMaxSignificantChars - |
673 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / | 691 std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue / |
674 kMaxSignificantStart; | 692 kMaxSignificantChars; |
675 | 693 |
676 // Score component for how much of the matched string the input terms cover. | 694 // Score component for how much of the matched string the input terms cover. |
677 // kCompleteMaxValue points times the fraction of the URL/page title string | 695 // kCompleteMaxValue points times the fraction of the URL/page title string |
678 // that was matched. | 696 // that was matched. |
679 size_t term_length_total = std::accumulate(matches.begin(), matches.end(), | 697 size_t term_length_total = std::accumulate(matches.begin(), matches.end(), |
680 0, AccumulateMatchLength); | 698 0, AccumulateMatchLength); |
681 const size_t kMaxSignificantLength = 50; | 699 const size_t kMaxSignificantLength = 50; |
682 size_t max_significant_length = | 700 size_t max_significant_length = |
683 std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); | 701 std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); |
684 const int kCompleteMaxValue = 1000; | 702 const int kCompleteMaxValue = 1000; |
(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
921 } | 939 } |
922 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", | 940 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", |
923 base::TimeTicks::Now() - beginning_time); | 941 base::TimeTicks::Now() - beginning_time); |
924 return true; | 942 return true; |
925 } | 943 } |
926 | 944 |
927 void URLIndexPrivateData::SavePrivateData( | 945 void URLIndexPrivateData::SavePrivateData( |
928 InMemoryURLIndexCacheItem* cache) const { | 946 InMemoryURLIndexCacheItem* cache) const { |
929 DCHECK(cache); | 947 DCHECK(cache); |
930 cache->set_timestamp(base::Time::Now().ToInternalValue()); | 948 cache->set_timestamp(base::Time::Now().ToInternalValue()); |
949 cache->set_version(saved_cache_version_); | |
931 // history_item_count_ is no longer used but rather than change the protobuf | 950 // history_item_count_ is no longer used but rather than change the protobuf |
932 // definition use a placeholder. This will go away with the switch to SQLite. | 951 // definition use a placeholder. This will go away with the switch to SQLite. |
933 cache->set_history_item_count(0); | 952 cache->set_history_item_count(0); |
934 SaveWordList(cache); | 953 SaveWordList(cache); |
935 SaveWordMap(cache); | 954 SaveWordMap(cache); |
936 SaveCharWordMap(cache); | 955 SaveCharWordMap(cache); |
937 SaveWordIDHistoryMap(cache); | 956 SaveWordIDHistoryMap(cache); |
938 SaveHistoryInfoMap(cache); | 957 SaveHistoryInfoMap(cache); |
958 SaveWordStartsMap(cache); | |
939 } | 959 } |
940 | 960 |
941 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { | 961 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
942 if (word_list_.empty()) | 962 if (word_list_.empty()) |
943 return; | 963 return; |
944 WordListItem* list_item = cache->mutable_word_list(); | 964 WordListItem* list_item = cache->mutable_word_list(); |
945 list_item->set_word_count(word_list_.size()); | 965 list_item->set_word_count(word_list_.size()); |
946 for (String16Vector::const_iterator iter = word_list_.begin(); | 966 for (String16Vector::const_iterator iter = word_list_.begin(); |
947 iter != word_list_.end(); ++iter) | 967 iter != word_list_.end(); ++iter) |
948 list_item->add_word(UTF16ToUTF8(*iter)); | 968 list_item->add_word(UTF16ToUTF8(*iter)); |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1013 // is no need to save search_term_cache_ (not persistent), | 1033 // is no need to save search_term_cache_ (not persistent), |
1014 // languages_, etc. | 1034 // languages_, etc. |
1015 map_entry->set_visit_count(url_row.visit_count()); | 1035 map_entry->set_visit_count(url_row.visit_count()); |
1016 map_entry->set_typed_count(url_row.typed_count()); | 1036 map_entry->set_typed_count(url_row.typed_count()); |
1017 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 1037 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
1018 map_entry->set_url(url_row.url().spec()); | 1038 map_entry->set_url(url_row.url().spec()); |
1019 map_entry->set_title(UTF16ToUTF8(url_row.title())); | 1039 map_entry->set_title(UTF16ToUTF8(url_row.title())); |
1020 } | 1040 } |
1021 } | 1041 } |
1022 | 1042 |
1043 void URLIndexPrivateData::SaveWordStartsMap( | |
1044 InMemoryURLIndexCacheItem* cache) const { | |
1045 if (word_starts_map_.empty()) | |
1046 return; | |
1047 // For unit testing: Enable saving of the cache as an earlier version to | |
1048 // allow testing of cache file upgrading in ReadFromFile(). | |
Peter Kasting
2012/03/09 02:37:43
Nit: Is this the best way of doing this? For most
mrossetti
2012/03/14 23:23:49
I hear you, and understand. Since saving using pro
| |
1049 if (saved_cache_version_ < 1) | |
1050 return; | |
1051 | |
1052 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); | |
1053 map_item->set_item_count(word_starts_map_.size()); | |
1054 for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); | |
1055 iter != word_starts_map_.end(); ++iter) { | |
1056 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); | |
1057 map_entry->set_history_id(iter->first); | |
1058 const WordStarts& word_starts(iter->second); | |
1059 for (std::vector<int>::const_iterator siter = | |
1060 word_starts.url_word_starts_.begin(); | |
1061 siter != word_starts.url_word_starts_.end(); ++siter) | |
1062 map_entry->add_url_word_starts(*siter); | |
1063 for (std::vector<int>::const_iterator siter = | |
1064 word_starts.title_word_starts_.begin(); | |
1065 siter != word_starts.title_word_starts_.end(); ++siter) | |
1066 map_entry->add_title_word_starts(*siter); | |
1067 } | |
1068 } | |
1069 | |
1023 // Cache Restoring ------------------------------------------------------------- | 1070 // Cache Restoring ------------------------------------------------------------- |
1024 | 1071 |
1025 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { | 1072 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { |
1026 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 1073 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
1027 // That is: ensure that the database has not been modified since the cache | 1074 // That is: ensure that the database has not been modified since the cache |
1028 // was last saved. DB file modification date is inadequate. There are no | 1075 // was last saved. DB file modification date is inadequate. There are no |
1029 // SQLite table checksums automatically stored. | 1076 // SQLite table checksums automatically stored. |
1030 Clear(); // Start with a clean slate. | 1077 Clear(); // Start with a clean slate. |
1031 | 1078 |
1032 // FIXME(mrossetti): Move File IO to another thread. | 1079 // FIXME(mrossetti): Move File IO to another thread. |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1083 rebuilt_data->history_id_word_map_.size()); | 1130 rebuilt_data->history_id_word_map_.size()); |
1084 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", | 1131 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
1085 rebuilt_data->word_map_.size()); | 1132 rebuilt_data->word_map_.size()); |
1086 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", | 1133 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
1087 rebuilt_data->char_word_map_.size()); | 1134 rebuilt_data->char_word_map_.size()); |
1088 return rebuilt_data.release(); | 1135 return rebuilt_data.release(); |
1089 } | 1136 } |
1090 | 1137 |
1091 bool URLIndexPrivateData::RestorePrivateData( | 1138 bool URLIndexPrivateData::RestorePrivateData( |
1092 const InMemoryURLIndexCacheItem& cache) { | 1139 const InMemoryURLIndexCacheItem& cache) { |
1140 if (cache.has_version()) | |
1141 restored_cache_version_ = cache.version(); | |
1093 return RestoreWordList(cache) && RestoreWordMap(cache) && | 1142 return RestoreWordList(cache) && RestoreWordMap(cache) && |
1094 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && | 1143 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
1095 RestoreHistoryInfoMap(cache); | 1144 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache); |
1096 } | 1145 } |
1097 | 1146 |
1098 bool URLIndexPrivateData::RestoreWordList( | 1147 bool URLIndexPrivateData::RestoreWordList( |
1099 const InMemoryURLIndexCacheItem& cache) { | 1148 const InMemoryURLIndexCacheItem& cache) { |
1100 if (!cache.has_word_list()) | 1149 if (!cache.has_word_list()) |
1101 return false; | 1150 return false; |
1102 const WordListItem& list_item(cache.word_list()); | 1151 const WordListItem& list_item(cache.word_list()); |
1103 uint32 expected_item_count = list_item.word_count(); | 1152 uint32 expected_item_count = list_item.word_count(); |
1104 uint32 actual_item_count = list_item.word_size(); | 1153 uint32 actual_item_count = list_item.word_size(); |
1105 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1154 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1206 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1255 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); |
1207 if (iter->has_title()) { | 1256 if (iter->has_title()) { |
1208 string16 title(UTF8ToUTF16(iter->title())); | 1257 string16 title(UTF8ToUTF16(iter->title())); |
1209 url_row.set_title(title); | 1258 url_row.set_title(title); |
1210 } | 1259 } |
1211 history_info_map_[history_id] = url_row; | 1260 history_info_map_[history_id] = url_row; |
1212 } | 1261 } |
1213 return true; | 1262 return true; |
1214 } | 1263 } |
1215 | 1264 |
1265 bool URLIndexPrivateData::RestoreWordStartsMap( | |
1266 const InMemoryURLIndexCacheItem& cache) { | |
1267 // Note that this function must be called after RestoreHistoryInfoMap() has | |
1268 // been run as the word starts may have to be recalculated from the urls and | |
1269 // page titles. | |
1270 if (cache.has_word_starts_map()) { | |
1271 const WordStartsMapItem& list_item(cache.word_starts_map()); | |
1272 uint32 expected_item_count = list_item.item_count(); | |
1273 uint32 actual_item_count = list_item.word_starts_map_entry_size(); | |
1274 if (actual_item_count == 0 || actual_item_count != expected_item_count) | |
1275 return false; | |
1276 const RepeatedPtrField<WordStartsMapEntry>& | |
1277 entries(list_item.word_starts_map_entry()); | |
1278 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter = | |
1279 entries.begin(); iter != entries.end(); ++iter) { | |
1280 HistoryID history_id = iter->history_id(); | |
1281 WordStarts word_starts; | |
1282 // Restore the URL word starts. | |
1283 const RepeatedField<int32>& url_starts(iter->url_word_starts()); | |
1284 for (RepeatedField<int32>::const_iterator jiter = url_starts.begin(); | |
Peter Kasting
2012/03/09 02:37:43
Nit: Argh... use |i| and |j| rather than |iter| an
| |
1285 jiter != url_starts.end(); ++jiter) | |
1286 word_starts.url_word_starts_.push_back(*jiter); | |
1287 // Restore the page title word starts. | |
1288 const RepeatedField<int32>& title_starts(iter->title_word_starts()); | |
1289 for (RepeatedField<int32>::const_iterator jiter = title_starts.begin(); | |
1290 jiter != title_starts.end(); ++jiter) | |
1291 word_starts.title_word_starts_.push_back(*jiter); | |
1292 word_starts_map_[history_id] = word_starts; | |
1293 } | |
1294 } else { | |
1295 // Since the cache did not contain any word starts we must rebuild then from | |
1296 // the URL and page titles. | |
1297 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | |
1298 iter != history_info_map_.end(); ++iter) { | |
1299 WordStarts word_starts; | |
1300 const URLRow& row(iter->second); | |
1301 string16 url(net::FormatUrl(row.url(), languages_, | |
1302 net::kFormatUrlOmitUsernamePassword, | |
1303 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | |
1304 NULL, NULL, NULL)); | |
1305 url = base::i18n::ToLower(url); | |
1306 String16VectorFromString16(url, false, &word_starts.url_word_starts_); | |
1307 String16VectorFromString16( | |
1308 row.title(), false, &word_starts.title_word_starts_); | |
1309 word_starts_map_[iter->first] = word_starts; | |
1310 } | |
1311 } | |
1312 return true; | |
1313 } | |
1314 | |
1216 } // namespace history | 1315 } // namespace history |
OLD | NEW |