OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/url_index_private_data.h" | 5 #include "chrome/browser/history/url_index_private_data.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
(...skipping 24 matching lines...) Expand all Loading... |
35 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry | 35 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry |
36 CharWordMapEntry; | 36 CharWordMapEntry; |
37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem | 37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem |
38 WordIDHistoryMapItem; | 38 WordIDHistoryMapItem; |
39 typedef imui:: | 39 typedef imui:: |
40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry | 40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry |
41 WordIDHistoryMapEntry; | 41 WordIDHistoryMapEntry; |
42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; | 42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; |
43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry | 43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry |
44 HistoryInfoMapEntry; | 44 HistoryInfoMapEntry; |
| 45 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem; |
| 46 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry |
| 47 WordStartsMapEntry; |
45 | 48 |
46 // The maximum score any candidate result can achieve. | 49 // The maximum score any candidate result can achieve. |
47 const int kMaxTotalScore = 1425; | 50 const int kMaxTotalScore = 1425; |
48 | 51 |
49 // Score ranges used to get a 'base' score for each of the scoring factors | 52 // Score ranges used to get a 'base' score for each of the scoring factors |
50 // (such as recency of last visit, times visited, times the URL was typed, | 53 // (such as recency of last visit, times visited, times the URL was typed, |
51 // and the quality of the string match). There is a matching value range for | 54 // and the quality of the string match). There is a matching value range for |
52 // each of these scores for each factor. Note that the top score is greater | 55 // each of these scores for each factor. Note that the top score is greater |
53 // than |kMaxTotalScore|. The score for each candidate will be capped in the | 56 // than |kMaxTotalScore|. The score for each candidate will be capped in the |
54 // final calculation. | 57 // final calculation. |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
108 score += (value - value_ranks[i]) * | 111 score += (value - value_ranks[i]) * |
109 (kScoreRank[i - 1] - kScoreRank[i]) / | 112 (kScoreRank[i - 1] - kScoreRank[i]) / |
110 (value_ranks[i - 1] - value_ranks[i]); | 113 (value_ranks[i - 1] - value_ranks[i]); |
111 } | 114 } |
112 return score; | 115 return score; |
113 } | 116 } |
114 | 117 |
115 // InMemoryURLIndex's Private Data --------------------------------------------- | 118 // InMemoryURLIndex's Private Data --------------------------------------------- |
116 | 119 |
117 URLIndexPrivateData::URLIndexPrivateData() | 120 URLIndexPrivateData::URLIndexPrivateData() |
118 : pre_filter_item_count_(0), | 121 : restored_cache_version_(0), |
| 122 saved_cache_version_(kCurrentCacheFileVersion), |
| 123 pre_filter_item_count_(0), |
119 post_filter_item_count_(0), | 124 post_filter_item_count_(0), |
120 post_scoring_item_count_(0) { | 125 post_scoring_item_count_(0) { |
121 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_); | 126 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_); |
122 } | 127 } |
123 | 128 |
124 URLIndexPrivateData::~URLIndexPrivateData() {} | 129 URLIndexPrivateData::~URLIndexPrivateData() {} |
125 | 130 |
126 void URLIndexPrivateData::Clear() { | 131 void URLIndexPrivateData::Clear() { |
127 word_list_.clear(); | 132 word_list_.clear(); |
128 available_words_.clear(); | 133 available_words_.clear(); |
129 word_map_.clear(); | 134 word_map_.clear(); |
130 char_word_map_.clear(); | 135 char_word_map_.clear(); |
131 word_id_history_map_.clear(); | 136 word_id_history_map_.clear(); |
132 history_id_word_map_.clear(); | 137 history_id_word_map_.clear(); |
133 history_info_map_.clear(); | 138 history_info_map_.clear(); |
| 139 word_starts_map_.clear(); |
134 } | 140 } |
135 | 141 |
136 // Cache Updating -------------------------------------------------------------- | 142 // Cache Updating -------------------------------------------------------------- |
137 | 143 |
138 bool URLIndexPrivateData::IndexRow(const URLRow& row) { | 144 bool URLIndexPrivateData::IndexRow(const URLRow& row) { |
139 const GURL& gurl(row.url()); | 145 const GURL& gurl(row.url()); |
140 | 146 |
141 // Index only URLs with a whitelisted scheme. | 147 // Index only URLs with a whitelisted scheme. |
142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) | 148 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) |
143 return false; | 149 return false; |
(...skipping 10 matching lines...) Expand all Loading... |
154 | 160 |
155 // Add the row for quick lookup in the history info store. | 161 // Add the row for quick lookup in the history info store. |
156 URLRow new_row(GURL(url), row_id); | 162 URLRow new_row(GURL(url), row_id); |
157 new_row.set_visit_count(row.visit_count()); | 163 new_row.set_visit_count(row.visit_count()); |
158 new_row.set_typed_count(row.typed_count()); | 164 new_row.set_typed_count(row.typed_count()); |
159 new_row.set_last_visit(row.last_visit()); | 165 new_row.set_last_visit(row.last_visit()); |
160 new_row.set_title(row.title()); | 166 new_row.set_title(row.title()); |
161 history_info_map_[history_id] = new_row; | 167 history_info_map_[history_id] = new_row; |
162 | 168 |
163 // Index the words contained in the URL and title of the row. | 169 // Index the words contained in the URL and title of the row. |
164 AddRowWordsToIndex(new_row); | 170 RowWordStarts word_starts; |
| 171 AddRowWordsToIndex(new_row, &word_starts); |
| 172 word_starts_map_[history_id] = word_starts; |
165 return true; | 173 return true; |
166 } | 174 } |
167 | 175 |
168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) { | 176 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, |
| 177 RowWordStarts* word_starts) { |
169 HistoryID history_id = static_cast<HistoryID>(row.id()); | 178 HistoryID history_id = static_cast<HistoryID>(row.id()); |
170 // Split URL into individual, unique words then add in the title words. | 179 // Split URL into individual, unique words then add in the title words. |
171 const GURL& gurl(row.url()); | 180 const GURL& gurl(row.url()); |
172 string16 url(net::FormatUrl(gurl, languages_, | 181 string16 url(net::FormatUrl(gurl, languages_, |
173 net::kFormatUrlOmitUsernamePassword, | 182 net::kFormatUrlOmitUsernamePassword, |
174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 183 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
175 NULL, NULL, NULL)); | 184 NULL, NULL, NULL)); |
176 url = base::i18n::ToLower(url); | 185 url = base::i18n::ToLower(url); |
177 String16Set url_words = String16SetFromString16(url); | 186 String16Set url_words = String16SetFromString16(url, |
178 String16Set title_words = String16SetFromString16(row.title()); | 187 word_starts ? &word_starts->url_word_starts_ : NULL); |
| 188 String16Set title_words = String16SetFromString16(row.title(), |
| 189 word_starts ? &word_starts->title_word_starts_ : NULL); |
179 String16Set words; | 190 String16Set words; |
180 std::set_union(url_words.begin(), url_words.end(), | 191 std::set_union(url_words.begin(), url_words.end(), |
181 title_words.begin(), title_words.end(), | 192 title_words.begin(), title_words.end(), |
182 std::insert_iterator<String16Set>(words, words.begin())); | 193 std::insert_iterator<String16Set>(words, words.begin())); |
183 for (String16Set::iterator word_iter = words.begin(); | 194 for (String16Set::iterator word_iter = words.begin(); |
184 word_iter != words.end(); ++word_iter) | 195 word_iter != words.end(); ++word_iter) |
185 AddWordToIndex(*word_iter, history_id); | 196 AddWordToIndex(*word_iter, history_id); |
186 | 197 |
187 search_term_cache_.clear(); // Invalidate the term cache. | 198 search_term_cache_.clear(); // Invalidate the term cache. |
188 } | 199 } |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
239 word_id_set.insert(word_id); | 250 word_id_set.insert(word_id); |
240 char_word_map_[uni_char] = word_id_set; | 251 char_word_map_[uni_char] = word_id_set; |
241 } | 252 } |
242 } | 253 } |
243 } | 254 } |
244 | 255 |
245 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) { | 256 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) { |
246 RemoveRowWordsFromIndex(row); | 257 RemoveRowWordsFromIndex(row); |
247 HistoryID history_id = static_cast<HistoryID>(row.id()); | 258 HistoryID history_id = static_cast<HistoryID>(row.id()); |
248 history_info_map_.erase(history_id); | 259 history_info_map_.erase(history_id); |
| 260 word_starts_map_.erase(history_id); |
249 } | 261 } |
250 | 262 |
251 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) { | 263 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) { |
252 // Remove the entries in history_id_word_map_ and word_id_history_map_ for | 264 // Remove the entries in history_id_word_map_ and word_id_history_map_ for |
253 // this row. | 265 // this row. |
254 HistoryID history_id = static_cast<HistoryID>(row.id()); | 266 HistoryID history_id = static_cast<HistoryID>(row.id()); |
255 WordIDSet word_id_set = history_id_word_map_[history_id]; | 267 WordIDSet word_id_set = history_id_word_map_[history_id]; |
256 history_id_word_map_.erase(history_id); | 268 history_id_word_map_.erase(history_id); |
257 | 269 |
258 // Reconcile any changes to word usage. | 270 // Reconcile any changes to word usage. |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
321 row_to_update.set_visit_count(row.visit_count()); | 333 row_to_update.set_visit_count(row.visit_count()); |
322 row_to_update.set_typed_count(row.typed_count()); | 334 row_to_update.set_typed_count(row.typed_count()); |
323 row_to_update.set_last_visit(row.last_visit()); | 335 row_to_update.set_last_visit(row.last_visit()); |
324 // While the URL is guaranteed to remain stable, the title may have | 336 // While the URL is guaranteed to remain stable, the title may have |
325 // changed. If so, then update the index with the changed words. | 337 // changed. If so, then update the index with the changed words. |
326 if (title_updated) { | 338 if (title_updated) { |
327 // Clear all words associated with this row and re-index both the | 339 // Clear all words associated with this row and re-index both the |
328 // URL and title. | 340 // URL and title. |
329 RemoveRowWordsFromIndex(row_to_update); | 341 RemoveRowWordsFromIndex(row_to_update); |
330 row_to_update.set_title(row.title()); | 342 row_to_update.set_title(row.title()); |
331 AddRowWordsToIndex(row_to_update); | 343 RowWordStarts word_starts; |
| 344 AddRowWordsToIndex(row_to_update, &word_starts); |
| 345 word_starts_map_[row_id] = word_starts; |
332 } | 346 } |
333 row_was_updated = true; | 347 row_was_updated = true; |
334 } | 348 } |
335 } else { | 349 } else { |
336 // This indexed row no longer qualifies and will be de-indexed by | 350 // This indexed row no longer qualifies and will be de-indexed by |
337 // clearing all words associated with this row. | 351 // clearing all words associated with this row. |
338 RemoveRowFromIndex(row); | 352 RemoveRowFromIndex(row); |
339 row_was_updated = true; | 353 row_was_updated = true; |
340 } | 354 } |
341 if (row_was_updated) | 355 if (row_was_updated) |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
417 // the final filtering we need whitespace separated substrings possibly | 431 // the final filtering we need whitespace separated substrings possibly |
418 // containing escaped characters. | 432 // containing escaped characters. |
419 string16 lower_raw_string(base::i18n::ToLower(search_string)); | 433 string16 lower_raw_string(base::i18n::ToLower(search_string)); |
420 string16 lower_unescaped_string = | 434 string16 lower_unescaped_string = |
421 net::UnescapeURLComponent(lower_raw_string, | 435 net::UnescapeURLComponent(lower_raw_string, |
422 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); | 436 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS); |
423 // Extract individual 'words' (as opposed to 'terms'; see below) from the | 437 // Extract individual 'words' (as opposed to 'terms'; see below) from the |
424 // search string. When the user types "colspec=ID%20Mstone Release" we get | 438 // search string. When the user types "colspec=ID%20Mstone Release" we get |
425 // four 'words': "colspec", "id", "mstone" and "release". | 439 // four 'words': "colspec", "id", "mstone" and "release". |
426 String16Vector lower_words( | 440 String16Vector lower_words( |
427 history::String16VectorFromString16(lower_unescaped_string, false)); | 441 history::String16VectorFromString16(lower_unescaped_string, false, NULL)); |
428 ScoredHistoryMatches scored_items; | 442 ScoredHistoryMatches scored_items; |
429 | 443 |
430 // Do nothing if we have indexed no words (probably because we've not been | 444 // Do nothing if we have indexed no words (probably because we've not been |
431 // initialized yet) or the search string has no words. | 445 // initialized yet) or the search string has no words. |
432 if (word_list_.empty() || lower_words.empty()) { | 446 if (word_list_.empty() || lower_words.empty()) { |
433 search_term_cache_.clear(); // Invalidate the term cache. | 447 search_term_cache_.clear(); // Invalidate the term cache. |
434 return scored_items; | 448 return scored_items; |
435 } | 449 } |
436 | 450 |
437 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep | 451 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
529 | 543 |
530 void URLIndexPrivateData::AddHistoryMatch::operator()( | 544 void URLIndexPrivateData::AddHistoryMatch::operator()( |
531 const HistoryID history_id) { | 545 const HistoryID history_id) { |
532 HistoryInfoMap::const_iterator hist_pos = | 546 HistoryInfoMap::const_iterator hist_pos = |
533 private_data_.history_info_map_.find(history_id); | 547 private_data_.history_info_map_.find(history_id); |
534 // Note that a history_id may be present in the word_id_history_map_ yet not | 548 // Note that a history_id may be present in the word_id_history_map_ yet not |
535 // be found in the history_info_map_. This occurs when an item has been | 549 // be found in the history_info_map_. This occurs when an item has been |
536 // deleted by the user or the item no longer qualifies as a quick result. | 550 // deleted by the user or the item no longer qualifies as a quick result. |
537 if (hist_pos != private_data_.history_info_map_.end()) { | 551 if (hist_pos != private_data_.history_info_map_.end()) { |
538 const URLRow& hist_item = hist_pos->second; | 552 const URLRow& hist_item = hist_pos->second; |
539 ScoredHistoryMatch match( | 553 WordStartsMap::const_iterator starts_pos = |
540 ScoredMatchForURL(hist_item, lower_string_, lower_terms_)); | 554 private_data_.word_starts_map_.find(history_id); |
| 555 DCHECK(starts_pos != private_data_.word_starts_map_.end()); |
| 556 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_string_, |
| 557 lower_terms_, |
| 558 starts_pos->second)); |
541 if (match.raw_score > 0) | 559 if (match.raw_score > 0) |
542 scored_matches_.push_back(match); | 560 scored_matches_.push_back(match); |
543 } | 561 } |
544 } | 562 } |
545 | 563 |
546 // static | 564 // static |
547 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. | 565 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. |
548 ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL( | 566 ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL( |
549 const URLRow& row, | 567 const URLRow& row, |
550 const string16& lower_string, | 568 const string16& lower_string, |
551 const String16Vector& terms) { | 569 const String16Vector& terms, |
| 570 const RowWordStarts& word_starts) { |
552 ScoredHistoryMatch match(row); | 571 ScoredHistoryMatch match(row); |
553 GURL gurl = row.url(); | 572 GURL gurl = row.url(); |
554 if (!gurl.is_valid()) | 573 if (!gurl.is_valid()) |
555 return match; | 574 return match; |
556 | 575 |
557 // Figure out where each search term appears in the URL and/or page title | 576 // Figure out where each search term appears in the URL and/or page title |
558 // so that we can score as well as provide autocomplete highlighting. | 577 // so that we can score as well as provide autocomplete highlighting. |
559 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); | 578 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); |
560 string16 title = base::i18n::ToLower(row.title()); | 579 string16 title = base::i18n::ToLower(row.title()); |
561 int term_num = 0; | 580 int term_num = 0; |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
657 for (size_t i = 1; i < matches.size(); ++i) { | 676 for (size_t i = 1; i < matches.size(); ++i) { |
658 if (matches[i - 1].term_num > matches[i].term_num) | 677 if (matches[i - 1].term_num > matches[i].term_num) |
659 ++out_of_order; | 678 ++out_of_order; |
660 } | 679 } |
661 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / | 680 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / |
662 max_possible_out_of_order; | 681 max_possible_out_of_order; |
663 } | 682 } |
664 | 683 |
665 // Score component for how early in the match string the first search term | 684 // Score component for how early in the match string the first search term |
666 // appears. Start with kStartMaxValue points and discount by | 685 // appears. Start with kStartMaxValue points and discount by |
667 // kStartMaxValue/kMaxSignificantStart points for each character later than | 686 // kStartMaxValue/kMaxSignificantChars points for each character later than |
668 // the first at which the term begins. No points are earned if the start of | 687 // the first at which the term begins. No points are earned if the start of |
669 // the match occurs at or after kMaxSignificantStart. | 688 // the match occurs at or after kMaxSignificantChars. |
670 const size_t kMaxSignificantStart = 50; | |
671 const int kStartMaxValue = 1000; | 689 const int kStartMaxValue = 1000; |
672 int start_value = (kMaxSignificantStart - | 690 int start_value = (kMaxSignificantChars - |
673 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / | 691 std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue / |
674 kMaxSignificantStart; | 692 kMaxSignificantChars; |
675 | 693 |
676 // Score component for how much of the matched string the input terms cover. | 694 // Score component for how much of the matched string the input terms cover. |
677 // kCompleteMaxValue points times the fraction of the URL/page title string | 695 // kCompleteMaxValue points times the fraction of the URL/page title string |
678 // that was matched. | 696 // that was matched. |
679 size_t term_length_total = std::accumulate(matches.begin(), matches.end(), | 697 size_t term_length_total = std::accumulate(matches.begin(), matches.end(), |
680 0, AccumulateMatchLength); | 698 0, AccumulateMatchLength); |
681 const size_t kMaxSignificantLength = 50; | 699 const size_t kMaxSignificantLength = 50; |
682 size_t max_significant_length = | 700 size_t max_significant_length = |
683 std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); | 701 std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); |
684 const int kCompleteMaxValue = 1000; | 702 const int kCompleteMaxValue = 1000; |
(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
921 } | 939 } |
922 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", | 940 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", |
923 base::TimeTicks::Now() - beginning_time); | 941 base::TimeTicks::Now() - beginning_time); |
924 return true; | 942 return true; |
925 } | 943 } |
926 | 944 |
927 void URLIndexPrivateData::SavePrivateData( | 945 void URLIndexPrivateData::SavePrivateData( |
928 InMemoryURLIndexCacheItem* cache) const { | 946 InMemoryURLIndexCacheItem* cache) const { |
929 DCHECK(cache); | 947 DCHECK(cache); |
930 cache->set_timestamp(base::Time::Now().ToInternalValue()); | 948 cache->set_timestamp(base::Time::Now().ToInternalValue()); |
| 949 cache->set_version(saved_cache_version_); |
931 // history_item_count_ is no longer used but rather than change the protobuf | 950 // history_item_count_ is no longer used but rather than change the protobuf |
932 // definition use a placeholder. This will go away with the switch to SQLite. | 951 // definition use a placeholder. This will go away with the switch to SQLite. |
933 cache->set_history_item_count(0); | 952 cache->set_history_item_count(0); |
934 SaveWordList(cache); | 953 SaveWordList(cache); |
935 SaveWordMap(cache); | 954 SaveWordMap(cache); |
936 SaveCharWordMap(cache); | 955 SaveCharWordMap(cache); |
937 SaveWordIDHistoryMap(cache); | 956 SaveWordIDHistoryMap(cache); |
938 SaveHistoryInfoMap(cache); | 957 SaveHistoryInfoMap(cache); |
| 958 SaveWordStartsMap(cache); |
939 } | 959 } |
940 | 960 |
941 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { | 961 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
942 if (word_list_.empty()) | 962 if (word_list_.empty()) |
943 return; | 963 return; |
944 WordListItem* list_item = cache->mutable_word_list(); | 964 WordListItem* list_item = cache->mutable_word_list(); |
945 list_item->set_word_count(word_list_.size()); | 965 list_item->set_word_count(word_list_.size()); |
946 for (String16Vector::const_iterator iter = word_list_.begin(); | 966 for (String16Vector::const_iterator iter = word_list_.begin(); |
947 iter != word_list_.end(); ++iter) | 967 iter != word_list_.end(); ++iter) |
948 list_item->add_word(UTF16ToUTF8(*iter)); | 968 list_item->add_word(UTF16ToUTF8(*iter)); |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1013 // is no need to save search_term_cache_ (not persistent), | 1033 // is no need to save search_term_cache_ (not persistent), |
1014 // languages_, etc. | 1034 // languages_, etc. |
1015 map_entry->set_visit_count(url_row.visit_count()); | 1035 map_entry->set_visit_count(url_row.visit_count()); |
1016 map_entry->set_typed_count(url_row.typed_count()); | 1036 map_entry->set_typed_count(url_row.typed_count()); |
1017 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 1037 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
1018 map_entry->set_url(url_row.url().spec()); | 1038 map_entry->set_url(url_row.url().spec()); |
1019 map_entry->set_title(UTF16ToUTF8(url_row.title())); | 1039 map_entry->set_title(UTF16ToUTF8(url_row.title())); |
1020 } | 1040 } |
1021 } | 1041 } |
1022 | 1042 |
| 1043 void URLIndexPrivateData::SaveWordStartsMap( |
| 1044 InMemoryURLIndexCacheItem* cache) const { |
| 1045 if (word_starts_map_.empty()) |
| 1046 return; |
| 1047 // For unit testing: Enable saving of the cache as an earlier version to |
| 1048 // allow testing of cache file upgrading in ReadFromFile(). |
| 1049 // TODO(mrossetti): Instead of intruding on production code with this kind of |
| 1050 // test harness, save a copy of an older version cache with known results. |
| 1051 // Implement this when switching the caching over to SQLite. |
| 1052 if (saved_cache_version_ < 1) |
| 1053 return; |
| 1054 |
| 1055 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); |
| 1056 map_item->set_item_count(word_starts_map_.size()); |
| 1057 for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); |
| 1058 iter != word_starts_map_.end(); ++iter) { |
| 1059 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); |
| 1060 map_entry->set_history_id(iter->first); |
| 1061 const RowWordStarts& word_starts(iter->second); |
| 1062 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); |
| 1063 i != word_starts.url_word_starts_.end(); ++i) |
| 1064 map_entry->add_url_word_starts(*i); |
| 1065 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); |
| 1066 i != word_starts.title_word_starts_.end(); ++i) |
| 1067 map_entry->add_title_word_starts(*i); |
| 1068 } |
| 1069 } |
| 1070 |
1023 // Cache Restoring ------------------------------------------------------------- | 1071 // Cache Restoring ------------------------------------------------------------- |
1024 | 1072 |
1025 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { | 1073 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { |
1026 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 1074 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
1027 // That is: ensure that the database has not been modified since the cache | 1075 // That is: ensure that the database has not been modified since the cache |
1028 // was last saved. DB file modification date is inadequate. There are no | 1076 // was last saved. DB file modification date is inadequate. There are no |
1029 // SQLite table checksums automatically stored. | 1077 // SQLite table checksums automatically stored. |
1030 Clear(); // Start with a clean slate. | 1078 Clear(); // Start with a clean slate. |
1031 | 1079 |
1032 // FIXME(mrossetti): Move File IO to another thread. | 1080 // FIXME(mrossetti): Move File IO to another thread. |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1083 rebuilt_data->history_id_word_map_.size()); | 1131 rebuilt_data->history_id_word_map_.size()); |
1084 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", | 1132 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
1085 rebuilt_data->word_map_.size()); | 1133 rebuilt_data->word_map_.size()); |
1086 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", | 1134 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
1087 rebuilt_data->char_word_map_.size()); | 1135 rebuilt_data->char_word_map_.size()); |
1088 return rebuilt_data.release(); | 1136 return rebuilt_data.release(); |
1089 } | 1137 } |
1090 | 1138 |
1091 bool URLIndexPrivateData::RestorePrivateData( | 1139 bool URLIndexPrivateData::RestorePrivateData( |
1092 const InMemoryURLIndexCacheItem& cache) { | 1140 const InMemoryURLIndexCacheItem& cache) { |
| 1141 if (cache.has_version()) |
| 1142 restored_cache_version_ = cache.version(); |
1093 return RestoreWordList(cache) && RestoreWordMap(cache) && | 1143 return RestoreWordList(cache) && RestoreWordMap(cache) && |
1094 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && | 1144 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
1095 RestoreHistoryInfoMap(cache); | 1145 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache); |
1096 } | 1146 } |
1097 | 1147 |
1098 bool URLIndexPrivateData::RestoreWordList( | 1148 bool URLIndexPrivateData::RestoreWordList( |
1099 const InMemoryURLIndexCacheItem& cache) { | 1149 const InMemoryURLIndexCacheItem& cache) { |
1100 if (!cache.has_word_list()) | 1150 if (!cache.has_word_list()) |
1101 return false; | 1151 return false; |
1102 const WordListItem& list_item(cache.word_list()); | 1152 const WordListItem& list_item(cache.word_list()); |
1103 uint32 expected_item_count = list_item.word_count(); | 1153 uint32 expected_item_count = list_item.word_count(); |
1104 uint32 actual_item_count = list_item.word_size(); | 1154 uint32 actual_item_count = list_item.word_size(); |
1105 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1155 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1206 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1256 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); |
1207 if (iter->has_title()) { | 1257 if (iter->has_title()) { |
1208 string16 title(UTF8ToUTF16(iter->title())); | 1258 string16 title(UTF8ToUTF16(iter->title())); |
1209 url_row.set_title(title); | 1259 url_row.set_title(title); |
1210 } | 1260 } |
1211 history_info_map_[history_id] = url_row; | 1261 history_info_map_[history_id] = url_row; |
1212 } | 1262 } |
1213 return true; | 1263 return true; |
1214 } | 1264 } |
1215 | 1265 |
| 1266 bool URLIndexPrivateData::RestoreWordStartsMap( |
| 1267 const InMemoryURLIndexCacheItem& cache) { |
| 1268 // Note that this function must be called after RestoreHistoryInfoMap() has |
| 1269 // been run as the word starts may have to be recalculated from the urls and |
| 1270 // page titles. |
| 1271 if (cache.has_word_starts_map()) { |
| 1272 const WordStartsMapItem& list_item(cache.word_starts_map()); |
| 1273 uint32 expected_item_count = list_item.item_count(); |
| 1274 uint32 actual_item_count = list_item.word_starts_map_entry_size(); |
| 1275 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
| 1276 return false; |
| 1277 const RepeatedPtrField<WordStartsMapEntry>& |
| 1278 entries(list_item.word_starts_map_entry()); |
| 1279 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter = |
| 1280 entries.begin(); iter != entries.end(); ++iter) { |
| 1281 HistoryID history_id = iter->history_id(); |
| 1282 RowWordStarts word_starts; |
| 1283 // Restore the URL word starts. |
| 1284 const RepeatedField<int32>& url_starts(iter->url_word_starts()); |
| 1285 for (RepeatedField<int32>::const_iterator jiter = url_starts.begin(); |
| 1286 jiter != url_starts.end(); ++jiter) |
| 1287 word_starts.url_word_starts_.push_back(*jiter); |
| 1288 // Restore the page title word starts. |
| 1289 const RepeatedField<int32>& title_starts(iter->title_word_starts()); |
| 1290 for (RepeatedField<int32>::const_iterator jiter = title_starts.begin(); |
| 1291 jiter != title_starts.end(); ++jiter) |
| 1292 word_starts.title_word_starts_.push_back(*jiter); |
| 1293 word_starts_map_[history_id] = word_starts; |
| 1294 } |
| 1295 } else { |
| 1296 // Since the cache did not contain any word starts we must rebuild then from |
| 1297 // the URL and page titles. |
| 1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); |
| 1299 iter != history_info_map_.end(); ++iter) { |
| 1300 RowWordStarts word_starts; |
| 1301 const URLRow& row(iter->second); |
| 1302 string16 url(net::FormatUrl(row.url(), languages_, |
| 1303 net::kFormatUrlOmitUsernamePassword, |
| 1304 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
| 1305 NULL, NULL, NULL)); |
| 1306 url = base::i18n::ToLower(url); |
| 1307 String16VectorFromString16(url, false, &word_starts.url_word_starts_); |
| 1308 String16VectorFromString16( |
| 1309 row.title(), false, &word_starts.title_word_starts_); |
| 1310 word_starts_map_[iter->first] = word_starts; |
| 1311 } |
| 1312 } |
| 1313 return true; |
| 1314 } |
| 1315 |
1216 } // namespace history | 1316 } // namespace history |
OLD | NEW |