OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/url_index_private_data.h" | 5 #include "chrome/browser/history/url_index_private_data.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
11 #include <numeric> | 11 #include <numeric> |
12 | 12 |
13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
14 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
15 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
16 #include "base/string_util.h" | 16 #include "base/string_util.h" |
17 #include "base/threading/thread_restrictions.h" | 17 #include "base/threading/thread_restrictions.h" |
18 #include "base/utf_string_conversions.h" | 18 #include "base/utf_string_conversions.h" |
19 #include "chrome/browser/autocomplete/autocomplete.h" | 19 #include "chrome/browser/autocomplete/autocomplete.h" |
20 #include "chrome/browser/history/url_database.h" | 20 #include "chrome/browser/history/history_database.h" |
21 #include "chrome/common/url_constants.h" | 21 #include "chrome/common/url_constants.h" |
22 #include "net/base/net_util.h" | 22 #include "net/base/net_util.h" |
23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
24 | 24 |
25 using google::protobuf::RepeatedField; | 25 using google::protobuf::RepeatedField; |
26 using google::protobuf::RepeatedPtrField; | 26 using google::protobuf::RepeatedPtrField; |
27 using in_memory_url_index::InMemoryURLIndexCacheItem; | 27 using in_memory_url_index::InMemoryURLIndexCacheItem; |
28 | 28 |
29 namespace history { | 29 namespace history { |
30 | 30 |
(...skipping 97 matching lines...)
128 available_words_.clear(); | 128 available_words_.clear(); |
129 word_map_.clear(); | 129 word_map_.clear(); |
130 char_word_map_.clear(); | 130 char_word_map_.clear(); |
131 word_id_history_map_.clear(); | 131 word_id_history_map_.clear(); |
132 history_id_word_map_.clear(); | 132 history_id_word_map_.clear(); |
133 history_info_map_.clear(); | 133 history_info_map_.clear(); |
134 } | 134 } |
135 | 135 |
136 // Cache Updating -------------------------------------------------------------- | 136 // Cache Updating -------------------------------------------------------------- |
137 | 137 |
138 void URLIndexPrivateData::IndexRow(const URLRow& row) { | 138 bool URLIndexPrivateData::IndexRow(const URLRow& row) { |
139 const GURL& gurl(row.url()); | 139 const GURL& gurl(row.url()); |
140 | 140 |
141 // Index only URLs with a whitelisted scheme. | 141 // Index only URLs with a whitelisted scheme. |
142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) | 142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) |
143 return; | 143 return false; |
144 | 144 |
145 URLID row_id = row.id(); | 145 URLID row_id = row.id(); |
146 // Strip out username and password before saving and indexing. | 146 // Strip out username and password before saving and indexing. |
147 string16 url(net::FormatUrl(gurl, languages_, | 147 string16 url(net::FormatUrl(gurl, languages_, |
148 net::kFormatUrlOmitUsernamePassword, | 148 net::kFormatUrlOmitUsernamePassword, |
149 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 149 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
150 NULL, NULL, NULL)); | 150 NULL, NULL, NULL)); |
151 | 151 |
152 HistoryID history_id = static_cast<HistoryID>(row_id); | 152 HistoryID history_id = static_cast<HistoryID>(row_id); |
153 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); | 153 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); |
154 | 154 |
155 // Add the row for quick lookup in the history info store. | 155 // Add the row for quick lookup in the history info store. |
156 URLRow new_row(GURL(url), row_id); | 156 URLRow new_row(GURL(url), row_id); |
157 new_row.set_visit_count(row.visit_count()); | 157 new_row.set_visit_count(row.visit_count()); |
158 new_row.set_typed_count(row.typed_count()); | 158 new_row.set_typed_count(row.typed_count()); |
159 new_row.set_last_visit(row.last_visit()); | 159 new_row.set_last_visit(row.last_visit()); |
160 new_row.set_title(row.title()); | 160 new_row.set_title(row.title()); |
161 history_info_map_[history_id] = new_row; | 161 history_info_map_[history_id] = new_row; |
162 | 162 |
163 // Index the words contained in the URL and title of the row. | 163 // Index the words contained in the URL and title of the row. |
164 AddRowWordsToIndex(new_row); | 164 AddRowWordsToIndex(new_row); |
165 return; | 165 return true; |
166 } | 166 } |
167 | 167 |
168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) { | 168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) { |
169 HistoryID history_id = static_cast<HistoryID>(row.id()); | 169 HistoryID history_id = static_cast<HistoryID>(row.id()); |
170 // Split URL into individual, unique words then add in the title words. | 170 // Split URL into individual, unique words then add in the title words. |
171 const GURL& gurl(row.url()); | 171 const GURL& gurl(row.url()); |
172 string16 url(net::FormatUrl(gurl, languages_, | 172 string16 url(net::FormatUrl(gurl, languages_, |
173 net::kFormatUrlOmitUsernamePassword, | 173 net::kFormatUrlOmitUsernamePassword, |
174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
175 NULL, NULL, NULL)); | 175 NULL, NULL, NULL)); |
(...skipping 112 matching lines...)
288 if (iter != history_id_word_map_.end()) { | 288 if (iter != history_id_word_map_.end()) { |
289 WordIDSet& word_id_set(iter->second); | 289 WordIDSet& word_id_set(iter->second); |
290 word_id_set.insert(word_id); | 290 word_id_set.insert(word_id); |
291 } else { | 291 } else { |
292 WordIDSet word_id_set; | 292 WordIDSet word_id_set; |
293 word_id_set.insert(word_id); | 293 word_id_set.insert(word_id); |
294 history_id_word_map_[history_id] = word_id_set; | 294 history_id_word_map_[history_id] = word_id_set; |
295 } | 295 } |
296 } | 296 } |
297 | 297 |
298 void URLIndexPrivateData::UpdateURL(URLID row_id, const URLRow& row) { | 298 bool URLIndexPrivateData::UpdateURL(const URLRow& row) { |
299 // The row may or may not already be in our index. If it is not already | 299 // The row may or may not already be in our index. If it is not already |
300 // indexed and it qualifies then it gets indexed. If it is already | 300 // indexed and it qualifies then it gets indexed. If it is already |
301 // indexed and still qualifies then it gets updated, otherwise it | 301 // indexed and still qualifies then it gets updated, otherwise it |
302 // is deleted from the index. | 302 // is deleted from the index. |
| 303 bool row_was_updated = false; |
| 304 URLID row_id = row.id(); |
303 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); | 305 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); |
304 if (row_pos == history_info_map_.end()) { | 306 if (row_pos == history_info_map_.end()) { |
305 // This new row should be indexed if it qualifies. | 307 // This new row should be indexed if it qualifies. |
306 URLRow new_row(row); | 308 URLRow new_row(row); |
307 new_row.set_id(row_id); | 309 new_row.set_id(row_id); |
308 if (RowQualifiesAsSignificant(new_row, base::Time())) | 310 row_was_updated = |
309 IndexRow(new_row); | 311 RowQualifiesAsSignificant(new_row, base::Time()) && IndexRow(new_row); |
310 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 312 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
311 // This indexed row still qualifies and will be re-indexed. | 313 // This indexed row still qualifies and will be re-indexed. |
312 // The url won't have changed but the title, visit count, etc. | 314 // The url won't have changed but the title, visit count, etc. |
313 // might have changed. | 315 // might have changed. |
314 URLRow& updated_row = row_pos->second; | 316 URLRow& row_to_update = row_pos->second; |
315 updated_row.set_visit_count(row.visit_count()); | 317 bool title_updated = row_to_update.title() != row.title(); |
316 updated_row.set_typed_count(row.typed_count()); | 318 if (row_to_update.visit_count() != row.visit_count() || |
317 updated_row.set_last_visit(row.last_visit()); | 319 row_to_update.typed_count() != row.typed_count() || |
318 // While the URL is guaranteed to remain stable, the title may have changed. | 320 row_to_update.last_visit() != row.last_visit() || title_updated) { |
319 // If so, then we need to update the index with the changed words. | 321 row_to_update.set_visit_count(row.visit_count()); |
320 if (updated_row.title() != row.title()) { | 322 row_to_update.set_typed_count(row.typed_count()); |
321 // Clear all words associated with this row and re-index both the | 323 row_to_update.set_last_visit(row.last_visit()); |
322 // URL and title. | 324 // While the URL is guaranteed to remain stable, the title may have |
323 RemoveRowWordsFromIndex(updated_row); | 325 // changed. If so, then update the index with the changed words. |
324 updated_row.set_title(row.title()); | 326 if (title_updated) { |
325 AddRowWordsToIndex(updated_row); | 327 // Clear all words associated with this row and re-index both the |
| 328 // URL and title. |
| 329 RemoveRowWordsFromIndex(row_to_update); |
| 330 row_to_update.set_title(row.title()); |
| 331 AddRowWordsToIndex(row_to_update); |
| 332 } |
| 333 row_was_updated = true; |
326 } | 334 } |
327 } else { | 335 } else { |
328 // This indexed row no longer qualifies and will be de-indexed by | 336 // This indexed row no longer qualifies and will be de-indexed by |
329 // clearing all words associated with this row. | 337 // clearing all words associated with this row. |
330 URLRow& removed_row = row_pos->second; | 338 RemoveRowFromIndex(row); |
331 RemoveRowFromIndex(removed_row); | 339 row_was_updated = true; |
332 } | 340 } |
333 // This invalidates the cache. | 341 if (row_was_updated) |
334 search_term_cache_.clear(); | 342 search_term_cache_.clear(); // This invalidates the cache. |
| 343 return row_was_updated; |
335 } | 344 } |
336 | 345 |
337 void URLIndexPrivateData::DeleteURL(URLID row_id) { | 346 // Helper functor for DeleteURL. |
338 // Note that this does not remove any reference to this row from the | 347 class HistoryInfoMapItemHasURL { |
339 // word_id_history_map_. That map will continue to contain (and return) | 348 public: |
340 // hits against this row until that map is rebuilt, but since the | 349 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {} |
341 // history_info_map_ no longer references the row no erroneous results | 350 |
342 // will propagate to the user. | 351 bool operator()(const std::pair<const HistoryID, URLRow>& item) { |
343 history_info_map_.erase(row_id); | 352 return item.second.url() == url_; |
344 search_term_cache_.clear(); // This invalidates the word cache. | 353 } |
| 354 |
| 355 private: |
| 356 const GURL& url_; |
| 357 }; |
| 358 |
| 359 bool URLIndexPrivateData::DeleteURL(const GURL& url) { |
| 360 // Find the matching entry in the history_info_map_. |
| 361 HistoryInfoMap::iterator pos = std::find_if( |
| 362 history_info_map_.begin(), |
| 363 history_info_map_.end(), |
| 364 HistoryInfoMapItemHasURL(url)); |
| 365 if (pos == history_info_map_.end()) |
| 366 return false; |
| 367 RemoveRowFromIndex(pos->second); |
| 368 search_term_cache_.clear(); // This invalidates the cache. |
| 369 return true; |
345 } | 370 } |
346 | 371 |
347 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const { | 372 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const { |
348 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); | 373 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); |
349 } | 374 } |
350 | 375 |
351 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- | 376 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- |
352 | 377 |
353 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( | 378 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( |
354 const HistoryInfoMap& history_info_map) | 379 const HistoryInfoMap& history_info_map) |
(...skipping 613 matching lines...)
968 map_entry->set_visit_count(url_row.visit_count()); | 993 map_entry->set_visit_count(url_row.visit_count()); |
969 map_entry->set_typed_count(url_row.typed_count()); | 994 map_entry->set_typed_count(url_row.typed_count()); |
970 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 995 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
971 map_entry->set_url(url_row.url().spec()); | 996 map_entry->set_url(url_row.url().spec()); |
972 map_entry->set_title(UTF16ToUTF8(url_row.title())); | 997 map_entry->set_title(UTF16ToUTF8(url_row.title())); |
973 } | 998 } |
974 } | 999 } |
975 | 1000 |
976 // Cache Restoring ------------------------------------------------------------- | 1001 // Cache Restoring ------------------------------------------------------------- |
977 | 1002 |
978 bool URLIndexPrivateData::ReloadFromHistory(history::URLDatabase* history_db) { | |
979 Clear(); | |
980 | |
981 if (!history_db) | |
982 return false; | |
983 | |
984 base::TimeTicks beginning_time = base::TimeTicks::Now(); | |
985 URLDatabase::URLEnumerator history_enum; | |
986 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | |
987 return false; | |
988 URLRow row; | |
989 while (history_enum.GetNextURL(&row)) | |
990 IndexRow(row); | |
991 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | |
992 base::TimeTicks::Now() - beginning_time); | |
993 return true; | |
994 } | |
995 | |
996 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { | 1003 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { |
997 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 1004 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
998 // That is: ensure that the database has not been modified since the cache | 1005 // That is: ensure that the database has not been modified since the cache |
999 // was last saved. DB file modification date is inadequate. There are no | 1006 // was last saved. DB file modification date is inadequate. There are no |
1000 // SQLite table checksums automatically stored. | 1007 // SQLite table checksums automatically stored. |
| 1008 Clear(); // Start with a clean slate. |
| 1009 |
1001 // FIXME(mrossetti): Move File IO to another thread. | 1010 // FIXME(mrossetti): Move File IO to another thread. |
1002 base::ThreadRestrictions::ScopedAllowIO allow_io; | 1011 base::ThreadRestrictions::ScopedAllowIO allow_io; |
1003 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 1012 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| 1013 if (!file_util::PathExists(file_path)) |
| 1014 return false; |
1004 std::string data; | 1015 std::string data; |
1005 // If there is no cache file then simply give up. This will cause us to | 1016 // If there is no cache file then simply give up. This will cause us to |
1006 // attempt to rebuild from the history database. | 1017 // attempt to rebuild from the history database. |
1007 if (!file_util::ReadFileToString(file_path, &data)) | 1018 if (!file_util::ReadFileToString(file_path, &data)) |
1008 return false; | 1019 return false; |
1009 | 1020 |
1010 InMemoryURLIndexCacheItem index_cache; | 1021 InMemoryURLIndexCacheItem index_cache; |
1011 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { | 1022 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { |
1012 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " | 1023 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " |
1013 << file_path.value(); | 1024 << file_path.value(); |
1014 return false; | 1025 return false; |
1015 } | 1026 } |
1016 | 1027 |
1017 if (!RestorePrivateData(index_cache)) { | 1028 if (!RestorePrivateData(index_cache)) { |
1018 Clear(); // Back to square one -- must build from scratch. | 1029 Clear(); // Back to square one -- must build from scratch. |
1019 return false; | 1030 return false; |
1020 } | 1031 } |
1021 | 1032 |
1022 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", | 1033 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
1023 base::TimeTicks::Now() - beginning_time); | 1034 base::TimeTicks::Now() - beginning_time); |
1024 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", | 1035 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
1025 history_id_word_map_.size()); | 1036 history_id_word_map_.size()); |
1026 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); | 1037 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
1027 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); | 1038 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); |
1028 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); | 1039 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); |
1029 return true; | 1040 return true; |
1030 } | 1041 } |
1031 | 1042 |
| 1043 // static |
| 1044 URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory( |
| 1045 HistoryDatabase* history_db) { |
| 1046 if (!history_db) |
| 1047 return NULL; |
| 1048 |
| 1049 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| 1050 |
| 1051 scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); |
| 1052 URLDatabase::URLEnumerator history_enum; |
| 1053 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
| 1054 return NULL; |
| 1055 for (URLRow row; history_enum.GetNextURL(&row); ) |
| 1056 rebuilt_data->IndexRow(row); |
| 1057 |
| 1058 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
| 1059 base::TimeTicks::Now() - beginning_time); |
| 1060 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
| 1061 rebuilt_data->history_id_word_map_.size()); |
| 1062 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
| 1063 rebuilt_data->word_map_.size()); |
| 1064 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| 1065 rebuilt_data->char_word_map_.size()); |
| 1066 return rebuilt_data.release(); |
| 1067 } |
| 1068 |
1032 bool URLIndexPrivateData::RestorePrivateData( | 1069 bool URLIndexPrivateData::RestorePrivateData( |
1033 const InMemoryURLIndexCacheItem& cache) { | 1070 const InMemoryURLIndexCacheItem& cache) { |
1034 return RestoreWordList(cache) && RestoreWordMap(cache) && | 1071 return RestoreWordList(cache) && RestoreWordMap(cache) && |
1035 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && | 1072 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
1036 RestoreHistoryInfoMap(cache); | 1073 RestoreHistoryInfoMap(cache); |
1037 } | 1074 } |
1038 | 1075 |
1039 bool URLIndexPrivateData::RestoreWordList( | 1076 bool URLIndexPrivateData::RestoreWordList( |
1040 const InMemoryURLIndexCacheItem& cache) { | 1077 const InMemoryURLIndexCacheItem& cache) { |
1041 if (!cache.has_word_list()) | 1078 if (!cache.has_word_list()) |
(...skipping 106 matching lines...)
1148 if (iter->has_title()) { | 1185 if (iter->has_title()) { |
1149 string16 title(UTF8ToUTF16(iter->title())); | 1186 string16 title(UTF8ToUTF16(iter->title())); |
1150 url_row.set_title(title); | 1187 url_row.set_title(title); |
1151 } | 1188 } |
1152 history_info_map_[history_id] = url_row; | 1189 history_info_map_[history_id] = url_row; |
1153 } | 1190 } |
1154 return true; | 1191 return true; |
1155 } | 1192 } |
1156 | 1193 |
1157 } // namespace history | 1194 } // namespace history |
OLD | NEW |