Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(110)

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 9316109: Move Ownership of IMUI to HistoryService. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Qualify globally scoped friend classes. Created 8 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/url_index_private_data.h" 5 #include "chrome/browser/history/url_index_private_data.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
11 #include <numeric> 11 #include <numeric>
12 12
13 #include "base/file_util.h" 13 #include "base/file_util.h"
14 #include "base/i18n/case_conversion.h" 14 #include "base/i18n/case_conversion.h"
15 #include "base/metrics/histogram.h" 15 #include "base/metrics/histogram.h"
16 #include "base/string_util.h" 16 #include "base/string_util.h"
17 #include "base/threading/thread_restrictions.h" 17 #include "base/threading/thread_restrictions.h"
18 #include "base/utf_string_conversions.h" 18 #include "base/utf_string_conversions.h"
19 #include "chrome/browser/autocomplete/autocomplete.h" 19 #include "chrome/browser/autocomplete/autocomplete.h"
20 #include "chrome/browser/history/url_database.h" 20 #include "chrome/browser/history/history_database.h"
21 #include "chrome/common/url_constants.h" 21 #include "chrome/common/url_constants.h"
22 #include "net/base/net_util.h" 22 #include "net/base/net_util.h"
23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" 23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
24 24
25 using google::protobuf::RepeatedField; 25 using google::protobuf::RepeatedField;
26 using google::protobuf::RepeatedPtrField; 26 using google::protobuf::RepeatedPtrField;
27 using in_memory_url_index::InMemoryURLIndexCacheItem; 27 using in_memory_url_index::InMemoryURLIndexCacheItem;
28 28
29 namespace history { 29 namespace history {
30 30
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
128 available_words_.clear(); 128 available_words_.clear();
129 word_map_.clear(); 129 word_map_.clear();
130 char_word_map_.clear(); 130 char_word_map_.clear();
131 word_id_history_map_.clear(); 131 word_id_history_map_.clear();
132 history_id_word_map_.clear(); 132 history_id_word_map_.clear();
133 history_info_map_.clear(); 133 history_info_map_.clear();
134 } 134 }
135 135
136 // Cache Updating -------------------------------------------------------------- 136 // Cache Updating --------------------------------------------------------------
137 137
138 void URLIndexPrivateData::IndexRow(const URLRow& row) { 138 bool URLIndexPrivateData::IndexRow(const URLRow& row) {
139 const GURL& gurl(row.url()); 139 const GURL& gurl(row.url());
140 140
141 // Index only URLs with a whitelisted scheme. 141 // Index only URLs with a whitelisted scheme.
142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) 142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl))
143 return; 143 return false;
144 144
145 URLID row_id = row.id(); 145 URLID row_id = row.id();
146 // Strip out username and password before saving and indexing. 146 // Strip out username and password before saving and indexing.
147 string16 url(net::FormatUrl(gurl, languages_, 147 string16 url(net::FormatUrl(gurl, languages_,
148 net::kFormatUrlOmitUsernamePassword, 148 net::kFormatUrlOmitUsernamePassword,
149 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, 149 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
150 NULL, NULL, NULL)); 150 NULL, NULL, NULL));
151 151
152 HistoryID history_id = static_cast<HistoryID>(row_id); 152 HistoryID history_id = static_cast<HistoryID>(row_id);
153 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); 153 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
154 154
155 // Add the row for quick lookup in the history info store. 155 // Add the row for quick lookup in the history info store.
156 URLRow new_row(GURL(url), row_id); 156 URLRow new_row(GURL(url), row_id);
157 new_row.set_visit_count(row.visit_count()); 157 new_row.set_visit_count(row.visit_count());
158 new_row.set_typed_count(row.typed_count()); 158 new_row.set_typed_count(row.typed_count());
159 new_row.set_last_visit(row.last_visit()); 159 new_row.set_last_visit(row.last_visit());
160 new_row.set_title(row.title()); 160 new_row.set_title(row.title());
161 history_info_map_[history_id] = new_row; 161 history_info_map_[history_id] = new_row;
162 162
163 // Index the words contained in the URL and title of the row. 163 // Index the words contained in the URL and title of the row.
164 AddRowWordsToIndex(new_row); 164 AddRowWordsToIndex(new_row);
165 return; 165 return true;
166 } 166 }
167 167
168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) { 168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) {
169 HistoryID history_id = static_cast<HistoryID>(row.id()); 169 HistoryID history_id = static_cast<HistoryID>(row.id());
170 // Split URL into individual, unique words then add in the title words. 170 // Split URL into individual, unique words then add in the title words.
171 const GURL& gurl(row.url()); 171 const GURL& gurl(row.url());
172 string16 url(net::FormatUrl(gurl, languages_, 172 string16 url(net::FormatUrl(gurl, languages_,
173 net::kFormatUrlOmitUsernamePassword, 173 net::kFormatUrlOmitUsernamePassword,
174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, 174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
175 NULL, NULL, NULL)); 175 NULL, NULL, NULL));
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 if (iter != history_id_word_map_.end()) { 288 if (iter != history_id_word_map_.end()) {
289 WordIDSet& word_id_set(iter->second); 289 WordIDSet& word_id_set(iter->second);
290 word_id_set.insert(word_id); 290 word_id_set.insert(word_id);
291 } else { 291 } else {
292 WordIDSet word_id_set; 292 WordIDSet word_id_set;
293 word_id_set.insert(word_id); 293 word_id_set.insert(word_id);
294 history_id_word_map_[history_id] = word_id_set; 294 history_id_word_map_[history_id] = word_id_set;
295 } 295 }
296 } 296 }
297 297
298 void URLIndexPrivateData::UpdateURL(URLID row_id, const URLRow& row) { 298 bool URLIndexPrivateData::UpdateURL(const URLRow& row) {
299 // The row may or may not already be in our index. If it is not already 299 // The row may or may not already be in our index. If it is not already
300 // indexed and it qualifies then it gets indexed. If it is already 300 // indexed and it qualifies then it gets indexed. If it is already
301 // indexed and still qualifies then it gets updated, otherwise it 301 // indexed and still qualifies then it gets updated, otherwise it
302 // is deleted from the index. 302 // is deleted from the index.
303 bool row_was_updated = false;
304 URLID row_id = row.id();
303 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); 305 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
304 if (row_pos == history_info_map_.end()) { 306 if (row_pos == history_info_map_.end()) {
305 // This new row should be indexed if it qualifies. 307 // This new row should be indexed if it qualifies.
306 URLRow new_row(row); 308 URLRow new_row(row);
307 new_row.set_id(row_id); 309 new_row.set_id(row_id);
308 if (RowQualifiesAsSignificant(new_row, base::Time())) 310 row_was_updated =
309 IndexRow(new_row); 311 RowQualifiesAsSignificant(new_row, base::Time()) && IndexRow(new_row);
310 } else if (RowQualifiesAsSignificant(row, base::Time())) { 312 } else if (RowQualifiesAsSignificant(row, base::Time())) {
311 // This indexed row still qualifies and will be re-indexed. 313 // This indexed row still qualifies and will be re-indexed.
312 // The url won't have changed but the title, visit count, etc. 314 // The url won't have changed but the title, visit count, etc.
313 // might have changed. 315 // might have changed.
314 URLRow& updated_row = row_pos->second; 316 URLRow& row_to_update = row_pos->second;
315 updated_row.set_visit_count(row.visit_count()); 317 bool title_updated = row_to_update.title() != row.title();
316 updated_row.set_typed_count(row.typed_count()); 318 if (row_to_update.visit_count() != row.visit_count() ||
317 updated_row.set_last_visit(row.last_visit()); 319 row_to_update.typed_count() != row.typed_count() ||
318 // While the URL is guaranteed to remain stable, the title may have changed. 320 row_to_update.last_visit() != row.last_visit() || title_updated) {
319 // If so, then we need to update the index with the changed words. 321 row_to_update.set_visit_count(row.visit_count());
320 if (updated_row.title() != row.title()) { 322 row_to_update.set_typed_count(row.typed_count());
321 // Clear all words associated with this row and re-index both the 323 row_to_update.set_last_visit(row.last_visit());
322 // URL and title. 324 // While the URL is guaranteed to remain stable, the title may have
323 RemoveRowWordsFromIndex(updated_row); 325 // changed. If so, then update the index with the changed words.
324 updated_row.set_title(row.title()); 326 if (title_updated) {
325 AddRowWordsToIndex(updated_row); 327 // Clear all words associated with this row and re-index both the
328 // URL and title.
329 RemoveRowWordsFromIndex(row_to_update);
330 row_to_update.set_title(row.title());
331 AddRowWordsToIndex(row_to_update);
332 }
333 row_was_updated = true;
326 } 334 }
327 } else { 335 } else {
328 // This indexed row no longer qualifies and will be de-indexed by 336 // This indexed row no longer qualifies and will be de-indexed by
329 // clearing all words associated with this row. 337 // clearing all words associated with this row.
330 URLRow& removed_row = row_pos->second; 338 RemoveRowFromIndex(row);
331 RemoveRowFromIndex(removed_row); 339 row_was_updated = true;
332 } 340 }
333 // This invalidates the cache. 341 if (row_was_updated)
334 search_term_cache_.clear(); 342 search_term_cache_.clear(); // This invalidates the cache.
343 return row_was_updated;
335 } 344 }
336 345
337 void URLIndexPrivateData::DeleteURL(URLID row_id) { 346 // Helper functor for DeleteURL.
338 // Note that this does not remove any reference to this row from the 347 class HistoryInfoMapItemHasURL {
339 // word_id_history_map_. That map will continue to contain (and return) 348 public:
340 // hits against this row until that map is rebuilt, but since the 349 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}
341 // history_info_map_ no longer references the row no erroneous results 350
342 // will propagate to the user. 351 bool operator()(const std::pair<const HistoryID, URLRow>& item) {
343 history_info_map_.erase(row_id); 352 return item.second.url() == url_;
344 search_term_cache_.clear(); // This invalidates the word cache. 353 }
354
355 private:
356 const GURL& url_;
357 };
358
359 bool URLIndexPrivateData::DeleteURL(const GURL& url) {
360 // Find the matching entry in the history_info_map_.
361 HistoryInfoMap::iterator pos = std::find_if(
362 history_info_map_.begin(),
363 history_info_map_.end(),
364 HistoryInfoMapItemHasURL(url));
365 if (pos == history_info_map_.end())
366 return false;
367 RemoveRowFromIndex(pos->second);
368 search_term_cache_.clear(); // This invalidates the cache.
369 return true;
345 } 370 }
346 371
347 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const { 372 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const {
348 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); 373 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();
349 } 374 }
350 375
351 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- 376 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------
352 377
353 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( 378 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
354 const HistoryInfoMap& history_info_map) 379 const HistoryInfoMap& history_info_map)
(...skipping 613 matching lines...) Expand 10 before | Expand all | Expand 10 after
968 map_entry->set_visit_count(url_row.visit_count()); 993 map_entry->set_visit_count(url_row.visit_count());
969 map_entry->set_typed_count(url_row.typed_count()); 994 map_entry->set_typed_count(url_row.typed_count());
970 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 995 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
971 map_entry->set_url(url_row.url().spec()); 996 map_entry->set_url(url_row.url().spec());
972 map_entry->set_title(UTF16ToUTF8(url_row.title())); 997 map_entry->set_title(UTF16ToUTF8(url_row.title()));
973 } 998 }
974 } 999 }
975 1000
976 // Cache Restoring ------------------------------------------------------------- 1001 // Cache Restoring -------------------------------------------------------------
977 1002
978 bool URLIndexPrivateData::ReloadFromHistory(history::URLDatabase* history_db) {
979 Clear();
980
981 if (!history_db)
982 return false;
983
984 base::TimeTicks beginning_time = base::TimeTicks::Now();
985 URLDatabase::URLEnumerator history_enum;
986 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
987 return false;
988 URLRow row;
989 while (history_enum.GetNextURL(&row))
990 IndexRow(row);
991 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
992 base::TimeTicks::Now() - beginning_time);
993 return true;
994 }
995
996 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { 1003 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {
997 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. 1004 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
998 // That is: ensure that the database has not been modified since the cache 1005 // That is: ensure that the database has not been modified since the cache
999 // was last saved. DB file modification date is inadequate. There are no 1006 // was last saved. DB file modification date is inadequate. There are no
1000 // SQLite table checksums automatically stored. 1007 // SQLite table checksums automatically stored.
1008 Clear(); // Start with a clean slate.
1009
1001 // FIXME(mrossetti): Move File IO to another thread. 1010 // FIXME(mrossetti): Move File IO to another thread.
1002 base::ThreadRestrictions::ScopedAllowIO allow_io; 1011 base::ThreadRestrictions::ScopedAllowIO allow_io;
1003 base::TimeTicks beginning_time = base::TimeTicks::Now(); 1012 base::TimeTicks beginning_time = base::TimeTicks::Now();
1013 if (!file_util::PathExists(file_path))
1014 return false;
1004 std::string data; 1015 std::string data;
1005 // If there is no cache file then simply give up. This will cause us to 1016 // If there is no cache file then simply give up. This will cause us to
1006 // attempt to rebuild from the history database. 1017 // attempt to rebuild from the history database.
1007 if (!file_util::ReadFileToString(file_path, &data)) 1018 if (!file_util::ReadFileToString(file_path, &data))
1008 return false; 1019 return false;
1009 1020
1010 InMemoryURLIndexCacheItem index_cache; 1021 InMemoryURLIndexCacheItem index_cache;
1011 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { 1022 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
1012 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " 1023 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from "
1013 << file_path.value(); 1024 << file_path.value();
1014 return false; 1025 return false;
1015 } 1026 }
1016 1027
1017 if (!RestorePrivateData(index_cache)) { 1028 if (!RestorePrivateData(index_cache)) {
1018 Clear(); // Back to square one -- must build from scratch. 1029 Clear(); // Back to square one -- must build from scratch.
1019 return false; 1030 return false;
1020 } 1031 }
1021 1032
1022 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", 1033 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
1023 base::TimeTicks::Now() - beginning_time); 1034 base::TimeTicks::Now() - beginning_time);
1024 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", 1035 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
1025 history_id_word_map_.size()); 1036 history_id_word_map_.size());
1026 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); 1037 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
1027 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); 1038 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());
1028 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); 1039 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());
1029 return true; 1040 return true;
1030 } 1041 }
1031 1042
1043 // static
1044 URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory(
1045 HistoryDatabase* history_db) {
1046 if (!history_db)
1047 return NULL;
1048
1049 base::TimeTicks beginning_time = base::TimeTicks::Now();
1050
1051 scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);
1052 URLDatabase::URLEnumerator history_enum;
1053 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
1054 return NULL;
1055 for (URLRow row; history_enum.GetNextURL(&row); )
1056 rebuilt_data->IndexRow(row);
1057
1058 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
1059 base::TimeTicks::Now() - beginning_time);
1060 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
1061 rebuilt_data->history_id_word_map_.size());
1062 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
1063 rebuilt_data->word_map_.size());
1064 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
1065 rebuilt_data->char_word_map_.size());
1066 return rebuilt_data.release();
1067 }
1068
1032 bool URLIndexPrivateData::RestorePrivateData( 1069 bool URLIndexPrivateData::RestorePrivateData(
1033 const InMemoryURLIndexCacheItem& cache) { 1070 const InMemoryURLIndexCacheItem& cache) {
1034 return RestoreWordList(cache) && RestoreWordMap(cache) && 1071 return RestoreWordList(cache) && RestoreWordMap(cache) &&
1035 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && 1072 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
1036 RestoreHistoryInfoMap(cache); 1073 RestoreHistoryInfoMap(cache);
1037 } 1074 }
1038 1075
1039 bool URLIndexPrivateData::RestoreWordList( 1076 bool URLIndexPrivateData::RestoreWordList(
1040 const InMemoryURLIndexCacheItem& cache) { 1077 const InMemoryURLIndexCacheItem& cache) {
1041 if (!cache.has_word_list()) 1078 if (!cache.has_word_list())
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
1148 if (iter->has_title()) { 1185 if (iter->has_title()) {
1149 string16 title(UTF8ToUTF16(iter->title())); 1186 string16 title(UTF8ToUTF16(iter->title()));
1150 url_row.set_title(title); 1187 url_row.set_title(title);
1151 } 1188 }
1152 history_info_map_[history_id] = url_row; 1189 history_info_map_[history_id] = url_row;
1153 } 1190 }
1154 return true; 1191 return true;
1155 } 1192 }
1156 1193
1157 } // namespace history 1194 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698