Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 10872032: Revert 152946 - Replace HistoryQuickProvider protobuf-based caching with an SQLite-based database. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/url_index_private_data.h" 5 #include "chrome/browser/history/url_index_private_data.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
11 #include <numeric> 11 #include <numeric>
12 #include <vector> 12 #include <vector>
13 13
14 #include "base/basictypes.h" 14 #include "base/basictypes.h"
15 #include "base/file_util.h" 15 #include "base/file_util.h"
16 #include "base/i18n/case_conversion.h" 16 #include "base/i18n/case_conversion.h"
17 #include "base/metrics/histogram.h"
17 #include "base/string_util.h" 18 #include "base/string_util.h"
19 #include "base/time.h"
18 #include "base/utf_string_conversions.h" 20 #include "base/utf_string_conversions.h"
19 #include "chrome/browser/autocomplete/autocomplete_provider.h" 21 #include "chrome/browser/autocomplete/autocomplete_provider.h"
22 #include "chrome/browser/autocomplete/url_prefix.h"
20 #include "chrome/browser/history/history_database.h" 23 #include "chrome/browser/history/history_database.h"
21 #include "chrome/browser/history/in_memory_url_cache_database.h" 24 #include "chrome/browser/history/in_memory_url_index.h"
22 #include "chrome/common/chrome_constants.h"
23 #include "chrome/common/url_constants.h"
24 #include "content/public/browser/browser_thread.h" 25 #include "content/public/browser/browser_thread.h"
25 #include "content/public/browser/notification_details.h" 26 #include "content/public/browser/notification_details.h"
26 #include "content/public/browser/notification_service.h" 27 #include "content/public/browser/notification_service.h"
27 #include "content/public/browser/notification_source.h" 28 #include "content/public/browser/notification_source.h"
28 #include "net/base/net_util.h" 29 #include "net/base/net_util.h"
30 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
29 31
30 using content::BrowserThread; 32 using google::protobuf::RepeatedField;
33 using google::protobuf::RepeatedPtrField;
34 using in_memory_url_index::InMemoryURLIndexCacheItem;
31 35
32 namespace history { 36 namespace history {
33 37
34 // Comparison function for sorting search terms by descending length. 38 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;
35 bool LengthGreater(const string16& string_a, const string16& string_b) { 39 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;
36 return string_a.length() > string_b.length(); 40 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
37 } 41 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;
38 42 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry
39 // Initializes a whitelist of URL schemes. 43 CharWordMapEntry;
40 void InitializeSchemeWhitelist(std::set<std::string>* whitelist) { 44 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem
41 DCHECK(whitelist); 45 WordIDHistoryMapItem;
42 if (!whitelist->empty()) 46 typedef imui::
43 return; // Nothing to do, already initialized. 47 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry
44 whitelist->insert(std::string(chrome::kAboutScheme)); 48 WordIDHistoryMapEntry;
45 whitelist->insert(std::string(chrome::kChromeUIScheme)); 49 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;
46 whitelist->insert(std::string(chrome::kFileScheme)); 50 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
47 whitelist->insert(std::string(chrome::kFtpScheme)); 51 HistoryInfoMapEntry;
48 whitelist->insert(std::string(chrome::kHttpScheme)); 52 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;
49 whitelist->insert(std::string(chrome::kHttpsScheme)); 53 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry
50 whitelist->insert(std::string(chrome::kMailToScheme)); 54 WordStartsMapEntry;
51 }
52
53 // CacheTransaction ------------------------------------------------------------
54
55 // Simple automatic helper class encapsulating calls to BeginTransaction/
56 // CommitTransaction.
57 class CacheTransaction {
58 public:
59 explicit CacheTransaction(InMemoryURLCacheDatabase* db);
60 ~CacheTransaction();
61
62 private:
63 InMemoryURLCacheDatabase* db_;
64
65 DISALLOW_COPY_AND_ASSIGN(CacheTransaction);
66 };
67
68 CacheTransaction::CacheTransaction(InMemoryURLCacheDatabase* db)
69 : db_(db) {
70 if (db_)
71 db_->BeginTransaction();
72 }
73
74 CacheTransaction::~CacheTransaction() {
75 if (db_)
76 db_->CommitTransaction();
77 }
78
79 // URLIndexPrivateData Public (but only to InMemoryURLIndex) Functions ---------
80
81 URLIndexPrivateData::URLIndexPrivateData(const FilePath& history_dir,
82 const std::string& languages)
83 : history_dir_(history_dir),
84 languages_(languages),
85 cache_enabled_(true),
86 shutdown_(false),
87 lock_(),
88 pre_filter_item_count_(0),
89 post_filter_item_count_(0),
90 post_scoring_item_count_(0) {
91 InitializeSchemeWhitelist(&scheme_whitelist_);
92 }
93
94 bool URLIndexPrivateData::Init(
95 base::SequencedWorkerPool::SequenceToken sequence_token) {
96 // Since init occurs on the DB thread, it is possible that the profile was
97 // told to shutdown before this task had a chance to kick off.
98 base::AutoLock lock(lock_);
99 if (shutdown_)
100 return false;
101 cache_db_ = new InMemoryURLCacheDatabase();
102 FilePath dir_path;
103 set_cache_enabled(GetCacheDBPath(&dir_path) &&
104 cache_db_->Init(dir_path, sequence_token));
105 return cache_enabled();
106 }
107
108 void URLIndexPrivateData::Reset() {
109 Clear();
110 if (cache_enabled())
111 cache_db_->Reset();
112 }
113
114 bool URLIndexPrivateData::Empty() const {
115 return history_info_map_.empty();
116 }
117
118 URLIndexPrivateData* URLIndexPrivateData::Snapshot() const {
119 return new URLIndexPrivateData(*this);
120 }
121
122 void URLIndexPrivateData::Shutdown() {
123 base::AutoLock lock(lock_);
124 shutdown_ = true;
125 if (cache_enabled())
126 cache_db_->Shutdown();
127 }
128
129 // Helper predicates for ValidateConsistency.
130 template <typename Pair>
131 struct SelectFirst : std::unary_function<Pair, typename Pair::first_type> {
132 const typename Pair::first_type& operator()(const Pair& x) const {
133 return x.first;
134 }
135 };
136
137 template <typename Pair>
138 struct SelectSecond : std::unary_function<Pair, typename Pair::second_type> {
139 const typename Pair::second_type& operator()(const Pair& x) const {
140 return x.second;
141 }
142 };
143
144 template <typename SourcePair, typename Target>
145 struct TargetInserter : std::unary_function<SourcePair, void> {
146 explicit TargetInserter(Target* target) : target_(target) {}
147
148 void operator()(const SourcePair& source) {
149 target_->insert(source.second.begin(), source.second.end());
150 }
151
152 Target* target_;
153 };
154
155 bool URLIndexPrivateData::ValidateConsistency() const {
156 // Scope things so that a large data set doesn't unnecessarily accumulate and
157 // hang around.
158 {
159 // Make a set of WordIDs from word_map_.
160 WordIDSet word_id_set_a;
161 std::transform(word_map_.begin(), word_map_.end(),
162 std::inserter(word_id_set_a, word_id_set_a.begin()),
163 SelectSecond<WordMap::value_type>());
164
165 // Compare word_map_'s word set to the words from word_id_history_map_.
166 {
167 WordIDSet word_id_set_b;
168 std::transform(word_id_history_map_.begin(), word_id_history_map_.end(),
169 std::inserter(word_id_set_b, word_id_set_b.begin()),
170 SelectFirst<WordIDHistoryMap::value_type>());
171 WordIDSet leftovers;
172 std::set_symmetric_difference(
173 word_id_set_a.begin(), word_id_set_a.end(),
174 word_id_set_b.begin(), word_id_set_b.end(),
175 std::inserter(leftovers, leftovers.begin()));
176 if (!leftovers.empty())
177 return false;
178 }
179
180 // Compare word_map_'s word set to the words from history_id_word_map_.
181 {
182 WordIDSet word_id_set_b;
183 std::for_each(history_id_word_map_.begin(), history_id_word_map_.end(),
184 TargetInserter<HistoryIDWordMap::value_type,
185 WordIDSet>(&word_id_set_b));
186 WordIDSet leftovers;
187 std::set_symmetric_difference(
188 word_id_set_a.begin(), word_id_set_a.end(),
189 word_id_set_b.begin(), word_id_set_b.end(),
190 std::inserter(leftovers, leftovers.begin()));
191 if (!leftovers.empty())
192 return false;
193 }
194
195 // Compare word_map_'s word set to the words from char_word_map_.
196 {
197 WordIDSet word_id_set_b;
198 std::for_each(char_word_map_.begin(), char_word_map_.end(),
199 TargetInserter<CharWordIDMap::value_type,
200 WordIDSet>(&word_id_set_b));
201 WordIDSet leftovers;
202 std::set_symmetric_difference(
203 word_id_set_a.begin(), word_id_set_a.end(),
204 word_id_set_b.begin(), word_id_set_b.end(),
205 std::inserter(leftovers, leftovers.begin()));
206 if (!leftovers.empty())
207 return false;
208 }
209
210 // Intersect available_words_ with set of WordIDs (created above from
211 // word_map_) and test for no common WordIDs.
212 {
213 WordIDSet leftovers;
214 std::set_intersection(word_id_set_a.begin(), word_id_set_a.end(),
215 available_words_.begin(), available_words_.end(),
216 std::inserter(leftovers, leftovers.begin()));
217 if (!leftovers.empty())
218 return false;
219 }
220 }
221
222 {
223 // Make a set of HistoryIDs from history_info_map_.
224 HistoryIDSet history_id_set_a;
225 std::transform(history_info_map_.begin(), history_info_map_.end(),
226 std::inserter(history_id_set_a, history_id_set_a.begin()),
227 SelectFirst<HistoryInfoMap::value_type>());
228
229 // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.
230 {
231 HistoryIDSet history_id_set_b;
232 std::transform(history_info_map_.begin(), history_info_map_.end(),
233 std::inserter(history_id_set_b, history_id_set_b.begin()),
234 SelectFirst<HistoryInfoMap::value_type>());
235 HistoryIDSet leftovers;
236 std::set_symmetric_difference(
237 history_id_set_a.begin(), history_id_set_a.end(),
238 history_id_set_b.begin(), history_id_set_b.end(),
239 std::inserter(leftovers, leftovers.begin()));
240 if (!leftovers.empty())
241 return false;
242 }
243
244 // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.
245 {
246 HistoryIDSet history_id_set_b;
247 std::for_each(word_id_history_map_.begin(), word_id_history_map_.end(),
248 TargetInserter<WordIDHistoryMap::value_type,
249 HistoryIDSet>(&history_id_set_b));
250 HistoryIDSet leftovers;
251 std::set_symmetric_difference(
252 history_id_set_a.begin(), history_id_set_a.end(),
253 history_id_set_b.begin(), history_id_set_b.end(),
254 std::inserter(leftovers, leftovers.begin()));
255 if (!leftovers.empty())
256 return false;
257 }
258
259 // Compare history_info_map_'s set to word_starts_map_'s HistoryIDs.
260 {
261 HistoryIDSet history_id_set_b;
262 std::transform(word_starts_map_.begin(), word_starts_map_.end(),
263 std::inserter(history_id_set_b, history_id_set_b.begin()),
264 SelectFirst<WordStartsMap::value_type>());
265 HistoryIDSet leftovers;
266 std::set_symmetric_difference(
267 history_id_set_a.begin(), history_id_set_a.end(),
268 history_id_set_b.begin(), history_id_set_b.end(),
269 std::inserter(leftovers, leftovers.begin()));
270 if (!leftovers.empty())
271 return false;
272 }
273 }
274
275 // Make sets of words from word_list_ and word_map_ and test for equality.
276 {
277 String16Set word_list_set;
278 std::copy(word_list_.begin(), word_list_.end(),
279 std::inserter(word_list_set, word_list_set.end()));
280 String16Set word_map_set;
281 std::transform(word_map_.begin(), word_map_.end(),
282 std::inserter(word_map_set, word_map_set.begin()),
283 SelectFirst<WordMap::value_type>());
284 if (word_list_set != word_map_set)
285 return false;
286 }
287
288 return true;
289 }
290
291 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
292 const string16& search_string) {
293 pre_filter_item_count_ = 0;
294 post_filter_item_count_ = 0;
295 post_scoring_item_count_ = 0;
296 // The search string we receive may contain escaped characters. For reducing
297 // the index we need individual, lower-cased words, ignoring escapings. For
298 // the final filtering we need whitespace separated substrings possibly
299 // containing escaped characters.
300 string16 lower_raw_string(base::i18n::ToLower(search_string));
301 string16 lower_unescaped_string =
302 net::UnescapeURLComponent(lower_raw_string,
303 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
304 // Extract individual 'words' (as opposed to 'terms'; see below) from the
305 // search string. When the user types "colspec=ID%20Mstone Release" we get
306 // four 'words': "colspec", "id", "mstone" and "release".
307 String16Vector lower_words(
308 history::String16VectorFromString16(lower_unescaped_string, false, NULL));
309 ScoredHistoryMatches scored_items;
310
311 // Do nothing if we have indexed no words (probably because we've not been
312 // initialized yet) or the search string has no words.
313 if (word_list_.empty() || lower_words.empty()) {
314 search_term_cache_.clear(); // Invalidate the term cache.
315 return scored_items;
316 }
317
318 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
319 // approach.
320 ResetSearchTermCache();
321
322 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
323
324 // Trim the candidate pool if it is large. Note that we do not filter out
325 // items that do not contain the search terms as proper substrings -- doing
326 // so is the performance-costly operation we are trying to avoid in order
327 // to maintain omnibox responsiveness.
328 const size_t kItemsToScoreLimit = 500;
329 pre_filter_item_count_ = history_id_set.size();
330 // If we trim the results set we do not want to cache the results for next
331 // time as the user's ultimately desired result could easily be eliminated
332 // in this early rough filter.
333 bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
334 if (was_trimmed) {
335 HistoryIDVector history_ids;
336 std::copy(history_id_set.begin(), history_id_set.end(),
337 std::back_inserter(history_ids));
338 // Trim down the set by sorting by typed-count, visit-count, and last
339 // visit.
340 HistoryItemFactorGreater
341 item_factor_functor(history_info_map_);
342 std::partial_sort(history_ids.begin(),
343 history_ids.begin() + kItemsToScoreLimit,
344 history_ids.end(),
345 item_factor_functor);
346 history_id_set.clear();
347 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
348 std::inserter(history_id_set, history_id_set.end()));
349 post_filter_item_count_ = history_id_set.size();
350 }
351
352 // Pass over all of the candidates filtering out any without a proper
353 // substring match, inserting those which pass in order by score. Note that
354 // in this step we are using the raw search string complete with escaped
355 // URL elements. When the user has specifically typed something akin to
356 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that
357 // specific substring appears in the URL or page title.
358
359 // We call these 'terms' (as opposed to 'words'; see above) as in this case
360 // we only want to break up the search string on 'true' whitespace rather than
361 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we
362 // get two 'terms': "colspec=id%20mstone" and "release".
363 history::String16Vector lower_raw_terms;
364 Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);
365 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),
366 AddHistoryMatch(*this, lower_raw_string,
367 lower_raw_terms, base::Time::Now())).ScoredMatches();
368
369 // Select and sort only the top kMaxMatches results.
370 if (scored_items.size() > AutocompleteProvider::kMaxMatches) {
371 std::partial_sort(scored_items.begin(),
372 scored_items.begin() +
373 AutocompleteProvider::kMaxMatches,
374 scored_items.end(),
375 ScoredHistoryMatch::MatchScoreGreater);
376 scored_items.resize(AutocompleteProvider::kMaxMatches);
377 } else {
378 std::sort(scored_items.begin(), scored_items.end(),
379 ScoredHistoryMatch::MatchScoreGreater);
380 }
381 post_scoring_item_count_ = scored_items.size();
382
383 if (was_trimmed) {
384 search_term_cache_.clear(); // Invalidate the term cache.
385 } else {
386 // Remove any stale SearchTermCacheItems.
387 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
388 cache_iter != search_term_cache_.end(); ) {
389 if (!cache_iter->second.used_)
390 search_term_cache_.erase(cache_iter++);
391 else
392 ++cache_iter;
393 }
394 }
395
396 return scored_items;
397 }
398
399 bool URLIndexPrivateData::UpdateURL(const URLRow& row) {
400 // The row may or may not already be in our index. If it is not already
401 // indexed and it qualifies then it gets indexed. If it is already
402 // indexed and still qualifies then it gets updated, otherwise it
403 // is deleted from the index.
404 bool row_was_updated = false;
405 URLID row_id = row.id();
406 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
407 if (row_pos == history_info_map_.end()) {
408 // This new row should be indexed if it qualifies.
409 URLRow new_row(row);
410 new_row.set_id(row_id);
411 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
412 IndexRow(new_row);
413 } else if (RowQualifiesAsSignificant(row, base::Time())) {
414 // This indexed row still qualifies and will be re-indexed.
415 // The url won't have changed but the title, visit count, etc.
416 // might have changed.
417 URLRow& row_to_update = row_pos->second;
418 bool title_updated = row_to_update.title() != row.title();
419 if (row_to_update.visit_count() != row.visit_count() ||
420 row_to_update.typed_count() != row.typed_count() ||
421 row_to_update.last_visit() != row.last_visit() || title_updated) {
422 CacheTransaction cache_transaction(cache_enabled() ?
423 cache_db_.get() : NULL);
424 row_to_update.set_visit_count(row.visit_count());
425 row_to_update.set_typed_count(row.typed_count());
426 row_to_update.set_last_visit(row.last_visit());
427 row_to_update.set_hidden(row.hidden());
428 // While the URL is guaranteed to remain stable, the title may have
429 // changed. If so, then update the index with the changed words.
430 if (title_updated) {
431 // Clear all words associated with this row and re-index both the
432 // URL and title.
433 RemoveRowWordsFromIndex(row_to_update);
434 row_to_update.set_title(row.title());
435 RowWordStarts word_starts;
436 AddRowWordsToIndex(row_to_update, &word_starts);
437 word_starts_map_[row_id] = word_starts;
438 // Replace all word_starts for the row.
439 if (cache_enabled()) {
440 cache_db_->RemoveWordStarts(row_id);
441 cache_db_->AddRowWordStarts(row_id, word_starts);
442 }
443 }
444 if (cache_enabled()) {
445 cache_db_->RemoveHistoryIDFromURLs(row_id);
446 cache_db_->AddHistoryToURLs(row_id, row);
447 }
448 row_was_updated = true;
449 }
450 } else {
451 // This indexed row no longer qualifies and will be de-indexed by
452 // clearing all words associated with this row.
453 RemoveRowFromIndex(row);
454 row_was_updated = true;
455 }
456 if (row_was_updated)
457 search_term_cache_.clear(); // This invalidates the cache.
458 return row_was_updated;
459 }
460
461 // Helper functor for DeleteURL.
462 class HistoryInfoMapItemHasURL {
463 public:
464 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}
465
466 bool operator()(const std::pair<const HistoryID, URLRow>& item) {
467 return item.second.url() == url_;
468 }
469
470 private:
471 const GURL& url_;
472 };
473
474 bool URLIndexPrivateData::DeleteURL(const GURL& url) {
475 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||
476 BrowserThread::CurrentlyOn(BrowserThread::UI));
477 // Find the matching entry in the history_info_map_.
478 HistoryInfoMap::iterator pos = std::find_if(
479 history_info_map_.begin(),
480 history_info_map_.end(),
481 HistoryInfoMapItemHasURL(url));
482 if (pos == history_info_map_.end())
483 return false;
484 RemoveRowFromIndex(pos->second);
485 search_term_cache_.clear(); // This invalidates the cache.
486 return true;
487 }
488
489 bool URLIndexPrivateData::RestoreFromCacheTask() {
490 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||
491 !BrowserThread::CurrentlyOn(BrowserThread::UI));
492 DCHECK(cache_db_);
493 if (IsShutdown())
494 return false;
495 Clear();
496 if (RestoreFromCache(cache_db_))
497 return true;
498 // If there was no SQLite-based cache then there might be an old,
499 // deprecated, protobuf-based cache file laying around. Get rid of it.
500 DeleteOldVersionCacheFile();
501 return false;
502 }
503
504 // static
505 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
506 HistoryDatabase* history_db,
507 scoped_refptr<URLIndexPrivateData> old_data) {
508 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||
509 !BrowserThread::CurrentlyOn(BrowserThread::UI));
510 if (!history_db)
511 return NULL;
512
513 scoped_refptr<URLIndexPrivateData> rebuilt_data(
514 new URLIndexPrivateData(*(old_data.get())));
515
516 // NOTE: We disable the cache database until after the replacement private
517 // data has been created and then we re-enable the database. Once the private
518 // data has been swapped in the database is refreshed with the new data.
519 URLDatabase::URLEnumerator history_enum;
520 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
521 return NULL;
522
523 for (URLRow row; !old_data->IsShutdown() && history_enum.GetNextURL(&row); )
524 rebuilt_data->IndexRow(row);
525
526 if (old_data->IsShutdown()) {
527 rebuilt_data.release();
528 }
529
530 return rebuilt_data;
531 }
532
533 void URLIndexPrivateData::RefreshCacheTask() {
534 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||
535 !BrowserThread::CurrentlyOn(BrowserThread::UI));
536 base::AutoLock lock(lock_);
537 if (shutdown_ || !cache_enabled())
538 return;
539 // Should a refresh fail for any reason we consider the database to be
540 // corrupt and unavailable; no further remedial action is possible.
541 set_cache_enabled(cache_db_->Reset() && cache_db_->Refresh(*this));
542 }
543
544 // static
545 void URLIndexPrivateData::InitializeSchemeWhitelistForTesting(
546 std::set<std::string>* whitelist) {
547 InitializeSchemeWhitelist(whitelist);
548 }
549 55
550 // SearchTermCacheItem --------------------------------------------------------- 56 // SearchTermCacheItem ---------------------------------------------------------
551 57
552 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem( 58 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(
553 const WordIDSet& word_id_set, 59 const WordIDSet& word_id_set,
554 const HistoryIDSet& history_id_set) 60 const HistoryIDSet& history_id_set)
555 : word_id_set_(word_id_set), 61 : word_id_set_(word_id_set),
556 history_id_set_(history_id_set), 62 history_id_set_(history_id_set),
557 used_(true) {} 63 used_(true) {}
558 64
559 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() 65 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem()
560 : used_(true) {} 66 : used_(true) {}
561 67
562 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {} 68 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}
563 69
564 // URLIndexPrivateData::AddHistoryMatch ---------------------------------------- 70 // Algorithm Functions ---------------------------------------------------------
565 71
566 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch( 72 // Comparison function for sorting search terms by descending length.
567 const URLIndexPrivateData& private_data, 73 bool LengthGreater(const string16& string_a, const string16& string_b) {
568 const string16& lower_string, 74 return string_a.length() > string_b.length();
569 const String16Vector& lower_terms,
570 const base::Time now)
571 : private_data_(private_data),
572 lower_string_(lower_string),
573 lower_terms_(lower_terms),
574 now_(now) {}
575
576 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}
577
578 void URLIndexPrivateData::AddHistoryMatch::operator()(
579 const HistoryID history_id) {
580 HistoryInfoMap::const_iterator hist_pos =
581 private_data_.history_info_map_.find(history_id);
582 if (hist_pos != private_data_.history_info_map_.end()) {
583 const URLRow& hist_item = hist_pos->second;
584 WordStartsMap::const_iterator starts_pos =
585 private_data_.word_starts_map_.find(history_id);
586 DCHECK(starts_pos != private_data_.word_starts_map_.end());
587 ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,
588 starts_pos->second, now_);
589 if (match.raw_score > 0)
590 scored_matches_.push_back(match);
591 }
592 } 75 }
593 76
594 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- 77 // InMemoryURLIndex's Private Data ---------------------------------------------
595 78
596 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( 79 URLIndexPrivateData::URLIndexPrivateData()
597 const HistoryInfoMap& history_info_map) 80 : restored_cache_version_(0),
598 : history_info_map_(history_info_map) { 81 saved_cache_version_(kCurrentCacheFileVersion),
599 }
600
601 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}
602
603 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(
604 const HistoryID h1,
605 const HistoryID h2) {
606 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));
607 if (entry1 == history_info_map_.end())
608 return false;
609 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));
610 if (entry2 == history_info_map_.end())
611 return true;
612 const URLRow& r1(entry1->second);
613 const URLRow& r2(entry2->second);
614 // First cut: typed count, visit count, recency.
615 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
616 // recently visited (within the last 12/24 hours) as highly important. Get
617 // input from mpearson.
618 if (r1.typed_count() != r2.typed_count())
619 return (r1.typed_count() > r2.typed_count());
620 if (r1.visit_count() != r2.visit_count())
621 return (r1.visit_count() > r2.visit_count());
622 return (r1.last_visit() > r2.last_visit());
623 }
624
625 // URLIndexPrivateData Private Functions ---------------------------------------
626
627 URLIndexPrivateData::URLIndexPrivateData(const URLIndexPrivateData& old_data)
628 : history_dir_(old_data.history_dir_),
629 languages_(old_data.languages_),
630 cache_enabled_(old_data.cache_enabled_),
631 shutdown_(old_data.IsShutdown()),
632 lock_(),
633 scheme_whitelist_(old_data.scheme_whitelist_),
634 pre_filter_item_count_(0), 82 pre_filter_item_count_(0),
635 post_filter_item_count_(0), 83 post_filter_item_count_(0),
636 post_scoring_item_count_(0) { 84 post_scoring_item_count_(0) {
637 cache_db_ = old_data.cache_db_;
638 }
639
640 // Called only by unit tests.
641 URLIndexPrivateData::URLIndexPrivateData()
642 : cache_enabled_(true),
643 shutdown_(false),
644 lock_(),
645 pre_filter_item_count_(0),
646 post_filter_item_count_(0),
647 post_scoring_item_count_(0) {
648 InitializeSchemeWhitelist(&scheme_whitelist_);
649 } 85 }
650 86
651 URLIndexPrivateData::~URLIndexPrivateData() {} 87 URLIndexPrivateData::~URLIndexPrivateData() {}
652 88
653 bool URLIndexPrivateData::IsShutdown() const {
654 base::AutoLock lock(lock_);
655 return shutdown_;
656 }
657
658 void URLIndexPrivateData::Clear() { 89 void URLIndexPrivateData::Clear() {
659 word_list_.clear(); 90 word_list_.clear();
660 available_words_.clear(); 91 available_words_.clear();
661 word_map_.clear(); 92 word_map_.clear();
662 char_word_map_.clear(); 93 char_word_map_.clear();
663 word_id_history_map_.clear(); 94 word_id_history_map_.clear();
664 history_id_word_map_.clear(); 95 history_id_word_map_.clear();
665 history_info_map_.clear(); 96 history_info_map_.clear();
666 word_starts_map_.clear(); 97 word_starts_map_.clear();
667 } 98 }
668 99
100 bool URLIndexPrivateData::Empty() const {
101 return history_info_map_.empty();
102 }
103
104 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {
105 scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;
106 data_copy->word_list_ = word_list_;
107 data_copy->available_words_ = available_words_;
108 data_copy->word_map_ = word_map_;
109 data_copy->char_word_map_ = char_word_map_;
110 data_copy->word_id_history_map_ = word_id_history_map_;
111 data_copy->history_id_word_map_ = history_id_word_map_;
112 data_copy->history_info_map_ = history_info_map_;
113 data_copy->word_starts_map_ = word_starts_map_;
114 return data_copy;
115 // Not copied:
116 // search_term_cache_
117 // pre_filter_item_count_
118 // post_filter_item_count_
119 // post_scoring_item_count_
120 };
121
669 // Cache Updating -------------------------------------------------------------- 122 // Cache Updating --------------------------------------------------------------
670 123
671 bool URLIndexPrivateData::IndexRow(const URLRow& row) { 124 bool URLIndexPrivateData::IndexRow(
125 const URLRow& row,
126 const std::string& languages,
127 const std::set<std::string>& scheme_whitelist) {
672 const GURL& gurl(row.url()); 128 const GURL& gurl(row.url());
673 129
674 // Index only URLs with a whitelisted scheme. 130 // Index only URLs with a whitelisted scheme.
675 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist_)) 131 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))
676 return false; 132 return false;
677 133
134 URLID row_id = row.id();
678 // Strip out username and password before saving and indexing. 135 // Strip out username and password before saving and indexing.
679 string16 url(net::FormatUrl(gurl, languages_, 136 string16 url(net::FormatUrl(gurl, languages,
680 net::kFormatUrlOmitUsernamePassword, 137 net::kFormatUrlOmitUsernamePassword,
681 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, 138 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
682 NULL, NULL, NULL)); 139 NULL, NULL, NULL));
683 140
684 URLID row_id = row.id();
685 HistoryID history_id = static_cast<HistoryID>(row_id); 141 HistoryID history_id = static_cast<HistoryID>(row_id);
686 DCHECK(history_info_map_.find(history_id) == history_info_map_.end());
687 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); 142 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
688 143
689 // Add the row for quick lookup in the history info store. 144 // Add the row for quick lookup in the history info store.
690 URLRow new_row(GURL(url), row_id); 145 URLRow new_row(GURL(url), row_id);
691 new_row.set_visit_count(row.visit_count()); 146 new_row.set_visit_count(row.visit_count());
692 new_row.set_typed_count(row.typed_count()); 147 new_row.set_typed_count(row.typed_count());
693 new_row.set_last_visit(row.last_visit()); 148 new_row.set_last_visit(row.last_visit());
694 new_row.set_title(row.title()); 149 new_row.set_title(row.title());
695
696 CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);
697 history_info_map_[history_id] = new_row; 150 history_info_map_[history_id] = new_row;
698 if (cache_enabled())
699 cache_db_->AddHistoryToURLs(history_id, row);
700 151
701 // Index the words contained in the URL and title of the row. 152 // Index the words contained in the URL and title of the row.
702 RowWordStarts word_starts; 153 RowWordStarts word_starts;
703 AddRowWordsToIndex(new_row, &word_starts); 154 AddRowWordsToIndex(new_row, &word_starts, languages);
704 word_starts_map_[history_id] = word_starts; 155 word_starts_map_[history_id] = word_starts;
705 if (cache_enabled())
706 cache_db_->AddRowWordStarts(history_id, word_starts);
707 return true; 156 return true;
708 } 157 }
709 158
710 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, 159 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,
711 RowWordStarts* word_starts) { 160 RowWordStarts* word_starts,
161 const std::string& languages) {
712 HistoryID history_id = static_cast<HistoryID>(row.id()); 162 HistoryID history_id = static_cast<HistoryID>(row.id());
713 // Split URL into individual, unique words then add in the title words. 163 // Split URL into individual, unique words then add in the title words.
714 const GURL& gurl(row.url()); 164 const GURL& gurl(row.url());
715 string16 url(net::FormatUrl(gurl, languages_, 165 string16 url(net::FormatUrl(gurl, languages,
716 net::kFormatUrlOmitUsernamePassword, 166 net::kFormatUrlOmitUsernamePassword,
717 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, 167 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
718 NULL, NULL, NULL)); 168 NULL, NULL, NULL));
719 url = base::i18n::ToLower(url); 169 url = base::i18n::ToLower(url);
720 String16Set url_words = String16SetFromString16(url, 170 String16Set url_words = String16SetFromString16(url,
721 word_starts ? &word_starts->url_word_starts_ : NULL); 171 word_starts ? &word_starts->url_word_starts_ : NULL);
722 String16Set title_words = String16SetFromString16(row.title(), 172 String16Set title_words = String16SetFromString16(row.title(),
723 word_starts ? &word_starts->title_word_starts_ : NULL); 173 word_starts ? &word_starts->title_word_starts_ : NULL);
724 String16Set words; 174 String16Set words;
725 std::set_union(url_words.begin(), url_words.end(), 175 std::set_union(url_words.begin(), url_words.end(),
(...skipping 12 matching lines...) Expand all
738 if (word_pos != word_map_.end()) 188 if (word_pos != word_map_.end())
739 UpdateWordHistory(word_pos->second, history_id); 189 UpdateWordHistory(word_pos->second, history_id);
740 else 190 else
741 AddWordHistory(term, history_id); 191 AddWordHistory(term, history_id);
742 } 192 }
743 193
744 void URLIndexPrivateData::UpdateWordHistory(WordID word_id, 194 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,
745 HistoryID history_id) { 195 HistoryID history_id) {
746 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); 196 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
747 DCHECK(history_pos != word_id_history_map_.end()); 197 DCHECK(history_pos != word_id_history_map_.end());
748 // There is no need to record changes to the word_id_history_map_ in the cache
749 // because it can be rebuilt from the history_id_word_map_'s table.
750 HistoryIDSet& history_id_set(history_pos->second); 198 HistoryIDSet& history_id_set(history_pos->second);
751 history_id_set.insert(history_id); 199 history_id_set.insert(history_id);
752 AddToHistoryIDWordMap(history_id, word_id); 200 AddToHistoryIDWordMap(history_id, word_id);
753
754 // Add word_id/history_id entry to the word_history table
755 if (cache_enabled())
756 cache_db_->AddHistoryToWordHistory(word_id, history_id);
757 } 201 }
758 202
759 void URLIndexPrivateData::AddWordHistory(const string16& term, 203 void URLIndexPrivateData::AddWordHistory(const string16& term,
760 HistoryID history_id) { 204 HistoryID history_id) {
761 WordID word_id = word_list_.size(); 205 WordID word_id = word_list_.size();
762 if (available_words_.empty()) { 206 if (available_words_.empty()) {
763 word_list_.push_back(term); 207 word_list_.push_back(term);
764 } else { 208 } else {
765 word_id = *(available_words_.begin()); 209 word_id = *(available_words_.begin());
766 word_list_[word_id] = term; 210 word_list_[word_id] = term;
767 available_words_.erase(word_id); 211 available_words_.erase(word_id);
768 } 212 }
769 word_map_[term] = word_id; 213 word_map_[term] = word_id;
770 214
771 // Add word_id/term entry to the words table;
772 if (cache_enabled())
773 cache_db_->AddWordToWords(word_id, term);
774
775 HistoryIDSet history_id_set; 215 HistoryIDSet history_id_set;
776 history_id_set.insert(history_id); 216 history_id_set.insert(history_id);
777 word_id_history_map_[word_id] = history_id_set; 217 word_id_history_map_[word_id] = history_id_set;
778 AddToHistoryIDWordMap(history_id, word_id); 218 AddToHistoryIDWordMap(history_id, word_id);
779 219
780 // Add word_id/history_id entry to the word_history table
781 if (cache_enabled())
782 cache_db_->AddHistoryToWordHistory(word_id, history_id);
783
784 // For each character in the newly added word (i.e. a word that is not 220 // For each character in the newly added word (i.e. a word that is not
785 // already in the word index), add the word to the character index. 221 // already in the word index), add the word to the character index.
786 Char16Set characters = Char16SetFromString16(term); 222 Char16Set characters = Char16SetFromString16(term);
787 for (Char16Set::iterator uni_char_iter = characters.begin(); 223 for (Char16Set::iterator uni_char_iter = characters.begin();
788 uni_char_iter != characters.end(); ++uni_char_iter) { 224 uni_char_iter != characters.end(); ++uni_char_iter) {
789 char16 uni_char = *uni_char_iter; 225 char16 uni_char = *uni_char_iter;
790 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); 226 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
791 if (char_iter != char_word_map_.end()) { 227 if (char_iter != char_word_map_.end()) {
792 // Update existing entry in the char/word index. 228 // Update existing entry in the char/word index.
793 WordIDSet& word_id_set(char_iter->second); 229 WordIDSet& word_id_set(char_iter->second);
794 word_id_set.insert(word_id); 230 word_id_set.insert(word_id);
795 } else { 231 } else {
796 // Create a new entry in the char/word index. 232 // Create a new entry in the char/word index.
797 WordIDSet word_id_set; 233 WordIDSet word_id_set;
798 word_id_set.insert(word_id); 234 word_id_set.insert(word_id);
799 char_word_map_[uni_char] = word_id_set; 235 char_word_map_[uni_char] = word_id_set;
800 } 236 }
801 // Add uni_char/word_id entry to the char_words database table.
802 if (cache_enabled())
803 cache_db_->AddWordToCharWords(uni_char, word_id);
804 } 237 }
805 } 238 }
806 239
807 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) { 240 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {
808 CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);
809 RemoveRowWordsFromIndex(row); 241 RemoveRowWordsFromIndex(row);
810 HistoryID history_id = static_cast<HistoryID>(row.id()); 242 HistoryID history_id = static_cast<HistoryID>(row.id());
811 history_info_map_.erase(history_id); 243 history_info_map_.erase(history_id);
812 word_starts_map_.erase(history_id); 244 word_starts_map_.erase(history_id);
813 if (cache_enabled())
814 cache_db_->RemoveHistoryIDFromURLs(history_id);
815 } 245 }
816 246
817 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) { 247 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {
818 // Remove the entries in history_id_word_map_ and word_id_history_map_ for 248 // Remove the entries in history_id_word_map_ and word_id_history_map_ for
819 // this row. 249 // this row.
820 HistoryID history_id = static_cast<HistoryID>(row.id()); 250 HistoryID history_id = static_cast<HistoryID>(row.id());
821 WordIDSet word_id_set = history_id_word_map_[history_id]; 251 WordIDSet word_id_set = history_id_word_map_[history_id];
822 history_id_word_map_.erase(history_id); 252 history_id_word_map_.erase(history_id);
823 253
824 // Reconcile any changes to word usage. 254 // Reconcile any changes to word usage.
(...skipping 13 matching lines...) Expand all
838 char_word_map_[uni_char].erase(word_id); 268 char_word_map_[uni_char].erase(word_id);
839 if (char_word_map_[uni_char].empty()) 269 if (char_word_map_[uni_char].empty())
840 char_word_map_.erase(uni_char); // No longer in use. 270 char_word_map_.erase(uni_char); // No longer in use.
841 } 271 }
842 272
843 // Complete the removal of references to the word. 273 // Complete the removal of references to the word.
844 word_id_history_map_.erase(word_id); 274 word_id_history_map_.erase(word_id);
845 word_map_.erase(word); 275 word_map_.erase(word);
846 word_list_[word_id] = string16(); 276 word_list_[word_id] = string16();
847 available_words_.insert(word_id); 277 available_words_.insert(word_id);
848 if (cache_enabled())
849 cache_db_->RemoveWordFromWords(word_id);
850 } 278 }
851 if (cache_enabled())
852 cache_db_->RemoveHistoryIDFromWordHistory(history_id);
853 } 279 }
854 280
855 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id, 281 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,
856 WordID word_id) { 282 WordID word_id) {
857 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id); 283 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);
858 if (iter != history_id_word_map_.end()) { 284 if (iter != history_id_word_map_.end()) {
859 WordIDSet& word_id_set(iter->second); 285 WordIDSet& word_id_set(iter->second);
860 word_id_set.insert(word_id); 286 word_id_set.insert(word_id);
861 } else { 287 } else {
862 WordIDSet word_id_set; 288 WordIDSet word_id_set;
863 word_id_set.insert(word_id); 289 word_id_set.insert(word_id);
864 history_id_word_map_[history_id] = word_id_set; 290 history_id_word_map_[history_id] = word_id_set;
865 } 291 }
866 } 292 }
867 293
294 bool URLIndexPrivateData::UpdateURL(
295 const URLRow& row,
296 const std::string& languages,
297 const std::set<std::string>& scheme_whitelist) {
298 // The row may or may not already be in our index. If it is not already
299 // indexed and it qualifies then it gets indexed. If it is already
300 // indexed and still qualifies then it gets updated, otherwise it
301 // is deleted from the index.
302 bool row_was_updated = false;
303 URLID row_id = row.id();
304 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
305 if (row_pos == history_info_map_.end()) {
306 // This new row should be indexed if it qualifies.
307 URLRow new_row(row);
308 new_row.set_id(row_id);
309 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
310 IndexRow(new_row, languages, scheme_whitelist);
311 } else if (RowQualifiesAsSignificant(row, base::Time())) {
312 // This indexed row still qualifies and will be re-indexed.
313 // The url won't have changed but the title, visit count, etc.
314 // might have changed.
315 URLRow& row_to_update = row_pos->second;
316 bool title_updated = row_to_update.title() != row.title();
317 if (row_to_update.visit_count() != row.visit_count() ||
318 row_to_update.typed_count() != row.typed_count() ||
319 row_to_update.last_visit() != row.last_visit() || title_updated) {
320 row_to_update.set_visit_count(row.visit_count());
321 row_to_update.set_typed_count(row.typed_count());
322 row_to_update.set_last_visit(row.last_visit());
323 // While the URL is guaranteed to remain stable, the title may have
324 // changed. If so, then update the index with the changed words.
325 if (title_updated) {
326 // Clear all words associated with this row and re-index both the
327 // URL and title.
328 RemoveRowWordsFromIndex(row_to_update);
329 row_to_update.set_title(row.title());
330 RowWordStarts word_starts;
331 AddRowWordsToIndex(row_to_update, &word_starts, languages);
332 word_starts_map_[row_id] = word_starts;
333 }
334 row_was_updated = true;
335 }
336 } else {
337 // This indexed row no longer qualifies and will be de-indexed by
338 // clearing all words associated with this row.
339 RemoveRowFromIndex(row);
340 row_was_updated = true;
341 }
342 if (row_was_updated)
343 search_term_cache_.clear(); // This invalidates the cache.
344 return row_was_updated;
345 }
346
347 // Helper functor for DeleteURL.
348 class HistoryInfoMapItemHasURL {
349 public:
350 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}
351
352 bool operator()(const std::pair<const HistoryID, URLRow>& item) {
353 return item.second.url() == url_;
354 }
355
356 private:
357 const GURL& url_;
358 };
359
360 bool URLIndexPrivateData::DeleteURL(const GURL& url) {
361 // Find the matching entry in the history_info_map_.
362 HistoryInfoMap::iterator pos = std::find_if(
363 history_info_map_.begin(),
364 history_info_map_.end(),
365 HistoryInfoMapItemHasURL(url));
366 if (pos == history_info_map_.end())
367 return false;
368 RemoveRowFromIndex(pos->second);
369 search_term_cache_.clear(); // This invalidates the cache.
370 return true;
371 }
372
373 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------
374
375 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
376 const HistoryInfoMap& history_info_map)
377 : history_info_map_(history_info_map) {
378 }
379
380 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}
381
382 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(
383 const HistoryID h1,
384 const HistoryID h2) {
385 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));
386 if (entry1 == history_info_map_.end())
387 return false;
388 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));
389 if (entry2 == history_info_map_.end())
390 return true;
391 const URLRow& r1(entry1->second);
392 const URLRow& r2(entry2->second);
393 // First cut: typed count, visit count, recency.
394 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
395 // recently visited (within the last 12/24 hours) as highly important. Get
396 // input from mpearson.
397 if (r1.typed_count() != r2.typed_count())
398 return (r1.typed_count() > r2.typed_count());
399 if (r1.visit_count() != r2.visit_count())
400 return (r1.visit_count() > r2.visit_count());
401 return (r1.last_visit() > r2.last_visit());
402 }
403
404 // Cache Searching -------------------------------------------------------------
405
406 // NOTE: This is the main public search function.
407 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
408 const string16& search_string) {
409 pre_filter_item_count_ = 0;
410 post_filter_item_count_ = 0;
411 post_scoring_item_count_ = 0;
412 // The search string we receive may contain escaped characters. For reducing
413 // the index we need individual, lower-cased words, ignoring escapings. For
414 // the final filtering we need whitespace separated substrings possibly
415 // containing escaped characters.
416 string16 lower_raw_string(base::i18n::ToLower(search_string));
417 string16 lower_unescaped_string =
418 net::UnescapeURLComponent(lower_raw_string,
419 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
420 // Extract individual 'words' (as opposed to 'terms'; see below) from the
421 // search string. When the user types "colspec=ID%20Mstone Release" we get
422 // four 'words': "colspec", "id", "mstone" and "release".
423 String16Vector lower_words(
424 history::String16VectorFromString16(lower_unescaped_string, false, NULL));
425 ScoredHistoryMatches scored_items;
426
427 // Do nothing if we have indexed no words (probably because we've not been
428 // initialized yet) or the search string has no words.
429 if (word_list_.empty() || lower_words.empty()) {
430 search_term_cache_.clear(); // Invalidate the term cache.
431 return scored_items;
432 }
433
434 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
435 // approach.
436 ResetSearchTermCache();
437
438 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
439
440 // Trim the candidate pool if it is large. Note that we do not filter out
441 // items that do not contain the search terms as proper substrings -- doing
442 // so is the performance-costly operation we are trying to avoid in order
443 // to maintain omnibox responsiveness.
444 const size_t kItemsToScoreLimit = 500;
445 pre_filter_item_count_ = history_id_set.size();
446 // If we trim the results set we do not want to cache the results for next
447 // time as the user's ultimately desired result could easily be eliminated
448 // in this early rough filter.
449 bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
450 if (was_trimmed) {
451 HistoryIDVector history_ids;
452 std::copy(history_id_set.begin(), history_id_set.end(),
453 std::back_inserter(history_ids));
454 // Trim down the set by sorting by typed-count, visit-count, and last
455 // visit.
456 HistoryItemFactorGreater
457 item_factor_functor(history_info_map_);
458 std::partial_sort(history_ids.begin(),
459 history_ids.begin() + kItemsToScoreLimit,
460 history_ids.end(),
461 item_factor_functor);
462 history_id_set.clear();
463 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
464 std::inserter(history_id_set, history_id_set.end()));
465 post_filter_item_count_ = history_id_set.size();
466 }
467
468 // Pass over all of the candidates filtering out any without a proper
469 // substring match, inserting those which pass in order by score. Note that
470 // in this step we are using the raw search string complete with escaped
471 // URL elements. When the user has specifically typed something akin to
472 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that
473 // specific substring appears in the URL or page title.
474
475 // We call these 'terms' (as opposed to 'words'; see above) as in this case
476 // we only want to break up the search string on 'true' whitespace rather than
477 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we
478 // get two 'terms': "colspec=id%20mstone" and "release".
479 history::String16Vector lower_raw_terms;
480 Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);
481 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),
482 AddHistoryMatch(*this, lower_raw_string,
483 lower_raw_terms, base::Time::Now())).ScoredMatches();
484
485 // Select and sort only the top kMaxMatches results.
486 if (scored_items.size() > AutocompleteProvider::kMaxMatches) {
487 std::partial_sort(scored_items.begin(),
488 scored_items.begin() +
489 AutocompleteProvider::kMaxMatches,
490 scored_items.end(),
491 ScoredHistoryMatch::MatchScoreGreater);
492 scored_items.resize(AutocompleteProvider::kMaxMatches);
493 } else {
494 std::sort(scored_items.begin(), scored_items.end(),
495 ScoredHistoryMatch::MatchScoreGreater);
496 }
497 post_scoring_item_count_ = scored_items.size();
498
499 if (was_trimmed) {
500 search_term_cache_.clear(); // Invalidate the term cache.
501 } else {
502 // Remove any stale SearchTermCacheItems.
503 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
504 cache_iter != search_term_cache_.end(); ) {
505 if (!cache_iter->second.used_)
506 search_term_cache_.erase(cache_iter++);
507 else
508 ++cache_iter;
509 }
510 }
511
512 return scored_items;
513 }
514
515 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------
516
517 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(
518 const URLIndexPrivateData& private_data,
519 const string16& lower_string,
520 const String16Vector& lower_terms,
521 const base::Time now)
522 : private_data_(private_data),
523 lower_string_(lower_string),
524 lower_terms_(lower_terms),
525 now_(now) {}
526
527 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}
528
529 void URLIndexPrivateData::AddHistoryMatch::operator()(
530 const HistoryID history_id) {
531 HistoryInfoMap::const_iterator hist_pos =
532 private_data_.history_info_map_.find(history_id);
533 if (hist_pos != private_data_.history_info_map_.end()) {
534 const URLRow& hist_item = hist_pos->second;
535 WordStartsMap::const_iterator starts_pos =
536 private_data_.word_starts_map_.find(history_id);
537 DCHECK(starts_pos != private_data_.word_starts_map_.end());
538 ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,
539 starts_pos->second, now_);
540 if (match.raw_score > 0)
541 scored_matches_.push_back(match);
542 }
543 }
544
868 void URLIndexPrivateData::ResetSearchTermCache() { 545 void URLIndexPrivateData::ResetSearchTermCache() {
869 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); 546 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();
870 iter != search_term_cache_.end(); ++iter) 547 iter != search_term_cache_.end(); ++iter)
871 iter->second.used_ = false; 548 iter->second.used_ = false;
872 } 549 }
873 550
874 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( 551 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(
875 const String16Vector& unsorted_words) { 552 const String16Vector& unsorted_words) {
876 // Break the terms down into individual terms (words), get the candidate 553 // Break the terms down into individual terms (words), get the candidate
877 // set for each term, and intersect each to get a final candidate list. 554 // set for each term, and intersect each to get a final candidate list.
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
1046 std::set_intersection(word_id_set.begin(), word_id_set.end(), 723 std::set_intersection(word_id_set.begin(), word_id_set.end(),
1047 char_word_id_set.begin(), char_word_id_set.end(), 724 char_word_id_set.begin(), char_word_id_set.end(),
1048 std::inserter(new_word_id_set, 725 std::inserter(new_word_id_set,
1049 new_word_id_set.begin())); 726 new_word_id_set.begin()));
1050 word_id_set.swap(new_word_id_set); 727 word_id_set.swap(new_word_id_set);
1051 } 728 }
1052 } 729 }
1053 return word_id_set; 730 return word_id_set;
1054 } 731 }
1055 732
1056 bool URLIndexPrivateData::RestoreFromCache(InMemoryURLCacheDatabase* cache_db) { 733 // Cache Saving ----------------------------------------------------------------
1057 DCHECK(cache_db); 734
1058 if (!cache_db->RestorePrivateData(this)) { 735 // static
1059 cache_db->Reset(); 736 void URLIndexPrivateData::WritePrivateDataToCacheFileTask(
1060 return false; 737 scoped_refptr<URLIndexPrivateData> private_data,
1061 } 738 const FilePath& file_path,
1062 return !Empty() && ValidateConsistency(); 739 scoped_refptr<RefCountedBool> succeeded) {
1063 } 740 DCHECK(private_data.get());
1064 741 DCHECK(!file_path.empty());
1065 void URLIndexPrivateData::DeleteOldVersionCacheFile() const { 742 succeeded->set_value(private_data->SaveToFile(file_path));
1066 if (history_dir_.empty()) 743 }
1067 return; 744
1068 FilePath path = history_dir_.Append(chrome::kHQPCacheFilename); 745 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {
1069 file_util::Delete(path, false); 746 base::TimeTicks beginning_time = base::TimeTicks::Now();
1070 } 747 InMemoryURLIndexCacheItem index_cache;
1071 748 SavePrivateData(&index_cache);
1072 bool URLIndexPrivateData::GetCacheDBPath(FilePath* file_path) { 749 std::string data;
1073 DCHECK(file_path); 750 if (!index_cache.SerializeToString(&data)) {
1074 if (history_dir_.empty()) 751 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
1075 return false; 752 return false;
1076 *file_path = history_dir_.Append(chrome::kHQPCacheDBFilename); 753 }
1077 return true; 754
1078 } 755 int size = data.size();
1079 756 if (file_util::WriteFile(file_path, data.c_str(), size) != size) {
1080 void URLIndexPrivateData::SetCacheDatabaseForTesting( 757 LOG(WARNING) << "Failed to write " << file_path.value();
1081 InMemoryURLCacheDatabase* test_db) { 758 return false;
1082 cache_db_ = test_db; 759 }
760 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
761 base::TimeTicks::Now() - beginning_time);
762 return true;
763 }
764
765 void URLIndexPrivateData::SavePrivateData(
766 InMemoryURLIndexCacheItem* cache) const {
767 DCHECK(cache);
768 cache->set_timestamp(base::Time::Now().ToInternalValue());
769 cache->set_version(saved_cache_version_);
770 // history_item_count_ is no longer used but rather than change the protobuf
771 // definition use a placeholder. This will go away with the switch to SQLite.
772 cache->set_history_item_count(0);
773 SaveWordList(cache);
774 SaveWordMap(cache);
775 SaveCharWordMap(cache);
776 SaveWordIDHistoryMap(cache);
777 SaveHistoryInfoMap(cache);
778 SaveWordStartsMap(cache);
779 }
780
781 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
782 if (word_list_.empty())
783 return;
784 WordListItem* list_item = cache->mutable_word_list();
785 list_item->set_word_count(word_list_.size());
786 for (String16Vector::const_iterator iter = word_list_.begin();
787 iter != word_list_.end(); ++iter)
788 list_item->add_word(UTF16ToUTF8(*iter));
789 }
790
791 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
792 if (word_map_.empty())
793 return;
794 WordMapItem* map_item = cache->mutable_word_map();
795 map_item->set_item_count(word_map_.size());
796 for (WordMap::const_iterator iter = word_map_.begin();
797 iter != word_map_.end(); ++iter) {
798 WordMapEntry* map_entry = map_item->add_word_map_entry();
799 map_entry->set_word(UTF16ToUTF8(iter->first));
800 map_entry->set_word_id(iter->second);
801 }
802 }
803
804 void URLIndexPrivateData::SaveCharWordMap(
805 InMemoryURLIndexCacheItem* cache) const {
806 if (char_word_map_.empty())
807 return;
808 CharWordMapItem* map_item = cache->mutable_char_word_map();
809 map_item->set_item_count(char_word_map_.size());
810 for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
811 iter != char_word_map_.end(); ++iter) {
812 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
813 map_entry->set_char_16(iter->first);
814 const WordIDSet& word_id_set(iter->second);
815 map_entry->set_item_count(word_id_set.size());
816 for (WordIDSet::const_iterator set_iter = word_id_set.begin();
817 set_iter != word_id_set.end(); ++set_iter)
818 map_entry->add_word_id(*set_iter);
819 }
820 }
821
822 void URLIndexPrivateData::SaveWordIDHistoryMap(
823 InMemoryURLIndexCacheItem* cache) const {
824 if (word_id_history_map_.empty())
825 return;
826 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
827 map_item->set_item_count(word_id_history_map_.size());
828 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
829 iter != word_id_history_map_.end(); ++iter) {
830 WordIDHistoryMapEntry* map_entry =
831 map_item->add_word_id_history_map_entry();
832 map_entry->set_word_id(iter->first);
833 const HistoryIDSet& history_id_set(iter->second);
834 map_entry->set_item_count(history_id_set.size());
835 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();
836 set_iter != history_id_set.end(); ++set_iter)
837 map_entry->add_history_id(*set_iter);
838 }
839 }
840
841 void URLIndexPrivateData::SaveHistoryInfoMap(
842 InMemoryURLIndexCacheItem* cache) const {
843 if (history_info_map_.empty())
844 return;
845 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
846 map_item->set_item_count(history_info_map_.size());
847 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
848 iter != history_info_map_.end(); ++iter) {
849 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
850 map_entry->set_history_id(iter->first);
851 const URLRow& url_row(iter->second);
852 // Note: We only save information that contributes to the index so there
853 // is no need to save search_term_cache_ (not persistent).
854 map_entry->set_visit_count(url_row.visit_count());
855 map_entry->set_typed_count(url_row.typed_count());
856 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
857 map_entry->set_url(url_row.url().spec());
858 map_entry->set_title(UTF16ToUTF8(url_row.title()));
859 }
860 }
861
862 void URLIndexPrivateData::SaveWordStartsMap(
863 InMemoryURLIndexCacheItem* cache) const {
864 if (word_starts_map_.empty())
865 return;
866 // For unit testing: Enable saving of the cache as an earlier version to
867 // allow testing of cache file upgrading in ReadFromFile().
868 // TODO(mrossetti): Instead of intruding on production code with this kind of
869 // test harness, save a copy of an older version cache with known results.
870 // Implement this when switching the caching over to SQLite.
871 if (saved_cache_version_ < 1)
872 return;
873
874 WordStartsMapItem* map_item = cache->mutable_word_starts_map();
875 map_item->set_item_count(word_starts_map_.size());
876 for (WordStartsMap::const_iterator iter = word_starts_map_.begin();
877 iter != word_starts_map_.end(); ++iter) {
878 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
879 map_entry->set_history_id(iter->first);
880 const RowWordStarts& word_starts(iter->second);
881 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();
882 i != word_starts.url_word_starts_.end(); ++i)
883 map_entry->add_url_word_starts(*i);
884 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();
885 i != word_starts.title_word_starts_.end(); ++i)
886 map_entry->add_title_word_starts(*i);
887 }
888 }
889
890 // Cache Restoring -------------------------------------------------------------
891
892 // static
893 void URLIndexPrivateData::RestoreFromFileTask(
894 const FilePath& file_path,
895 scoped_refptr<URLIndexPrivateData> private_data,
896 const std::string& languages) {
897 private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages);
898 }
899
900 // static
901 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(
902 const FilePath& file_path,
903 const std::string& languages) {
904 base::TimeTicks beginning_time = base::TimeTicks::Now();
905 if (!file_util::PathExists(file_path))
906 return NULL;
907 std::string data;
908 // If there is no cache file then simply give up. This will cause us to
909 // attempt to rebuild from the history database.
910 if (!file_util::ReadFileToString(file_path, &data))
911 return NULL;
912
913 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);
914 InMemoryURLIndexCacheItem index_cache;
915 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
916 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "
917 << file_path.value();
918 return restored_data;
919 }
920
921 if (!restored_data->RestorePrivateData(index_cache, languages))
922 return NULL;
923
924 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
925 base::TimeTicks::Now() - beginning_time);
926 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
927 restored_data->history_id_word_map_.size());
928 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
929 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
930 restored_data->word_map_.size());
931 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
932 restored_data->char_word_map_.size());
933 if (restored_data->Empty())
934 return NULL; // 'No data' is the same as a failed reload.
935 return restored_data;
936 }
937
938 // static
939 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
940 HistoryDatabase* history_db,
941 const std::string& languages,
942 const std::set<std::string>& scheme_whitelist) {
943 if (!history_db)
944 return NULL;
945
946 base::TimeTicks beginning_time = base::TimeTicks::Now();
947
948 scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);
949 URLDatabase::URLEnumerator history_enum;
950 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
951 return NULL;
952 for (URLRow row; history_enum.GetNextURL(&row); )
953 rebuilt_data->IndexRow(row, languages, scheme_whitelist);
954
955 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
956 base::TimeTicks::Now() - beginning_time);
957 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
958 rebuilt_data->history_id_word_map_.size());
959 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
960 rebuilt_data->word_map_.size());
961 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
962 rebuilt_data->char_word_map_.size());
963 return rebuilt_data;
964 }
965
966 bool URLIndexPrivateData::RestorePrivateData(
967 const InMemoryURLIndexCacheItem& cache,
968 const std::string& languages) {
969 if (cache.has_version())
970 restored_cache_version_ = cache.version();
971 return RestoreWordList(cache) && RestoreWordMap(cache) &&
972 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
973 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);
974 }
975
976 bool URLIndexPrivateData::RestoreWordList(
977 const InMemoryURLIndexCacheItem& cache) {
978 if (!cache.has_word_list())
979 return false;
980 const WordListItem& list_item(cache.word_list());
981 uint32 expected_item_count = list_item.word_count();
982 uint32 actual_item_count = list_item.word_size();
983 if (actual_item_count == 0 || actual_item_count != expected_item_count)
984 return false;
985 const RepeatedPtrField<std::string>& words(list_item.word());
986 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
987 iter != words.end(); ++iter)
988 word_list_.push_back(UTF8ToUTF16(*iter));
989 return true;
990 }
991
992 bool URLIndexPrivateData::RestoreWordMap(
993 const InMemoryURLIndexCacheItem& cache) {
994 if (!cache.has_word_map())
995 return false;
996 const WordMapItem& list_item(cache.word_map());
997 uint32 expected_item_count = list_item.item_count();
998 uint32 actual_item_count = list_item.word_map_entry_size();
999 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1000 return false;
1001 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
1002 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
1003 iter != entries.end(); ++iter)
1004 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
1005 return true;
1006 }
1007
1008 bool URLIndexPrivateData::RestoreCharWordMap(
1009 const InMemoryURLIndexCacheItem& cache) {
1010 if (!cache.has_char_word_map())
1011 return false;
1012 const CharWordMapItem& list_item(cache.char_word_map());
1013 uint32 expected_item_count = list_item.item_count();
1014 uint32 actual_item_count = list_item.char_word_map_entry_size();
1015 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1016 return false;
1017 const RepeatedPtrField<CharWordMapEntry>&
1018 entries(list_item.char_word_map_entry());
1019 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =
1020 entries.begin(); iter != entries.end(); ++iter) {
1021 expected_item_count = iter->item_count();
1022 actual_item_count = iter->word_id_size();
1023 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1024 return false;
1025 char16 uni_char = static_cast<char16>(iter->char_16());
1026 WordIDSet word_id_set;
1027 const RepeatedField<int32>& word_ids(iter->word_id());
1028 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
1029 jiter != word_ids.end(); ++jiter)
1030 word_id_set.insert(*jiter);
1031 char_word_map_[uni_char] = word_id_set;
1032 }
1033 return true;
1034 }
1035
1036 bool URLIndexPrivateData::RestoreWordIDHistoryMap(
1037 const InMemoryURLIndexCacheItem& cache) {
1038 if (!cache.has_word_id_history_map())
1039 return false;
1040 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
1041 uint32 expected_item_count = list_item.item_count();
1042 uint32 actual_item_count = list_item.word_id_history_map_entry_size();
1043 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1044 return false;
1045 const RepeatedPtrField<WordIDHistoryMapEntry>&
1046 entries(list_item.word_id_history_map_entry());
1047 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =
1048 entries.begin(); iter != entries.end(); ++iter) {
1049 expected_item_count = iter->item_count();
1050 actual_item_count = iter->history_id_size();
1051 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1052 return false;
1053 WordID word_id = iter->word_id();
1054 HistoryIDSet history_id_set;
1055 const RepeatedField<int64>& history_ids(iter->history_id());
1056 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
1057 jiter != history_ids.end(); ++jiter) {
1058 history_id_set.insert(*jiter);
1059 AddToHistoryIDWordMap(*jiter, word_id);
1060 }
1061 word_id_history_map_[word_id] = history_id_set;
1062 }
1063 return true;
1064 }
1065
1066 bool URLIndexPrivateData::RestoreHistoryInfoMap(
1067 const InMemoryURLIndexCacheItem& cache) {
1068 if (!cache.has_history_info_map())
1069 return false;
1070 const HistoryInfoMapItem& list_item(cache.history_info_map());
1071 uint32 expected_item_count = list_item.item_count();
1072 uint32 actual_item_count = list_item.history_info_map_entry_size();
1073 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1074 return false;
1075 const RepeatedPtrField<HistoryInfoMapEntry>&
1076 entries(list_item.history_info_map_entry());
1077 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =
1078 entries.begin(); iter != entries.end(); ++iter) {
1079 HistoryID history_id = iter->history_id();
1080 GURL url(iter->url());
1081 URLRow url_row(url, history_id);
1082 url_row.set_visit_count(iter->visit_count());
1083 url_row.set_typed_count(iter->typed_count());
1084 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
1085 if (iter->has_title()) {
1086 string16 title(UTF8ToUTF16(iter->title()));
1087 url_row.set_title(title);
1088 }
1089 history_info_map_[history_id] = url_row;
1090 }
1091 return true;
1092 }
1093
1094 bool URLIndexPrivateData::RestoreWordStartsMap(
1095 const InMemoryURLIndexCacheItem& cache,
1096 const std::string& languages) {
1097 // Note that this function must be called after RestoreHistoryInfoMap() has
1098 // been run as the word starts may have to be recalculated from the urls and
1099 // page titles.
1100 if (cache.has_word_starts_map()) {
1101 const WordStartsMapItem& list_item(cache.word_starts_map());
1102 uint32 expected_item_count = list_item.item_count();
1103 uint32 actual_item_count = list_item.word_starts_map_entry_size();
1104 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1105 return false;
1106 const RepeatedPtrField<WordStartsMapEntry>&
1107 entries(list_item.word_starts_map_entry());
1108 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
1109 entries.begin(); iter != entries.end(); ++iter) {
1110 HistoryID history_id = iter->history_id();
1111 RowWordStarts word_starts;
1112 // Restore the URL word starts.
1113 const RepeatedField<int32>& url_starts(iter->url_word_starts());
1114 for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();
1115 jiter != url_starts.end(); ++jiter)
1116 word_starts.url_word_starts_.push_back(*jiter);
1117 // Restore the page title word starts.
1118 const RepeatedField<int32>& title_starts(iter->title_word_starts());
1119 for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();
1120 jiter != title_starts.end(); ++jiter)
1121 word_starts.title_word_starts_.push_back(*jiter);
1122 word_starts_map_[history_id] = word_starts;
1123 }
1124 } else {
1125 // Since the cache did not contain any word starts we must rebuild then from
1126 // the URL and page titles.
1127 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
1128 iter != history_info_map_.end(); ++iter) {
1129 RowWordStarts word_starts;
1130 const URLRow& row(iter->second);
1131 string16 url(net::FormatUrl(row.url(), languages,
1132 net::kFormatUrlOmitUsernamePassword,
1133 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
1134 NULL, NULL, NULL));
1135 url = base::i18n::ToLower(url);
1136 String16VectorFromString16(url, false, &word_starts.url_word_starts_);
1137 String16VectorFromString16(
1138 row.title(), false, &word_starts.title_word_starts_);
1139 word_starts_map_[iter->first] = word_starts;
1140 }
1141 }
1142 return true;
1083 } 1143 }
1084 1144
1085 // static 1145 // static
1086 bool URLIndexPrivateData::URLSchemeIsWhitelisted( 1146 bool URLIndexPrivateData::URLSchemeIsWhitelisted(
1087 const GURL& gurl, 1147 const GURL& gurl,
1088 const std::set<std::string>& whitelist) { 1148 const std::set<std::string>& whitelist) {
1089 return whitelist.find(gurl.scheme()) != whitelist.end(); 1149 return whitelist.find(gurl.scheme()) != whitelist.end();
1090 } 1150 }
1091 1151
1092 } // namespace history 1152 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | chrome/browser/history/url_index_private_data_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698