chrome/browser/history/url_index_private_data.cc - Issue 9655003: Gather word-start Information to Aid in Scoring.

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 9655003: Gather word-start Information to Aid in Scoring. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/url_index_private_data.h"	5 #include "chrome/browser/history/url_index_private_data.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <limits>	10 #include <limits>

(...skipping 24 matching lines...) Expand all Loading...
35 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry	35 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry

36 CharWordMapEntry;	36 CharWordMapEntry;

37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem	37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem

38 WordIDHistoryMapItem;	38 WordIDHistoryMapItem;

39 typedef imui::	39 typedef imui::

40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry	40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry

41 WordIDHistoryMapEntry;	41 WordIDHistoryMapEntry;

42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;	42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;

43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry	43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry

44 HistoryInfoMapEntry;	44 HistoryInfoMapEntry;

	45 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;

	46 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry

	47 WordStartsMapEntry;

45	48

46 // The maximum score any candidate result can achieve.	49 // The maximum score any candidate result can achieve.

47 const int kMaxTotalScore = 1425;	50 const int kMaxTotalScore = 1425;

48	51

49 // Score ranges used to get a 'base' score for each of the scoring factors	52 // Score ranges used to get a 'base' score for each of the scoring factors

50 // (such as recency of last visit, times visited, times the URL was typed,	53 // (such as recency of last visit, times visited, times the URL was typed,

51 // and the quality of the string match). There is a matching value range for	54 // and the quality of the string match). There is a matching value range for

52 // each of these scores for each factor. Note that the top score is greater	55 // each of these scores for each factor. Note that the top score is greater

53 // than \|kMaxTotalScore\|. The score for each candidate will be capped in the	56 // than \|kMaxTotalScore\|. The score for each candidate will be capped in the

54 // final calculation.	57 // final calculation.

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
108 score += (value - value_ranks[i]) *	111 score += (value - value_ranks[i]) *

109 (kScoreRank[i - 1] - kScoreRank[i]) /	112 (kScoreRank[i - 1] - kScoreRank[i]) /

110 (value_ranks[i - 1] - value_ranks[i]);	113 (value_ranks[i - 1] - value_ranks[i]);

111 }	114 }

112 return score;	115 return score;

113 }	116 }

114	117

115 // InMemoryURLIndex's Private Data ---------------------------------------------	118 // InMemoryURLIndex's Private Data ---------------------------------------------

116	119

117 URLIndexPrivateData::URLIndexPrivateData()	120 URLIndexPrivateData::URLIndexPrivateData()

118 : pre_filter_item_count_(0),	121 : restored_cache_version_(0),

	122 saved_cache_version_(kCurrentCacheFileVersion),

	123 pre_filter_item_count_(0),

119 post_filter_item_count_(0),	124 post_filter_item_count_(0),

120 post_scoring_item_count_(0) {	125 post_scoring_item_count_(0) {

121 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);	126 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);

122 }	127 }

123	128

124 URLIndexPrivateData::~URLIndexPrivateData() {}	129 URLIndexPrivateData::~URLIndexPrivateData() {}

125	130

126 void URLIndexPrivateData::Clear() {	131 void URLIndexPrivateData::Clear() {

127 word_list_.clear();	132 word_list_.clear();

128 available_words_.clear();	133 available_words_.clear();

129 word_map_.clear();	134 word_map_.clear();

130 char_word_map_.clear();	135 char_word_map_.clear();

131 word_id_history_map_.clear();	136 word_id_history_map_.clear();

132 history_id_word_map_.clear();	137 history_id_word_map_.clear();

133 history_info_map_.clear();	138 history_info_map_.clear();

	139 word_starts_map_.clear();

134 }	140 }

135	141

136 // Cache Updating --------------------------------------------------------------	142 // Cache Updating --------------------------------------------------------------

137	143

138 bool URLIndexPrivateData::IndexRow(const URLRow& row) {	144 bool URLIndexPrivateData::IndexRow(const URLRow& row) {

139 const GURL& gurl(row.url());	145 const GURL& gurl(row.url());

140	146

141 // Index only URLs with a whitelisted scheme.	147 // Index only URLs with a whitelisted scheme.

142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl))	148 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl))

143 return false;	149 return false;

(...skipping 10 matching lines...) Expand all Loading...
154	160

155 // Add the row for quick lookup in the history info store.	161 // Add the row for quick lookup in the history info store.

156 URLRow new_row(GURL(url), row_id);	162 URLRow new_row(GURL(url), row_id);

157 new_row.set_visit_count(row.visit_count());	163 new_row.set_visit_count(row.visit_count());

158 new_row.set_typed_count(row.typed_count());	164 new_row.set_typed_count(row.typed_count());

159 new_row.set_last_visit(row.last_visit());	165 new_row.set_last_visit(row.last_visit());

160 new_row.set_title(row.title());	166 new_row.set_title(row.title());

161 history_info_map_[history_id] = new_row;	167 history_info_map_[history_id] = new_row;

162	168

163 // Index the words contained in the URL and title of the row.	169 // Index the words contained in the URL and title of the row.

164 AddRowWordsToIndex(new_row);	170 WordStarts word_starts;

	171 AddRowWordsToIndex(new_row, &word_starts);

	172 word_starts_map_[history_id] = word_starts;

165 return true;	173 return true;

166 }	174 }

167	175

168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) {	176 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,

	177 WordStarts* word_starts) {

169 HistoryID history_id = static_cast<HistoryID>(row.id());	178 HistoryID history_id = static_cast<HistoryID>(row.id());

170 // Split URL into individual, unique words then add in the title words.	179 // Split URL into individual, unique words then add in the title words.

171 const GURL& gurl(row.url());	180 const GURL& gurl(row.url());

172 string16 url(net::FormatUrl(gurl, languages_,	181 string16 url(net::FormatUrl(gurl, languages_,

173 net::kFormatUrlOmitUsernamePassword,	182 net::kFormatUrlOmitUsernamePassword,

174 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,	183 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

175 NULL, NULL, NULL));	184 NULL, NULL, NULL));

176 url = base::i18n::ToLower(url);	185 url = base::i18n::ToLower(url);

177 String16Set url_words = String16SetFromString16(url);	186 String16Set url_words = String16SetFromString16(url,

178 String16Set title_words = String16SetFromString16(row.title());	187 word_starts ? &word_starts->url_word_starts_ : NULL);

	188 String16Set title_words = String16SetFromString16(row.title(),

	189 word_starts ? &word_starts->title_word_starts_ : NULL);

179 String16Set words;	190 String16Set words;

180 std::set_union(url_words.begin(), url_words.end(),	191 std::set_union(url_words.begin(), url_words.end(),

181 title_words.begin(), title_words.end(),	192 title_words.begin(), title_words.end(),

182 std::insert_iterator<String16Set>(words, words.begin()));	193 std::insert_iterator<String16Set>(words, words.begin()));

183 for (String16Set::iterator word_iter = words.begin();	194 for (String16Set::iterator word_iter = words.begin();

184 word_iter != words.end(); ++word_iter)	195 word_iter != words.end(); ++word_iter)

185 AddWordToIndex(*word_iter, history_id);	196 AddWordToIndex(*word_iter, history_id);

186	197

187 search_term_cache_.clear(); // Invalidate the term cache.	198 search_term_cache_.clear(); // Invalidate the term cache.

188 }	199 }

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
239 word_id_set.insert(word_id);	250 word_id_set.insert(word_id);

240 char_word_map_[uni_char] = word_id_set;	251 char_word_map_[uni_char] = word_id_set;

241 }	252 }

242 }	253 }

243 }	254 }

244	255

245 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {	256 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {

246 RemoveRowWordsFromIndex(row);	257 RemoveRowWordsFromIndex(row);

247 HistoryID history_id = static_cast<HistoryID>(row.id());	258 HistoryID history_id = static_cast<HistoryID>(row.id());

248 history_info_map_.erase(history_id);	259 history_info_map_.erase(history_id);

	260 word_starts_map_.erase(history_id);

249 }	261 }

250	262

251 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {	263 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {

252 // Remove the entries in history_id_word_map_ and word_id_history_map_ for	264 // Remove the entries in history_id_word_map_ and word_id_history_map_ for

253 // this row.	265 // this row.

254 HistoryID history_id = static_cast<HistoryID>(row.id());	266 HistoryID history_id = static_cast<HistoryID>(row.id());

255 WordIDSet word_id_set = history_id_word_map_[history_id];	267 WordIDSet word_id_set = history_id_word_map_[history_id];

256 history_id_word_map_.erase(history_id);	268 history_id_word_map_.erase(history_id);

257	269

258 // Reconcile any changes to word usage.	270 // Reconcile any changes to word usage.

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
321 row_to_update.set_visit_count(row.visit_count());	333 row_to_update.set_visit_count(row.visit_count());

322 row_to_update.set_typed_count(row.typed_count());	334 row_to_update.set_typed_count(row.typed_count());

323 row_to_update.set_last_visit(row.last_visit());	335 row_to_update.set_last_visit(row.last_visit());

324 // While the URL is guaranteed to remain stable, the title may have	336 // While the URL is guaranteed to remain stable, the title may have

325 // changed. If so, then update the index with the changed words.	337 // changed. If so, then update the index with the changed words.

326 if (title_updated) {	338 if (title_updated) {

327 // Clear all words associated with this row and re-index both the	339 // Clear all words associated with this row and re-index both the

328 // URL and title.	340 // URL and title.

329 RemoveRowWordsFromIndex(row_to_update);	341 RemoveRowWordsFromIndex(row_to_update);

330 row_to_update.set_title(row.title());	342 row_to_update.set_title(row.title());

331 AddRowWordsToIndex(row_to_update);	343 WordStarts word_starts;

	344 AddRowWordsToIndex(row_to_update, &word_starts);

	345 word_starts_map_[row_id] = word_starts;

332 }	346 }

333 row_was_updated = true;	347 row_was_updated = true;

334 }	348 }

335 } else {	349 } else {

336 // This indexed row no longer qualifies and will be de-indexed by	350 // This indexed row no longer qualifies and will be de-indexed by

337 // clearing all words associated with this row.	351 // clearing all words associated with this row.

338 RemoveRowFromIndex(row);	352 RemoveRowFromIndex(row);

339 row_was_updated = true;	353 row_was_updated = true;

340 }	354 }

341 if (row_was_updated)	355 if (row_was_updated)

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
417 // the final filtering we need whitespace separated substrings possibly	431 // the final filtering we need whitespace separated substrings possibly

418 // containing escaped characters.	432 // containing escaped characters.

419 string16 lower_raw_string(base::i18n::ToLower(search_string));	433 string16 lower_raw_string(base::i18n::ToLower(search_string));

420 string16 lower_unescaped_string =	434 string16 lower_unescaped_string =

421 net::UnescapeURLComponent(lower_raw_string,	435 net::UnescapeURLComponent(lower_raw_string,

422 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS);	436 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS);

423 // Extract individual 'words' (as opposed to 'terms'; see below) from the	437 // Extract individual 'words' (as opposed to 'terms'; see below) from the

424 // search string. When the user types "colspec=ID%20Mstone Release" we get	438 // search string. When the user types "colspec=ID%20Mstone Release" we get

425 // four 'words': "colspec", "id", "mstone" and "release".	439 // four 'words': "colspec", "id", "mstone" and "release".

426 String16Vector lower_words(	440 String16Vector lower_words(

427 history::String16VectorFromString16(lower_unescaped_string, false));	441 history::String16VectorFromString16(lower_unescaped_string, false, NULL));

428 ScoredHistoryMatches scored_items;	442 ScoredHistoryMatches scored_items;

429	443

430 // Do nothing if we have indexed no words (probably because we've not been	444 // Do nothing if we have indexed no words (probably because we've not been

431 // initialized yet) or the search string has no words.	445 // initialized yet) or the search string has no words.

432 if (word_list_.empty() \|\| lower_words.empty()) {	446 if (word_list_.empty() \|\| lower_words.empty()) {

433 search_term_cache_.clear(); // Invalidate the term cache.	447 search_term_cache_.clear(); // Invalidate the term cache.

434 return scored_items;	448 return scored_items;

435 }	449 }

436	450

437 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep	451 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

(...skipping 91 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
529	543

530 void URLIndexPrivateData::AddHistoryMatch::operator()(	544 void URLIndexPrivateData::AddHistoryMatch::operator()(

531 const HistoryID history_id) {	545 const HistoryID history_id) {

532 HistoryInfoMap::const_iterator hist_pos =	546 HistoryInfoMap::const_iterator hist_pos =

533 private_data_.history_info_map_.find(history_id);	547 private_data_.history_info_map_.find(history_id);

534 // Note that a history_id may be present in the word_id_history_map_ yet not	548 // Note that a history_id may be present in the word_id_history_map_ yet not

535 // be found in the history_info_map_. This occurs when an item has been	549 // be found in the history_info_map_. This occurs when an item has been

536 // deleted by the user or the item no longer qualifies as a quick result.	550 // deleted by the user or the item no longer qualifies as a quick result.

537 if (hist_pos != private_data_.history_info_map_.end()) {	551 if (hist_pos != private_data_.history_info_map_.end()) {

538 const URLRow& hist_item = hist_pos->second;	552 const URLRow& hist_item = hist_pos->second;

539 ScoredHistoryMatch match(	553 WordStartsMap::const_iterator starts_pos =

540 ScoredMatchForURL(hist_item, lower_string_, lower_terms_));	554 private_data_.word_starts_map_.find(history_id);

	555 DCHECK(starts_pos != private_data_.word_starts_map_.end());

	556 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_string_,

	557 lower_terms_,

	558 starts_pos->second));

541 if (match.raw_score > 0)	559 if (match.raw_score > 0)

542 scored_matches_.push_back(match);	560 scored_matches_.push_back(match);

543 }	561 }

544 }	562 }

545	563

546 // static	564 // static

547 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.	565 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.

548 ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL(	566 ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL(

549 const URLRow& row,	567 const URLRow& row,

550 const string16& lower_string,	568 const string16& lower_string,

551 const String16Vector& terms) {	569 const String16Vector& terms,

	570 const WordStarts& word_starts) {

552 ScoredHistoryMatch match(row);	571 ScoredHistoryMatch match(row);

553 GURL gurl = row.url();	572 GURL gurl = row.url();

554 if (!gurl.is_valid())	573 if (!gurl.is_valid())

555 return match;	574 return match;

556	575

557 // Figure out where each search term appears in the URL and/or page title	576 // Figure out where each search term appears in the URL and/or page title

558 // so that we can score as well as provide autocomplete highlighting.	577 // so that we can score as well as provide autocomplete highlighting.

559 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec()));	578 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec()));

560 string16 title = base::i18n::ToLower(row.title());	579 string16 title = base::i18n::ToLower(row.title());

561 int term_num = 0;	580 int term_num = 0;

(...skipping 95 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
657 for (size_t i = 1; i < matches.size(); ++i) {	676 for (size_t i = 1; i < matches.size(); ++i) {

658 if (matches[i - 1].term_num > matches[i].term_num)	677 if (matches[i - 1].term_num > matches[i].term_num)

659 ++out_of_order;	678 ++out_of_order;

660 }	679 }

661 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /	680 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /

662 max_possible_out_of_order;	681 max_possible_out_of_order;

663 }	682 }

664	683

665 // Score component for how early in the match string the first search term	684 // Score component for how early in the match string the first search term

666 // appears. Start with kStartMaxValue points and discount by	685 // appears. Start with kStartMaxValue points and discount by

667 // kStartMaxValue/kMaxSignificantStart points for each character later than	686 // kStartMaxValue/kMaxSignificantChars points for each character later than

668 // the first at which the term begins. No points are earned if the start of	687 // the first at which the term begins. No points are earned if the start of

669 // the match occurs at or after kMaxSignificantStart.	688 // the match occurs at or after kMaxSignificantChars.

670 const size_t kMaxSignificantStart = 50;

671 const int kStartMaxValue = 1000;	689 const int kStartMaxValue = 1000;

672 int start_value = (kMaxSignificantStart -	690 int start_value = (kMaxSignificantChars -

673 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /	691 std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue /

674 kMaxSignificantStart;	692 kMaxSignificantChars;

675	693

676 // Score component for how much of the matched string the input terms cover.	694 // Score component for how much of the matched string the input terms cover.

677 // kCompleteMaxValue points times the fraction of the URL/page title string	695 // kCompleteMaxValue points times the fraction of the URL/page title string

678 // that was matched.	696 // that was matched.

679 size_t term_length_total = std::accumulate(matches.begin(), matches.end(),	697 size_t term_length_total = std::accumulate(matches.begin(), matches.end(),

680 0, AccumulateMatchLength);	698 0, AccumulateMatchLength);

681 const size_t kMaxSignificantLength = 50;	699 const size_t kMaxSignificantLength = 50;

682 size_t max_significant_length =	700 size_t max_significant_length =

683 std::min(max_length, std::max(term_length_total, kMaxSignificantLength));	701 std::min(max_length, std::max(term_length_total, kMaxSignificantLength));

684 const int kCompleteMaxValue = 1000;	702 const int kCompleteMaxValue = 1000;

(...skipping 236 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
921 }	939 }

922 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",	940 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",

923 base::TimeTicks::Now() - beginning_time);	941 base::TimeTicks::Now() - beginning_time);

924 return true;	942 return true;

925 }	943 }

926	944

927 void URLIndexPrivateData::SavePrivateData(	945 void URLIndexPrivateData::SavePrivateData(

928 InMemoryURLIndexCacheItem* cache) const {	946 InMemoryURLIndexCacheItem* cache) const {

929 DCHECK(cache);	947 DCHECK(cache);

930 cache->set_timestamp(base::Time::Now().ToInternalValue());	948 cache->set_timestamp(base::Time::Now().ToInternalValue());

	949 cache->set_version(saved_cache_version_);

931 // history_item_count_ is no longer used but rather than change the protobuf	950 // history_item_count_ is no longer used but rather than change the protobuf

932 // definition use a placeholder. This will go away with the switch to SQLite.	951 // definition use a placeholder. This will go away with the switch to SQLite.

933 cache->set_history_item_count(0);	952 cache->set_history_item_count(0);

934 SaveWordList(cache);	953 SaveWordList(cache);

935 SaveWordMap(cache);	954 SaveWordMap(cache);

936 SaveCharWordMap(cache);	955 SaveCharWordMap(cache);

937 SaveWordIDHistoryMap(cache);	956 SaveWordIDHistoryMap(cache);

938 SaveHistoryInfoMap(cache);	957 SaveHistoryInfoMap(cache);

	958 SaveWordStartsMap(cache);

939 }	959 }

940	960

941 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {	961 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {

942 if (word_list_.empty())	962 if (word_list_.empty())

943 return;	963 return;

944 WordListItem* list_item = cache->mutable_word_list();	964 WordListItem* list_item = cache->mutable_word_list();

945 list_item->set_word_count(word_list_.size());	965 list_item->set_word_count(word_list_.size());

946 for (String16Vector::const_iterator iter = word_list_.begin();	966 for (String16Vector::const_iterator iter = word_list_.begin();

947 iter != word_list_.end(); ++iter)	967 iter != word_list_.end(); ++iter)

948 list_item->add_word(UTF16ToUTF8(*iter));	968 list_item->add_word(UTF16ToUTF8(*iter));

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1013 // is no need to save search_term_cache_ (not persistent),	1033 // is no need to save search_term_cache_ (not persistent),

1014 // languages_, etc.	1034 // languages_, etc.

1015 map_entry->set_visit_count(url_row.visit_count());	1035 map_entry->set_visit_count(url_row.visit_count());

1016 map_entry->set_typed_count(url_row.typed_count());	1036 map_entry->set_typed_count(url_row.typed_count());

1017 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());	1037 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

1018 map_entry->set_url(url_row.url().spec());	1038 map_entry->set_url(url_row.url().spec());

1019 map_entry->set_title(UTF16ToUTF8(url_row.title()));	1039 map_entry->set_title(UTF16ToUTF8(url_row.title()));

1020 }	1040 }

1021 }	1041 }

1022	1042

	1043 void URLIndexPrivateData::SaveWordStartsMap(

	1044 InMemoryURLIndexCacheItem* cache) const {

	1045 if (word_starts_map_.empty())

	1046 return;

	1047 // For unit testing: Enable saving of the cache as an earlier version to

	1048 // allow testing of cache file upgrading in ReadFromFile().
	Peter Kasting 2012/03/09 02:37:43 Nit: Is this the best way of doing this? For most Nit: Is this the best way of doing this? For most other versioned data storage migration tests, we keep a copy of an old version in chrome/test/data and then write code that directly opens and migrates it to check that it does what we want. This avoids being as intrusive to the real code that saves these. mrossetti 2012/03/14 23:23:49 I hear you, and understand. Since saving using pro Show quoted text On 2012/03/09 02:37:43, Peter Kasting wrote: > Nit: Is this the best way of doing this? For most other versioned data storage > migration tests, we keep a copy of an old version in chrome/test/data and then > write code that directly opens and migrates it to check that it does what we > want. This avoids being as intrusive to the real code that saves these. I hear you, and understand. Since saving using protobuf is going to be replaced in the near future (for some definition of 'near') and because this is pretty complex data I'm going to leave it as-is for now and see how difficult it would be to take the proper approach once this caching has moved over to SQLite. I've added a TODO to remind me of this.
	1049 if (saved_cache_version_ < 1)

	1050 return;

	1051

	1052 WordStartsMapItem* map_item = cache->mutable_word_starts_map();

	1053 map_item->set_item_count(word_starts_map_.size());

	1054 for (WordStartsMap::const_iterator iter = word_starts_map_.begin();

	1055 iter != word_starts_map_.end(); ++iter) {

	1056 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();

	1057 map_entry->set_history_id(iter->first);

	1058 const WordStarts& word_starts(iter->second);

	1059 for (std::vector<int>::const_iterator siter =

	1060 word_starts.url_word_starts_.begin();

	1061 siter != word_starts.url_word_starts_.end(); ++siter)

	1062 map_entry->add_url_word_starts(*siter);

	1063 for (std::vector<int>::const_iterator siter =

	1064 word_starts.title_word_starts_.begin();

	1065 siter != word_starts.title_word_starts_.end(); ++siter)

	1066 map_entry->add_title_word_starts(*siter);

	1067 }

	1068 }

	1069

1023 // Cache Restoring -------------------------------------------------------------	1070 // Cache Restoring -------------------------------------------------------------

1024	1071

1025 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {	1072 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {

1026 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.	1073 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.

1027 // That is: ensure that the database has not been modified since the cache	1074 // That is: ensure that the database has not been modified since the cache

1028 // was last saved. DB file modification date is inadequate. There are no	1075 // was last saved. DB file modification date is inadequate. There are no

1029 // SQLite table checksums automatically stored.	1076 // SQLite table checksums automatically stored.

1030 Clear(); // Start with a clean slate.	1077 Clear(); // Start with a clean slate.

1031	1078

1032 // FIXME(mrossetti): Move File IO to another thread.	1079 // FIXME(mrossetti): Move File IO to another thread.

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1083 rebuilt_data->history_id_word_map_.size());	1130 rebuilt_data->history_id_word_map_.size());

1084 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",	1131 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

1085 rebuilt_data->word_map_.size());	1132 rebuilt_data->word_map_.size());

1086 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",	1133 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

1087 rebuilt_data->char_word_map_.size());	1134 rebuilt_data->char_word_map_.size());

1088 return rebuilt_data.release();	1135 return rebuilt_data.release();

1089 }	1136 }

1090	1137

1091 bool URLIndexPrivateData::RestorePrivateData(	1138 bool URLIndexPrivateData::RestorePrivateData(

1092 const InMemoryURLIndexCacheItem& cache) {	1139 const InMemoryURLIndexCacheItem& cache) {

	1140 if (cache.has_version())

	1141 restored_cache_version_ = cache.version();

1093 return RestoreWordList(cache) && RestoreWordMap(cache) &&	1142 return RestoreWordList(cache) && RestoreWordMap(cache) &&

1094 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&	1143 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

1095 RestoreHistoryInfoMap(cache);	1144 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);

1096 }	1145 }

1097	1146

1098 bool URLIndexPrivateData::RestoreWordList(	1147 bool URLIndexPrivateData::RestoreWordList(

1099 const InMemoryURLIndexCacheItem& cache) {	1148 const InMemoryURLIndexCacheItem& cache) {

1100 if (!cache.has_word_list())	1149 if (!cache.has_word_list())

1101 return false;	1150 return false;

1102 const WordListItem& list_item(cache.word_list());	1151 const WordListItem& list_item(cache.word_list());

1103 uint32 expected_item_count = list_item.word_count();	1152 uint32 expected_item_count = list_item.word_count();

1104 uint32 actual_item_count = list_item.word_size();	1153 uint32 actual_item_count = list_item.word_size();

1105 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1154 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

(...skipping 100 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1206 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));	1255 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));

1207 if (iter->has_title()) {	1256 if (iter->has_title()) {

1208 string16 title(UTF8ToUTF16(iter->title()));	1257 string16 title(UTF8ToUTF16(iter->title()));

1209 url_row.set_title(title);	1258 url_row.set_title(title);

1210 }	1259 }

1211 history_info_map_[history_id] = url_row;	1260 history_info_map_[history_id] = url_row;

1212 }	1261 }

1213 return true;	1262 return true;

1214 }	1263 }

1215	1264

	1265 bool URLIndexPrivateData::RestoreWordStartsMap(

	1266 const InMemoryURLIndexCacheItem& cache) {

	1267 // Note that this function must be called after RestoreHistoryInfoMap() has

	1268 // been run as the word starts may have to be recalculated from the urls and

	1269 // page titles.

	1270 if (cache.has_word_starts_map()) {

	1271 const WordStartsMapItem& list_item(cache.word_starts_map());

	1272 uint32 expected_item_count = list_item.item_count();

	1273 uint32 actual_item_count = list_item.word_starts_map_entry_size();

	1274 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	1275 return false;

	1276 const RepeatedPtrField<WordStartsMapEntry>&

	1277 entries(list_item.word_starts_map_entry());

	1278 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =

	1279 entries.begin(); iter != entries.end(); ++iter) {

	1280 HistoryID history_id = iter->history_id();

	1281 WordStarts word_starts;

	1282 // Restore the URL word starts.

	1283 const RepeatedField<int32>& url_starts(iter->url_word_starts());

	1284 for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();
	Peter Kasting 2012/03/09 02:37:43 Nit: Argh... use \|i\| and \|j\| rather than \|iter\| an Nit: Argh... use \|i\| and \|j\| rather than \|iter\| and \|jiter\|, as \|jiter\| looks like a misspelling of "jitter"
	1285 jiter != url_starts.end(); ++jiter)

	1286 word_starts.url_word_starts_.push_back(*jiter);

	1287 // Restore the page title word starts.

	1288 const RepeatedField<int32>& title_starts(iter->title_word_starts());

	1289 for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();

	1290 jiter != title_starts.end(); ++jiter)

	1291 word_starts.title_word_starts_.push_back(*jiter);

	1292 word_starts_map_[history_id] = word_starts;

	1293 }

	1294 } else {

	1295 // Since the cache did not contain any word starts we must rebuild then from

	1296 // the URL and page titles.

	1297 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

	1298 iter != history_info_map_.end(); ++iter) {

	1299 WordStarts word_starts;

	1300 const URLRow& row(iter->second);

	1301 string16 url(net::FormatUrl(row.url(), languages_,

	1302 net::kFormatUrlOmitUsernamePassword,

	1303 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

	1304 NULL, NULL, NULL));

	1305 url = base::i18n::ToLower(url);

	1306 String16VectorFromString16(url, false, &word_starts.url_word_starts_);

	1307 String16VectorFromString16(

	1308 row.title(), false, &word_starts.title_word_starts_);

	1309 word_starts_map_[iter->first] = word_starts;

	1310 }

	1311 }

	1312 return true;

	1313 }

	1314

1216 } // namespace history	1315 } // namespace history

OLD	NEW

« chrome/browser/history/in_memory_url_index_types_unittest.cc ('K') | « chrome/browser/history/url_index_private_data.h ('k') | no next file » | no next file with comments »