chrome/browser/history/url_index_private_data.cc - Issue 9655003: Gather word-start Information to Aid in Scoring.

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 9655003: Gather word-start Information to Aid in Scoring. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/url_index_private_data.h"	5 #include "chrome/browser/history/url_index_private_data.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <limits>	10 #include <limits>

(...skipping 24 matching lines...) Expand all Loading...
35 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry	35 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry

36 CharWordMapEntry;	36 CharWordMapEntry;

37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem	37 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem

38 WordIDHistoryMapItem;	38 WordIDHistoryMapItem;

39 typedef imui::	39 typedef imui::

40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry	40 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry

41 WordIDHistoryMapEntry;	41 WordIDHistoryMapEntry;

42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;	42 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;

43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry	43 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry

44 HistoryInfoMapEntry;	44 HistoryInfoMapEntry;

	45 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;

	46 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry

	47 WordStartsMapEntry;

45	48

46 // The maximum score any candidate result can achieve.	49 // The maximum score any candidate result can achieve.

47 const int kMaxTotalScore = 1425;	50 const int kMaxTotalScore = 1425;

48	51

49 // Score ranges used to get a 'base' score for each of the scoring factors	52 // Score ranges used to get a 'base' score for each of the scoring factors

50 // (such as recency of last visit, times visited, times the URL was typed,	53 // (such as recency of last visit, times visited, times the URL was typed,

51 // and the quality of the string match). There is a matching value range for	54 // and the quality of the string match). There is a matching value range for

52 // each of these scores for each factor. Note that the top score is greater	55 // each of these scores for each factor. Note that the top score is greater

53 // than \|kMaxTotalScore\|. The score for each candidate will be capped in the	56 // than \|kMaxTotalScore\|. The score for each candidate will be capped in the

54 // final calculation.	57 // final calculation.

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
108 score += (value - value_ranks[i]) *	111 score += (value - value_ranks[i]) *

109 (kScoreRank[i - 1] - kScoreRank[i]) /	112 (kScoreRank[i - 1] - kScoreRank[i]) /

110 (value_ranks[i - 1] - value_ranks[i]);	113 (value_ranks[i - 1] - value_ranks[i]);

111 }	114 }

112 return score;	115 return score;

113 }	116 }

114	117

115 // InMemoryURLIndex's Private Data ---------------------------------------------	118 // InMemoryURLIndex's Private Data ---------------------------------------------

116	119

117 URLIndexPrivateData::URLIndexPrivateData()	120 URLIndexPrivateData::URLIndexPrivateData()

118 : pre_filter_item_count_(0),	121 : restored_cache_version_(0),

	122 saved_cache_version_(kCurrentCacheFileVersion),

	123 pre_filter_item_count_(0),

119 post_filter_item_count_(0),	124 post_filter_item_count_(0),

120 post_scoring_item_count_(0) {	125 post_scoring_item_count_(0) {

121 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);	126 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);

122 }	127 }

123	128

124 URLIndexPrivateData::~URLIndexPrivateData() {}	129 URLIndexPrivateData::~URLIndexPrivateData() {}

125	130

126 void URLIndexPrivateData::Clear() {	131 void URLIndexPrivateData::Clear() {

127 word_list_.clear();	132 word_list_.clear();

128 available_words_.clear();	133 available_words_.clear();

129 word_map_.clear();	134 word_map_.clear();

130 char_word_map_.clear();	135 char_word_map_.clear();

131 word_id_history_map_.clear();	136 word_id_history_map_.clear();

132 history_id_word_map_.clear();	137 history_id_word_map_.clear();

133 history_info_map_.clear();	138 history_info_map_.clear();

	139 word_starts_map_.clear();

134 }	140 }

135	141

136 // Cache Updating --------------------------------------------------------------	142 // Cache Updating --------------------------------------------------------------

137	143

138 bool URLIndexPrivateData::IndexRow(const URLRow& row) {	144 bool URLIndexPrivateData::IndexRow(const URLRow& row) {

139 const GURL& gurl(row.url());	145 const GURL& gurl(row.url());

140	146

141 // Index only URLs with a whitelisted scheme.	147 // Index only URLs with a whitelisted scheme.

142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl))	148 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl))

143 return false;	149 return false;

(...skipping 10 matching lines...) Expand all Loading...
154	160

155 // Add the row for quick lookup in the history info store.	161 // Add the row for quick lookup in the history info store.

156 URLRow new_row(GURL(url), row_id);	162 URLRow new_row(GURL(url), row_id);

157 new_row.set_visit_count(row.visit_count());	163 new_row.set_visit_count(row.visit_count());

158 new_row.set_typed_count(row.typed_count());	164 new_row.set_typed_count(row.typed_count());

159 new_row.set_last_visit(row.last_visit());	165 new_row.set_last_visit(row.last_visit());

160 new_row.set_title(row.title());	166 new_row.set_title(row.title());

161 history_info_map_[history_id] = new_row;	167 history_info_map_[history_id] = new_row;

162	168

163 // Index the words contained in the URL and title of the row.	169 // Index the words contained in the URL and title of the row.

164 AddRowWordsToIndex(new_row);	170 RowWordStarts word_starts;

	171 AddRowWordsToIndex(new_row, &word_starts);

	172 word_starts_map_[history_id] = word_starts;

165 return true;	173 return true;

166 }	174 }

167	175

168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) {	176 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,

	177 RowWordStarts* word_starts) {

169 HistoryID history_id = static_cast<HistoryID>(row.id());	178 HistoryID history_id = static_cast<HistoryID>(row.id());

170 // Split URL into individual, unique words then add in the title words.	179 // Split URL into individual, unique words then add in the title words.

171 const GURL& gurl(row.url());	180 const GURL& gurl(row.url());

172 string16 url(net::FormatUrl(gurl, languages_,	181 string16 url(net::FormatUrl(gurl, languages_,

173 net::kFormatUrlOmitUsernamePassword,	182 net::kFormatUrlOmitUsernamePassword,

174 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,	183 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

175 NULL, NULL, NULL));	184 NULL, NULL, NULL));

176 url = base::i18n::ToLower(url);	185 url = base::i18n::ToLower(url);

177 String16Set url_words = String16SetFromString16(url);	186 String16Set url_words = String16SetFromString16(url,

178 String16Set title_words = String16SetFromString16(row.title());	187 word_starts ? &word_starts->url_word_starts_ : NULL);

	188 String16Set title_words = String16SetFromString16(row.title(),

	189 word_starts ? &word_starts->title_word_starts_ : NULL);

179 String16Set words;	190 String16Set words;

180 std::set_union(url_words.begin(), url_words.end(),	191 std::set_union(url_words.begin(), url_words.end(),

181 title_words.begin(), title_words.end(),	192 title_words.begin(), title_words.end(),

182 std::insert_iterator<String16Set>(words, words.begin()));	193 std::insert_iterator<String16Set>(words, words.begin()));

183 for (String16Set::iterator word_iter = words.begin();	194 for (String16Set::iterator word_iter = words.begin();

184 word_iter != words.end(); ++word_iter)	195 word_iter != words.end(); ++word_iter)

185 AddWordToIndex(*word_iter, history_id);	196 AddWordToIndex(*word_iter, history_id);

186	197

187 search_term_cache_.clear(); // Invalidate the term cache.	198 search_term_cache_.clear(); // Invalidate the term cache.

188 }	199 }

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
239 word_id_set.insert(word_id);	250 word_id_set.insert(word_id);

240 char_word_map_[uni_char] = word_id_set;	251 char_word_map_[uni_char] = word_id_set;

241 }	252 }

242 }	253 }

243 }	254 }

244	255

245 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {	256 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {

246 RemoveRowWordsFromIndex(row);	257 RemoveRowWordsFromIndex(row);

247 HistoryID history_id = static_cast<HistoryID>(row.id());	258 HistoryID history_id = static_cast<HistoryID>(row.id());

248 history_info_map_.erase(history_id);	259 history_info_map_.erase(history_id);

	260 word_starts_map_.erase(history_id);

249 }	261 }

250	262

251 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {	263 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {

252 // Remove the entries in history_id_word_map_ and word_id_history_map_ for	264 // Remove the entries in history_id_word_map_ and word_id_history_map_ for

253 // this row.	265 // this row.

254 HistoryID history_id = static_cast<HistoryID>(row.id());	266 HistoryID history_id = static_cast<HistoryID>(row.id());

255 WordIDSet word_id_set = history_id_word_map_[history_id];	267 WordIDSet word_id_set = history_id_word_map_[history_id];

256 history_id_word_map_.erase(history_id);	268 history_id_word_map_.erase(history_id);

257	269

258 // Reconcile any changes to word usage.	270 // Reconcile any changes to word usage.

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
321 row_to_update.set_visit_count(row.visit_count());	333 row_to_update.set_visit_count(row.visit_count());

322 row_to_update.set_typed_count(row.typed_count());	334 row_to_update.set_typed_count(row.typed_count());

323 row_to_update.set_last_visit(row.last_visit());	335 row_to_update.set_last_visit(row.last_visit());

324 // While the URL is guaranteed to remain stable, the title may have	336 // While the URL is guaranteed to remain stable, the title may have

325 // changed. If so, then update the index with the changed words.	337 // changed. If so, then update the index with the changed words.

326 if (title_updated) {	338 if (title_updated) {

327 // Clear all words associated with this row and re-index both the	339 // Clear all words associated with this row and re-index both the

328 // URL and title.	340 // URL and title.

329 RemoveRowWordsFromIndex(row_to_update);	341 RemoveRowWordsFromIndex(row_to_update);

330 row_to_update.set_title(row.title());	342 row_to_update.set_title(row.title());

331 AddRowWordsToIndex(row_to_update);	343 RowWordStarts word_starts;

	344 AddRowWordsToIndex(row_to_update, &word_starts);

	345 word_starts_map_[row_id] = word_starts;

332 }	346 }

333 row_was_updated = true;	347 row_was_updated = true;

334 }	348 }

335 } else {	349 } else {

336 // This indexed row no longer qualifies and will be de-indexed by	350 // This indexed row no longer qualifies and will be de-indexed by

337 // clearing all words associated with this row.	351 // clearing all words associated with this row.

338 RemoveRowFromIndex(row);	352 RemoveRowFromIndex(row);

339 row_was_updated = true;	353 row_was_updated = true;

340 }	354 }

341 if (row_was_updated)	355 if (row_was_updated)

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
417 // the final filtering we need whitespace separated substrings possibly	431 // the final filtering we need whitespace separated substrings possibly

418 // containing escaped characters.	432 // containing escaped characters.

419 string16 lower_raw_string(base::i18n::ToLower(search_string));	433 string16 lower_raw_string(base::i18n::ToLower(search_string));

420 string16 lower_unescaped_string =	434 string16 lower_unescaped_string =

421 net::UnescapeURLComponent(lower_raw_string,	435 net::UnescapeURLComponent(lower_raw_string,

422 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS);	436 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS);

423 // Extract individual 'words' (as opposed to 'terms'; see below) from the	437 // Extract individual 'words' (as opposed to 'terms'; see below) from the

424 // search string. When the user types "colspec=ID%20Mstone Release" we get	438 // search string. When the user types "colspec=ID%20Mstone Release" we get

425 // four 'words': "colspec", "id", "mstone" and "release".	439 // four 'words': "colspec", "id", "mstone" and "release".

426 String16Vector lower_words(	440 String16Vector lower_words(

427 history::String16VectorFromString16(lower_unescaped_string, false));	441 history::String16VectorFromString16(lower_unescaped_string, false, NULL));

428 ScoredHistoryMatches scored_items;	442 ScoredHistoryMatches scored_items;

429	443

430 // Do nothing if we have indexed no words (probably because we've not been	444 // Do nothing if we have indexed no words (probably because we've not been

431 // initialized yet) or the search string has no words.	445 // initialized yet) or the search string has no words.

432 if (word_list_.empty() \|\| lower_words.empty()) {	446 if (word_list_.empty() \|\| lower_words.empty()) {

433 search_term_cache_.clear(); // Invalidate the term cache.	447 search_term_cache_.clear(); // Invalidate the term cache.

434 return scored_items;	448 return scored_items;

435 }	449 }

436	450

437 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep	451 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

(...skipping 91 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
529	543

530 void URLIndexPrivateData::AddHistoryMatch::operator()(	544 void URLIndexPrivateData::AddHistoryMatch::operator()(

531 const HistoryID history_id) {	545 const HistoryID history_id) {

532 HistoryInfoMap::const_iterator hist_pos =	546 HistoryInfoMap::const_iterator hist_pos =

533 private_data_.history_info_map_.find(history_id);	547 private_data_.history_info_map_.find(history_id);

534 // Note that a history_id may be present in the word_id_history_map_ yet not	548 // Note that a history_id may be present in the word_id_history_map_ yet not

535 // be found in the history_info_map_. This occurs when an item has been	549 // be found in the history_info_map_. This occurs when an item has been

536 // deleted by the user or the item no longer qualifies as a quick result.	550 // deleted by the user or the item no longer qualifies as a quick result.

537 if (hist_pos != private_data_.history_info_map_.end()) {	551 if (hist_pos != private_data_.history_info_map_.end()) {

538 const URLRow& hist_item = hist_pos->second;	552 const URLRow& hist_item = hist_pos->second;

539 ScoredHistoryMatch match(	553 WordStartsMap::const_iterator starts_pos =

540 ScoredMatchForURL(hist_item, lower_string_, lower_terms_));	554 private_data_.word_starts_map_.find(history_id);

	555 DCHECK(starts_pos != private_data_.word_starts_map_.end());

	556 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_string_,

	557 lower_terms_,

	558 starts_pos->second));

541 if (match.raw_score > 0)	559 if (match.raw_score > 0)

542 scored_matches_.push_back(match);	560 scored_matches_.push_back(match);

543 }	561 }

544 }	562 }

545	563

546 // static	564 // static

547 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.	565 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.

548 ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL(	566 ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL(

549 const URLRow& row,	567 const URLRow& row,

550 const string16& lower_string,	568 const string16& lower_string,

551 const String16Vector& terms) {	569 const String16Vector& terms,

	570 const RowWordStarts& word_starts) {

552 ScoredHistoryMatch match(row);	571 ScoredHistoryMatch match(row);

553 GURL gurl = row.url();	572 GURL gurl = row.url();

554 if (!gurl.is_valid())	573 if (!gurl.is_valid())

555 return match;	574 return match;

556	575

557 // Figure out where each search term appears in the URL and/or page title	576 // Figure out where each search term appears in the URL and/or page title

558 // so that we can score as well as provide autocomplete highlighting.	577 // so that we can score as well as provide autocomplete highlighting.

559 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec()));	578 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec()));

560 string16 title = base::i18n::ToLower(row.title());	579 string16 title = base::i18n::ToLower(row.title());

561 int term_num = 0;	580 int term_num = 0;

(...skipping 95 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
657 for (size_t i = 1; i < matches.size(); ++i) {	676 for (size_t i = 1; i < matches.size(); ++i) {

658 if (matches[i - 1].term_num > matches[i].term_num)	677 if (matches[i - 1].term_num > matches[i].term_num)

659 ++out_of_order;	678 ++out_of_order;

660 }	679 }

661 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /	680 order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /

662 max_possible_out_of_order;	681 max_possible_out_of_order;

663 }	682 }

664	683

665 // Score component for how early in the match string the first search term	684 // Score component for how early in the match string the first search term

666 // appears. Start with kStartMaxValue points and discount by	685 // appears. Start with kStartMaxValue points and discount by

667 // kStartMaxValue/kMaxSignificantStart points for each character later than	686 // kStartMaxValue/kMaxSignificantChars points for each character later than

668 // the first at which the term begins. No points are earned if the start of	687 // the first at which the term begins. No points are earned if the start of

669 // the match occurs at or after kMaxSignificantStart.	688 // the match occurs at or after kMaxSignificantChars.

670 const size_t kMaxSignificantStart = 50;

671 const int kStartMaxValue = 1000;	689 const int kStartMaxValue = 1000;

672 int start_value = (kMaxSignificantStart -	690 int start_value = (kMaxSignificantChars -

673 std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /	691 std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue /

674 kMaxSignificantStart;	692 kMaxSignificantChars;

675	693

676 // Score component for how much of the matched string the input terms cover.	694 // Score component for how much of the matched string the input terms cover.

677 // kCompleteMaxValue points times the fraction of the URL/page title string	695 // kCompleteMaxValue points times the fraction of the URL/page title string

678 // that was matched.	696 // that was matched.

679 size_t term_length_total = std::accumulate(matches.begin(), matches.end(),	697 size_t term_length_total = std::accumulate(matches.begin(), matches.end(),

680 0, AccumulateMatchLength);	698 0, AccumulateMatchLength);

681 const size_t kMaxSignificantLength = 50;	699 const size_t kMaxSignificantLength = 50;

682 size_t max_significant_length =	700 size_t max_significant_length =

683 std::min(max_length, std::max(term_length_total, kMaxSignificantLength));	701 std::min(max_length, std::max(term_length_total, kMaxSignificantLength));

684 const int kCompleteMaxValue = 1000;	702 const int kCompleteMaxValue = 1000;

(...skipping 236 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
921 }	939 }

922 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",	940 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",

923 base::TimeTicks::Now() - beginning_time);	941 base::TimeTicks::Now() - beginning_time);

924 return true;	942 return true;

925 }	943 }

926	944

927 void URLIndexPrivateData::SavePrivateData(	945 void URLIndexPrivateData::SavePrivateData(

928 InMemoryURLIndexCacheItem* cache) const {	946 InMemoryURLIndexCacheItem* cache) const {

929 DCHECK(cache);	947 DCHECK(cache);

930 cache->set_timestamp(base::Time::Now().ToInternalValue());	948 cache->set_timestamp(base::Time::Now().ToInternalValue());

	949 cache->set_version(saved_cache_version_);

931 // history_item_count_ is no longer used but rather than change the protobuf	950 // history_item_count_ is no longer used but rather than change the protobuf

932 // definition use a placeholder. This will go away with the switch to SQLite.	951 // definition use a placeholder. This will go away with the switch to SQLite.

933 cache->set_history_item_count(0);	952 cache->set_history_item_count(0);

934 SaveWordList(cache);	953 SaveWordList(cache);

935 SaveWordMap(cache);	954 SaveWordMap(cache);

936 SaveCharWordMap(cache);	955 SaveCharWordMap(cache);

937 SaveWordIDHistoryMap(cache);	956 SaveWordIDHistoryMap(cache);

938 SaveHistoryInfoMap(cache);	957 SaveHistoryInfoMap(cache);

	958 SaveWordStartsMap(cache);

939 }	959 }

940	960

941 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {	961 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {

942 if (word_list_.empty())	962 if (word_list_.empty())

943 return;	963 return;

944 WordListItem* list_item = cache->mutable_word_list();	964 WordListItem* list_item = cache->mutable_word_list();

945 list_item->set_word_count(word_list_.size());	965 list_item->set_word_count(word_list_.size());

946 for (String16Vector::const_iterator iter = word_list_.begin();	966 for (String16Vector::const_iterator iter = word_list_.begin();

947 iter != word_list_.end(); ++iter)	967 iter != word_list_.end(); ++iter)

948 list_item->add_word(UTF16ToUTF8(*iter));	968 list_item->add_word(UTF16ToUTF8(*iter));

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1013 // is no need to save search_term_cache_ (not persistent),	1033 // is no need to save search_term_cache_ (not persistent),

1014 // languages_, etc.	1034 // languages_, etc.

1015 map_entry->set_visit_count(url_row.visit_count());	1035 map_entry->set_visit_count(url_row.visit_count());

1016 map_entry->set_typed_count(url_row.typed_count());	1036 map_entry->set_typed_count(url_row.typed_count());

1017 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());	1037 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

1018 map_entry->set_url(url_row.url().spec());	1038 map_entry->set_url(url_row.url().spec());

1019 map_entry->set_title(UTF16ToUTF8(url_row.title()));	1039 map_entry->set_title(UTF16ToUTF8(url_row.title()));

1020 }	1040 }

1021 }	1041 }

1022	1042

	1043 void URLIndexPrivateData::SaveWordStartsMap(

	1044 InMemoryURLIndexCacheItem* cache) const {

	1045 if (word_starts_map_.empty())

	1046 return;

	1047 // For unit testing: Enable saving of the cache as an earlier version to

	1048 // allow testing of cache file upgrading in ReadFromFile().

	1049 // TODO(mrossetti): Instead of intruding on production code with this kind of

	1050 // test harness, save a copy of an older version cache with known results.

	1051 // Implement this when switching the caching over to SQLite.

	1052 if (saved_cache_version_ < 1)

	1053 return;

	1054

	1055 WordStartsMapItem* map_item = cache->mutable_word_starts_map();

	1056 map_item->set_item_count(word_starts_map_.size());

	1057 for (WordStartsMap::const_iterator iter = word_starts_map_.begin();

	1058 iter != word_starts_map_.end(); ++iter) {

	1059 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();

	1060 map_entry->set_history_id(iter->first);

	1061 const RowWordStarts& word_starts(iter->second);

	1062 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();

	1063 i != word_starts.url_word_starts_.end(); ++i)

	1064 map_entry->add_url_word_starts(*i);

	1065 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();

	1066 i != word_starts.title_word_starts_.end(); ++i)

	1067 map_entry->add_title_word_starts(*i);

	1068 }

	1069 }

	1070

1023 // Cache Restoring -------------------------------------------------------------	1071 // Cache Restoring -------------------------------------------------------------

1024	1072

1025 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {	1073 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {

1026 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.	1074 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.

1027 // That is: ensure that the database has not been modified since the cache	1075 // That is: ensure that the database has not been modified since the cache

1028 // was last saved. DB file modification date is inadequate. There are no	1076 // was last saved. DB file modification date is inadequate. There are no

1029 // SQLite table checksums automatically stored.	1077 // SQLite table checksums automatically stored.

1030 Clear(); // Start with a clean slate.	1078 Clear(); // Start with a clean slate.

1031	1079

1032 // FIXME(mrossetti): Move File IO to another thread.	1080 // FIXME(mrossetti): Move File IO to another thread.

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1083 rebuilt_data->history_id_word_map_.size());	1131 rebuilt_data->history_id_word_map_.size());

1084 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",	1132 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

1085 rebuilt_data->word_map_.size());	1133 rebuilt_data->word_map_.size());

1086 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",	1134 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

1087 rebuilt_data->char_word_map_.size());	1135 rebuilt_data->char_word_map_.size());

1088 return rebuilt_data.release();	1136 return rebuilt_data.release();

1089 }	1137 }

1090	1138

1091 bool URLIndexPrivateData::RestorePrivateData(	1139 bool URLIndexPrivateData::RestorePrivateData(

1092 const InMemoryURLIndexCacheItem& cache) {	1140 const InMemoryURLIndexCacheItem& cache) {

	1141 if (cache.has_version())

	1142 restored_cache_version_ = cache.version();

1093 return RestoreWordList(cache) && RestoreWordMap(cache) &&	1143 return RestoreWordList(cache) && RestoreWordMap(cache) &&

1094 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&	1144 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

1095 RestoreHistoryInfoMap(cache);	1145 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);

1096 }	1146 }

1097	1147

1098 bool URLIndexPrivateData::RestoreWordList(	1148 bool URLIndexPrivateData::RestoreWordList(

1099 const InMemoryURLIndexCacheItem& cache) {	1149 const InMemoryURLIndexCacheItem& cache) {

1100 if (!cache.has_word_list())	1150 if (!cache.has_word_list())

1101 return false;	1151 return false;

1102 const WordListItem& list_item(cache.word_list());	1152 const WordListItem& list_item(cache.word_list());

1103 uint32 expected_item_count = list_item.word_count();	1153 uint32 expected_item_count = list_item.word_count();

1104 uint32 actual_item_count = list_item.word_size();	1154 uint32 actual_item_count = list_item.word_size();

1105 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1155 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

(...skipping 100 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1206 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));	1256 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));

1207 if (iter->has_title()) {	1257 if (iter->has_title()) {

1208 string16 title(UTF8ToUTF16(iter->title()));	1258 string16 title(UTF8ToUTF16(iter->title()));

1209 url_row.set_title(title);	1259 url_row.set_title(title);

1210 }	1260 }

1211 history_info_map_[history_id] = url_row;	1261 history_info_map_[history_id] = url_row;

1212 }	1262 }

1213 return true;	1263 return true;

1214 }	1264 }

1215	1265

	1266 bool URLIndexPrivateData::RestoreWordStartsMap(

	1267 const InMemoryURLIndexCacheItem& cache) {

	1268 // Note that this function must be called after RestoreHistoryInfoMap() has

	1269 // been run as the word starts may have to be recalculated from the urls and

	1270 // page titles.

	1271 if (cache.has_word_starts_map()) {

	1272 const WordStartsMapItem& list_item(cache.word_starts_map());

	1273 uint32 expected_item_count = list_item.item_count();

	1274 uint32 actual_item_count = list_item.word_starts_map_entry_size();

	1275 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	1276 return false;

	1277 const RepeatedPtrField<WordStartsMapEntry>&

	1278 entries(list_item.word_starts_map_entry());

	1279 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =

	1280 entries.begin(); iter != entries.end(); ++iter) {

	1281 HistoryID history_id = iter->history_id();

	1282 RowWordStarts word_starts;

	1283 // Restore the URL word starts.

	1284 const RepeatedField<int32>& url_starts(iter->url_word_starts());

	1285 for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();

	1286 jiter != url_starts.end(); ++jiter)

	1287 word_starts.url_word_starts_.push_back(*jiter);

	1288 // Restore the page title word starts.

	1289 const RepeatedField<int32>& title_starts(iter->title_word_starts());

	1290 for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();

	1291 jiter != title_starts.end(); ++jiter)

	1292 word_starts.title_word_starts_.push_back(*jiter);

	1293 word_starts_map_[history_id] = word_starts;

	1294 }

	1295 } else {

	1296 // Since the cache did not contain any word starts we must rebuild then from

	1297 // the URL and page titles.

	1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

	1299 iter != history_info_map_.end(); ++iter) {

	1300 RowWordStarts word_starts;

	1301 const URLRow& row(iter->second);

	1302 string16 url(net::FormatUrl(row.url(), languages_,

	1303 net::kFormatUrlOmitUsernamePassword,

	1304 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

	1305 NULL, NULL, NULL));

	1306 url = base::i18n::ToLower(url);

	1307 String16VectorFromString16(url, false, &word_starts.url_word_starts_);

	1308 String16VectorFromString16(

	1309 row.title(), false, &word_starts.title_word_starts_);

	1310 word_starts_map_[iter->first] = word_starts;

	1311 }

	1312 }

	1313 return true;

	1314 }

	1315

1216 } // namespace history	1316 } // namespace history

OLD	NEW

« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | no next file » | no next file with comments »