chrome/browser/history/url_index_private_data.cc - Issue 10872032: Revert 152946 - Replace HistoryQuickProvider protobuf-based caching with an SQLite-based database.

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 10872032: Revert 152946 - Replace HistoryQuickProvider protobuf-based caching with an SQLite-based database. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/url_index_private_data.h"	5 #include "chrome/browser/history/url_index_private_data.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <limits>	10 #include <limits>

11 #include <numeric>	11 #include <numeric>

12 #include <vector>	12 #include <vector>

13	13

14 #include "base/basictypes.h"	14 #include "base/basictypes.h"

15 #include "base/file_util.h"	15 #include "base/file_util.h"

16 #include "base/i18n/case_conversion.h"	16 #include "base/i18n/case_conversion.h"

	17 #include "base/metrics/histogram.h"

17 #include "base/string_util.h"	18 #include "base/string_util.h"

	19 #include "base/time.h"

18 #include "base/utf_string_conversions.h"	20 #include "base/utf_string_conversions.h"

19 #include "chrome/browser/autocomplete/autocomplete_provider.h"	21 #include "chrome/browser/autocomplete/autocomplete_provider.h"

	22 #include "chrome/browser/autocomplete/url_prefix.h"

20 #include "chrome/browser/history/history_database.h"	23 #include "chrome/browser/history/history_database.h"

21 #include "chrome/browser/history/in_memory_url_cache_database.h"	24 #include "chrome/browser/history/in_memory_url_index.h"

22 #include "chrome/common/chrome_constants.h"

23 #include "chrome/common/url_constants.h"

24 #include "content/public/browser/browser_thread.h"	25 #include "content/public/browser/browser_thread.h"

25 #include "content/public/browser/notification_details.h"	26 #include "content/public/browser/notification_details.h"

26 #include "content/public/browser/notification_service.h"	27 #include "content/public/browser/notification_service.h"

27 #include "content/public/browser/notification_source.h"	28 #include "content/public/browser/notification_source.h"

28 #include "net/base/net_util.h"	29 #include "net/base/net_util.h"

	30 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"

29	31

30 using content::BrowserThread;	32 using google::protobuf::RepeatedField;

	33 using google::protobuf::RepeatedPtrField;

	34 using in_memory_url_index::InMemoryURLIndexCacheItem;

31	35

32 namespace history {	36 namespace history {

33	37

34 // Comparison function for sorting search terms by descending length.	38 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;

35 bool LengthGreater(const string16& string_a, const string16& string_b) {	39 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;

36 return string_a.length() > string_b.length();	40 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;

37 }	41 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;

38	42 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry

39 // Initializes a whitelist of URL schemes.	43 CharWordMapEntry;

40 void InitializeSchemeWhitelist(std::set<std::string>* whitelist) {	44 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem

41 DCHECK(whitelist);	45 WordIDHistoryMapItem;

42 if (!whitelist->empty())	46 typedef imui::

43 return; // Nothing to do, already initialized.	47 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry

44 whitelist->insert(std::string(chrome::kAboutScheme));	48 WordIDHistoryMapEntry;

45 whitelist->insert(std::string(chrome::kChromeUIScheme));	49 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;

46 whitelist->insert(std::string(chrome::kFileScheme));	50 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry

47 whitelist->insert(std::string(chrome::kFtpScheme));	51 HistoryInfoMapEntry;

48 whitelist->insert(std::string(chrome::kHttpScheme));	52 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;

49 whitelist->insert(std::string(chrome::kHttpsScheme));	53 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry

50 whitelist->insert(std::string(chrome::kMailToScheme));	54 WordStartsMapEntry;

51 }

52

53 // CacheTransaction ------------------------------------------------------------

54

55 // Simple automatic helper class encapsulating calls to BeginTransaction/

56 // CommitTransaction.

57 class CacheTransaction {

58 public:

59 explicit CacheTransaction(InMemoryURLCacheDatabase* db);

60 ~CacheTransaction();

61

62 private:

63 InMemoryURLCacheDatabase* db_;

64

65 DISALLOW_COPY_AND_ASSIGN(CacheTransaction);

66 };

67

68 CacheTransaction::CacheTransaction(InMemoryURLCacheDatabase* db)

69 : db_(db) {

70 if (db_)

71 db_->BeginTransaction();

72 }

73

74 CacheTransaction::~CacheTransaction() {

75 if (db_)

76 db_->CommitTransaction();

77 }

78

79 // URLIndexPrivateData Public (but only to InMemoryURLIndex) Functions ---------

80

81 URLIndexPrivateData::URLIndexPrivateData(const FilePath& history_dir,

82 const std::string& languages)

83 : history_dir_(history_dir),

84 languages_(languages),

85 cache_enabled_(true),

86 shutdown_(false),

87 lock_(),

88 pre_filter_item_count_(0),

89 post_filter_item_count_(0),

90 post_scoring_item_count_(0) {

91 InitializeSchemeWhitelist(&scheme_whitelist_);

92 }

93

94 bool URLIndexPrivateData::Init(

95 base::SequencedWorkerPool::SequenceToken sequence_token) {

96 // Since init occurs on the DB thread, it is possible that the profile was

97 // told to shutdown before this task had a chance to kick off.

98 base::AutoLock lock(lock_);

99 if (shutdown_)

100 return false;

101 cache_db_ = new InMemoryURLCacheDatabase();

102 FilePath dir_path;

103 set_cache_enabled(GetCacheDBPath(&dir_path) &&

104 cache_db_->Init(dir_path, sequence_token));

105 return cache_enabled();

106 }

107

108 void URLIndexPrivateData::Reset() {

109 Clear();

110 if (cache_enabled())

111 cache_db_->Reset();

112 }

113

114 bool URLIndexPrivateData::Empty() const {

115 return history_info_map_.empty();

116 }

117

118 URLIndexPrivateData* URLIndexPrivateData::Snapshot() const {

119 return new URLIndexPrivateData(*this);

120 }

121

122 void URLIndexPrivateData::Shutdown() {

123 base::AutoLock lock(lock_);

124 shutdown_ = true;

125 if (cache_enabled())

126 cache_db_->Shutdown();

127 }

128

129 // Helper predicates for ValidateConsistency.

130 template <typename Pair>

131 struct SelectFirst : std::unary_function<Pair, typename Pair::first_type> {

132 const typename Pair::first_type& operator()(const Pair& x) const {

133 return x.first;

134 }

135 };

136

137 template <typename Pair>

138 struct SelectSecond : std::unary_function<Pair, typename Pair::second_type> {

139 const typename Pair::second_type& operator()(const Pair& x) const {

140 return x.second;

141 }

142 };

143

144 template <typename SourcePair, typename Target>

145 struct TargetInserter : std::unary_function<SourcePair, void> {

146 explicit TargetInserter(Target* target) : target_(target) {}

147

148 void operator()(const SourcePair& source) {

149 target_->insert(source.second.begin(), source.second.end());

150 }

151

152 Target* target_;

153 };

154

155 bool URLIndexPrivateData::ValidateConsistency() const {

156 // Scope things so that a large data set doesn't unnecessarily accumulate and

157 // hang around.

158 {

159 // Make a set of WordIDs from word_map_.

160 WordIDSet word_id_set_a;

161 std::transform(word_map_.begin(), word_map_.end(),

162 std::inserter(word_id_set_a, word_id_set_a.begin()),

163 SelectSecond<WordMap::value_type>());

164

165 // Compare word_map_'s word set to the words from word_id_history_map_.

166 {

167 WordIDSet word_id_set_b;

168 std::transform(word_id_history_map_.begin(), word_id_history_map_.end(),

169 std::inserter(word_id_set_b, word_id_set_b.begin()),

170 SelectFirst<WordIDHistoryMap::value_type>());

171 WordIDSet leftovers;

172 std::set_symmetric_difference(

173 word_id_set_a.begin(), word_id_set_a.end(),

174 word_id_set_b.begin(), word_id_set_b.end(),

175 std::inserter(leftovers, leftovers.begin()));

176 if (!leftovers.empty())

177 return false;

178 }

179

180 // Compare word_map_'s word set to the words from history_id_word_map_.

181 {

182 WordIDSet word_id_set_b;

183 std::for_each(history_id_word_map_.begin(), history_id_word_map_.end(),

184 TargetInserter<HistoryIDWordMap::value_type,

185 WordIDSet>(&word_id_set_b));

186 WordIDSet leftovers;

187 std::set_symmetric_difference(

188 word_id_set_a.begin(), word_id_set_a.end(),

189 word_id_set_b.begin(), word_id_set_b.end(),

190 std::inserter(leftovers, leftovers.begin()));

191 if (!leftovers.empty())

192 return false;

193 }

194

195 // Compare word_map_'s word set to the words from char_word_map_.

196 {

197 WordIDSet word_id_set_b;

198 std::for_each(char_word_map_.begin(), char_word_map_.end(),

199 TargetInserter<CharWordIDMap::value_type,

200 WordIDSet>(&word_id_set_b));

201 WordIDSet leftovers;

202 std::set_symmetric_difference(

203 word_id_set_a.begin(), word_id_set_a.end(),

204 word_id_set_b.begin(), word_id_set_b.end(),

205 std::inserter(leftovers, leftovers.begin()));

206 if (!leftovers.empty())

207 return false;

208 }

209

210 // Intersect available_words_ with set of WordIDs (created above from

211 // word_map_) and test for no common WordIDs.

212 {

213 WordIDSet leftovers;

214 std::set_intersection(word_id_set_a.begin(), word_id_set_a.end(),

215 available_words_.begin(), available_words_.end(),

216 std::inserter(leftovers, leftovers.begin()));

217 if (!leftovers.empty())

218 return false;

219 }

220 }

221

222 {

223 // Make a set of HistoryIDs from history_info_map_.

224 HistoryIDSet history_id_set_a;

225 std::transform(history_info_map_.begin(), history_info_map_.end(),

226 std::inserter(history_id_set_a, history_id_set_a.begin()),

227 SelectFirst<HistoryInfoMap::value_type>());

228

229 // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.

230 {

231 HistoryIDSet history_id_set_b;

232 std::transform(history_info_map_.begin(), history_info_map_.end(),

233 std::inserter(history_id_set_b, history_id_set_b.begin()),

234 SelectFirst<HistoryInfoMap::value_type>());

235 HistoryIDSet leftovers;

236 std::set_symmetric_difference(

237 history_id_set_a.begin(), history_id_set_a.end(),

238 history_id_set_b.begin(), history_id_set_b.end(),

239 std::inserter(leftovers, leftovers.begin()));

240 if (!leftovers.empty())

241 return false;

242 }

243

244 // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.

245 {

246 HistoryIDSet history_id_set_b;

247 std::for_each(word_id_history_map_.begin(), word_id_history_map_.end(),

248 TargetInserter<WordIDHistoryMap::value_type,

249 HistoryIDSet>(&history_id_set_b));

250 HistoryIDSet leftovers;

251 std::set_symmetric_difference(

252 history_id_set_a.begin(), history_id_set_a.end(),

253 history_id_set_b.begin(), history_id_set_b.end(),

254 std::inserter(leftovers, leftovers.begin()));

255 if (!leftovers.empty())

256 return false;

257 }

258

259 // Compare history_info_map_'s set to word_starts_map_'s HistoryIDs.

260 {

261 HistoryIDSet history_id_set_b;

262 std::transform(word_starts_map_.begin(), word_starts_map_.end(),

263 std::inserter(history_id_set_b, history_id_set_b.begin()),

264 SelectFirst<WordStartsMap::value_type>());

265 HistoryIDSet leftovers;

266 std::set_symmetric_difference(

267 history_id_set_a.begin(), history_id_set_a.end(),

268 history_id_set_b.begin(), history_id_set_b.end(),

269 std::inserter(leftovers, leftovers.begin()));

270 if (!leftovers.empty())

271 return false;

272 }

273 }

274

275 // Make sets of words from word_list_ and word_map_ and test for equality.

276 {

277 String16Set word_list_set;

278 std::copy(word_list_.begin(), word_list_.end(),

279 std::inserter(word_list_set, word_list_set.end()));

280 String16Set word_map_set;

281 std::transform(word_map_.begin(), word_map_.end(),

282 std::inserter(word_map_set, word_map_set.begin()),

283 SelectFirst<WordMap::value_type>());

284 if (word_list_set != word_map_set)

285 return false;

286 }

287

288 return true;

289 }

290

291 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

292 const string16& search_string) {

293 pre_filter_item_count_ = 0;

294 post_filter_item_count_ = 0;

295 post_scoring_item_count_ = 0;

296 // The search string we receive may contain escaped characters. For reducing

297 // the index we need individual, lower-cased words, ignoring escapings. For

298 // the final filtering we need whitespace separated substrings possibly

299 // containing escaped characters.

300 string16 lower_raw_string(base::i18n::ToLower(search_string));

301 string16 lower_unescaped_string =

302 net::UnescapeURLComponent(lower_raw_string,

303 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS);

304 // Extract individual 'words' (as opposed to 'terms'; see below) from the

305 // search string. When the user types "colspec=ID%20Mstone Release" we get

306 // four 'words': "colspec", "id", "mstone" and "release".

307 String16Vector lower_words(

308 history::String16VectorFromString16(lower_unescaped_string, false, NULL));

309 ScoredHistoryMatches scored_items;

310

311 // Do nothing if we have indexed no words (probably because we've not been

312 // initialized yet) or the search string has no words.

313 if (word_list_.empty() \|\| lower_words.empty()) {

314 search_term_cache_.clear(); // Invalidate the term cache.

315 return scored_items;

316 }

317

318 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

319 // approach.

320 ResetSearchTermCache();

321

322 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);

323

324 // Trim the candidate pool if it is large. Note that we do not filter out

325 // items that do not contain the search terms as proper substrings -- doing

326 // so is the performance-costly operation we are trying to avoid in order

327 // to maintain omnibox responsiveness.

328 const size_t kItemsToScoreLimit = 500;

329 pre_filter_item_count_ = history_id_set.size();

330 // If we trim the results set we do not want to cache the results for next

331 // time as the user's ultimately desired result could easily be eliminated

332 // in this early rough filter.

333 bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

334 if (was_trimmed) {

335 HistoryIDVector history_ids;

336 std::copy(history_id_set.begin(), history_id_set.end(),

337 std::back_inserter(history_ids));

338 // Trim down the set by sorting by typed-count, visit-count, and last

339 // visit.

340 HistoryItemFactorGreater

341 item_factor_functor(history_info_map_);

342 std::partial_sort(history_ids.begin(),

343 history_ids.begin() + kItemsToScoreLimit,

344 history_ids.end(),

345 item_factor_functor);

346 history_id_set.clear();

347 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

348 std::inserter(history_id_set, history_id_set.end()));

349 post_filter_item_count_ = history_id_set.size();

350 }

351

352 // Pass over all of the candidates filtering out any without a proper

353 // substring match, inserting those which pass in order by score. Note that

354 // in this step we are using the raw search string complete with escaped

355 // URL elements. When the user has specifically typed something akin to

356 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that

357 // specific substring appears in the URL or page title.

358

359 // We call these 'terms' (as opposed to 'words'; see above) as in this case

360 // we only want to break up the search string on 'true' whitespace rather than

361 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

362 // get two 'terms': "colspec=id%20mstone" and "release".

363 history::String16Vector lower_raw_terms;

364 Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);

365 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),

366 AddHistoryMatch(*this, lower_raw_string,

367 lower_raw_terms, base::Time::Now())).ScoredMatches();

368

369 // Select and sort only the top kMaxMatches results.

370 if (scored_items.size() > AutocompleteProvider::kMaxMatches) {

371 std::partial_sort(scored_items.begin(),

372 scored_items.begin() +

373 AutocompleteProvider::kMaxMatches,

374 scored_items.end(),

375 ScoredHistoryMatch::MatchScoreGreater);

376 scored_items.resize(AutocompleteProvider::kMaxMatches);

377 } else {

378 std::sort(scored_items.begin(), scored_items.end(),

379 ScoredHistoryMatch::MatchScoreGreater);

380 }

381 post_scoring_item_count_ = scored_items.size();

382

383 if (was_trimmed) {

384 search_term_cache_.clear(); // Invalidate the term cache.

385 } else {

386 // Remove any stale SearchTermCacheItems.

387 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

388 cache_iter != search_term_cache_.end(); ) {

389 if (!cache_iter->second.used_)

390 search_term_cache_.erase(cache_iter++);

391 else

392 ++cache_iter;

393 }

394 }

395

396 return scored_items;

397 }

398

399 bool URLIndexPrivateData::UpdateURL(const URLRow& row) {

400 // The row may or may not already be in our index. If it is not already

401 // indexed and it qualifies then it gets indexed. If it is already

402 // indexed and still qualifies then it gets updated, otherwise it

403 // is deleted from the index.

404 bool row_was_updated = false;

405 URLID row_id = row.id();

406 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

407 if (row_pos == history_info_map_.end()) {

408 // This new row should be indexed if it qualifies.

409 URLRow new_row(row);

410 new_row.set_id(row_id);

411 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

412 IndexRow(new_row);

413 } else if (RowQualifiesAsSignificant(row, base::Time())) {

414 // This indexed row still qualifies and will be re-indexed.

415 // The url won't have changed but the title, visit count, etc.

416 // might have changed.

417 URLRow& row_to_update = row_pos->second;

418 bool title_updated = row_to_update.title() != row.title();

419 if (row_to_update.visit_count() != row.visit_count() \|\|

420 row_to_update.typed_count() != row.typed_count() \|\|

421 row_to_update.last_visit() != row.last_visit() \|\| title_updated) {

422 CacheTransaction cache_transaction(cache_enabled() ?

423 cache_db_.get() : NULL);

424 row_to_update.set_visit_count(row.visit_count());

425 row_to_update.set_typed_count(row.typed_count());

426 row_to_update.set_last_visit(row.last_visit());

427 row_to_update.set_hidden(row.hidden());

428 // While the URL is guaranteed to remain stable, the title may have

429 // changed. If so, then update the index with the changed words.

430 if (title_updated) {

431 // Clear all words associated with this row and re-index both the

432 // URL and title.

433 RemoveRowWordsFromIndex(row_to_update);

434 row_to_update.set_title(row.title());

435 RowWordStarts word_starts;

436 AddRowWordsToIndex(row_to_update, &word_starts);

437 word_starts_map_[row_id] = word_starts;

438 // Replace all word_starts for the row.

439 if (cache_enabled()) {

440 cache_db_->RemoveWordStarts(row_id);

441 cache_db_->AddRowWordStarts(row_id, word_starts);

442 }

443 }

444 if (cache_enabled()) {

445 cache_db_->RemoveHistoryIDFromURLs(row_id);

446 cache_db_->AddHistoryToURLs(row_id, row);

447 }

448 row_was_updated = true;

449 }

450 } else {

451 // This indexed row no longer qualifies and will be de-indexed by

452 // clearing all words associated with this row.

453 RemoveRowFromIndex(row);

454 row_was_updated = true;

455 }

456 if (row_was_updated)

457 search_term_cache_.clear(); // This invalidates the cache.

458 return row_was_updated;

459 }

460

461 // Helper functor for DeleteURL.

462 class HistoryInfoMapItemHasURL {

463 public:

464 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

465

466 bool operator()(const std::pair<const HistoryID, URLRow>& item) {

467 return item.second.url() == url_;

468 }

469

470 private:

471 const GURL& url_;

472 };

473

474 bool URLIndexPrivateData::DeleteURL(const GURL& url) {

475 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) \|\|

476 BrowserThread::CurrentlyOn(BrowserThread::UI));

477 // Find the matching entry in the history_info_map_.

478 HistoryInfoMap::iterator pos = std::find_if(

479 history_info_map_.begin(),

480 history_info_map_.end(),

481 HistoryInfoMapItemHasURL(url));

482 if (pos == history_info_map_.end())

483 return false;

484 RemoveRowFromIndex(pos->second);

485 search_term_cache_.clear(); // This invalidates the cache.

486 return true;

487 }

488

489 bool URLIndexPrivateData::RestoreFromCacheTask() {

490 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) \|\|

491 !BrowserThread::CurrentlyOn(BrowserThread::UI));

492 DCHECK(cache_db_);

493 if (IsShutdown())

494 return false;

495 Clear();

496 if (RestoreFromCache(cache_db_))

497 return true;

498 // If there was no SQLite-based cache then there might be an old,

499 // deprecated, protobuf-based cache file laying around. Get rid of it.

500 DeleteOldVersionCacheFile();

501 return false;

502 }

503

504 // static

505 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

506 HistoryDatabase* history_db,

507 scoped_refptr<URLIndexPrivateData> old_data) {

508 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) \|\|

509 !BrowserThread::CurrentlyOn(BrowserThread::UI));

510 if (!history_db)

511 return NULL;

512

513 scoped_refptr<URLIndexPrivateData> rebuilt_data(

514 new URLIndexPrivateData(*(old_data.get())));

515

516 // NOTE: We disable the cache database until after the replacement private

517 // data has been created and then we re-enable the database. Once the private

518 // data has been swapped in the database is refreshed with the new data.

519 URLDatabase::URLEnumerator history_enum;

520 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

521 return NULL;

522

523 for (URLRow row; !old_data->IsShutdown() && history_enum.GetNextURL(&row); )

524 rebuilt_data->IndexRow(row);

525

526 if (old_data->IsShutdown()) {

527 rebuilt_data.release();

528 }

529

530 return rebuilt_data;

531 }

532

533 void URLIndexPrivateData::RefreshCacheTask() {

534 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) \|\|

535 !BrowserThread::CurrentlyOn(BrowserThread::UI));

536 base::AutoLock lock(lock_);

537 if (shutdown_ \|\| !cache_enabled())

538 return;

539 // Should a refresh fail for any reason we consider the database to be

540 // corrupt and unavailable; no further remedial action is possible.

541 set_cache_enabled(cache_db_->Reset() && cache_db_->Refresh(*this));

542 }

543

544 // static

545 void URLIndexPrivateData::InitializeSchemeWhitelistForTesting(

546 std::set<std::string>* whitelist) {

547 InitializeSchemeWhitelist(whitelist);

548 }

549	55

550 // SearchTermCacheItem ---------------------------------------------------------	56 // SearchTermCacheItem ---------------------------------------------------------

551	57

552 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(	58 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(

553 const WordIDSet& word_id_set,	59 const WordIDSet& word_id_set,

554 const HistoryIDSet& history_id_set)	60 const HistoryIDSet& history_id_set)

555 : word_id_set_(word_id_set),	61 : word_id_set_(word_id_set),

556 history_id_set_(history_id_set),	62 history_id_set_(history_id_set),

557 used_(true) {}	63 used_(true) {}

558	64

559 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem()	65 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem()

560 : used_(true) {}	66 : used_(true) {}

561	67

562 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}	68 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}

563	69

564 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------	70 // Algorithm Functions ---------------------------------------------------------

565	71

566 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(	72 // Comparison function for sorting search terms by descending length.

567 const URLIndexPrivateData& private_data,	73 bool LengthGreater(const string16& string_a, const string16& string_b) {

568 const string16& lower_string,	74 return string_a.length() > string_b.length();

569 const String16Vector& lower_terms,

570 const base::Time now)

571 : private_data_(private_data),

572 lower_string_(lower_string),

573 lower_terms_(lower_terms),

574 now_(now) {}

575

576 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}

577

578 void URLIndexPrivateData::AddHistoryMatch::operator()(

579 const HistoryID history_id) {

580 HistoryInfoMap::const_iterator hist_pos =

581 private_data_.history_info_map_.find(history_id);

582 if (hist_pos != private_data_.history_info_map_.end()) {

583 const URLRow& hist_item = hist_pos->second;

584 WordStartsMap::const_iterator starts_pos =

585 private_data_.word_starts_map_.find(history_id);

586 DCHECK(starts_pos != private_data_.word_starts_map_.end());

587 ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,

588 starts_pos->second, now_);

589 if (match.raw_score > 0)

590 scored_matches_.push_back(match);

591 }

592 }	75 }

593	76

594 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------	77 // InMemoryURLIndex's Private Data ---------------------------------------------

595	78

596 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(	79 URLIndexPrivateData::URLIndexPrivateData()

597 const HistoryInfoMap& history_info_map)	80 : restored_cache_version_(0),

598 : history_info_map_(history_info_map) {	81 saved_cache_version_(kCurrentCacheFileVersion),

599 }

600

601 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}

602

603 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

604 const HistoryID h1,

605 const HistoryID h2) {

606 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));

607 if (entry1 == history_info_map_.end())

608 return false;

609 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));

610 if (entry2 == history_info_map_.end())

611 return true;

612 const URLRow& r1(entry1->second);

613 const URLRow& r2(entry2->second);

614 // First cut: typed count, visit count, recency.

615 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

616 // recently visited (within the last 12/24 hours) as highly important. Get

617 // input from mpearson.

618 if (r1.typed_count() != r2.typed_count())

619 return (r1.typed_count() > r2.typed_count());

620 if (r1.visit_count() != r2.visit_count())

621 return (r1.visit_count() > r2.visit_count());

622 return (r1.last_visit() > r2.last_visit());

623 }

624

625 // URLIndexPrivateData Private Functions ---------------------------------------

626

627 URLIndexPrivateData::URLIndexPrivateData(const URLIndexPrivateData& old_data)

628 : history_dir_(old_data.history_dir_),

629 languages_(old_data.languages_),

630 cache_enabled_(old_data.cache_enabled_),

631 shutdown_(old_data.IsShutdown()),

632 lock_(),

633 scheme_whitelist_(old_data.scheme_whitelist_),

634 pre_filter_item_count_(0),	82 pre_filter_item_count_(0),

635 post_filter_item_count_(0),	83 post_filter_item_count_(0),

636 post_scoring_item_count_(0) {	84 post_scoring_item_count_(0) {

637 cache_db_ = old_data.cache_db_;

638 }

639

640 // Called only by unit tests.

641 URLIndexPrivateData::URLIndexPrivateData()

642 : cache_enabled_(true),

643 shutdown_(false),

644 lock_(),

645 pre_filter_item_count_(0),

646 post_filter_item_count_(0),

647 post_scoring_item_count_(0) {

648 InitializeSchemeWhitelist(&scheme_whitelist_);

649 }	85 }

650	86

651 URLIndexPrivateData::~URLIndexPrivateData() {}	87 URLIndexPrivateData::~URLIndexPrivateData() {}

652	88

653 bool URLIndexPrivateData::IsShutdown() const {

654 base::AutoLock lock(lock_);

655 return shutdown_;

656 }

657

658 void URLIndexPrivateData::Clear() {	89 void URLIndexPrivateData::Clear() {

659 word_list_.clear();	90 word_list_.clear();

660 available_words_.clear();	91 available_words_.clear();

661 word_map_.clear();	92 word_map_.clear();

662 char_word_map_.clear();	93 char_word_map_.clear();

663 word_id_history_map_.clear();	94 word_id_history_map_.clear();

664 history_id_word_map_.clear();	95 history_id_word_map_.clear();

665 history_info_map_.clear();	96 history_info_map_.clear();

666 word_starts_map_.clear();	97 word_starts_map_.clear();

667 }	98 }

668	99

	100 bool URLIndexPrivateData::Empty() const {

	101 return history_info_map_.empty();

	102 }

	103

	104 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {

	105 scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;

	106 data_copy->word_list_ = word_list_;

	107 data_copy->available_words_ = available_words_;

	108 data_copy->word_map_ = word_map_;

	109 data_copy->char_word_map_ = char_word_map_;

	110 data_copy->word_id_history_map_ = word_id_history_map_;

	111 data_copy->history_id_word_map_ = history_id_word_map_;

	112 data_copy->history_info_map_ = history_info_map_;

	113 data_copy->word_starts_map_ = word_starts_map_;

	114 return data_copy;

	115 // Not copied:

	116 // search_term_cache_

	117 // pre_filter_item_count_

	118 // post_filter_item_count_

	119 // post_scoring_item_count_

	120 };

	121

669 // Cache Updating --------------------------------------------------------------	122 // Cache Updating --------------------------------------------------------------

670	123

671 bool URLIndexPrivateData::IndexRow(const URLRow& row) {	124 bool URLIndexPrivateData::IndexRow(

	125 const URLRow& row,

	126 const std::string& languages,

	127 const std::set<std::string>& scheme_whitelist) {

672 const GURL& gurl(row.url());	128 const GURL& gurl(row.url());

673	129

674 // Index only URLs with a whitelisted scheme.	130 // Index only URLs with a whitelisted scheme.

675 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist_))	131 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))

676 return false;	132 return false;

677	133

	134 URLID row_id = row.id();

678 // Strip out username and password before saving and indexing.	135 // Strip out username and password before saving and indexing.

679 string16 url(net::FormatUrl(gurl, languages_,	136 string16 url(net::FormatUrl(gurl, languages,

680 net::kFormatUrlOmitUsernamePassword,	137 net::kFormatUrlOmitUsernamePassword,

681 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,	138 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

682 NULL, NULL, NULL));	139 NULL, NULL, NULL));

683	140

684 URLID row_id = row.id();

685 HistoryID history_id = static_cast<HistoryID>(row_id);	141 HistoryID history_id = static_cast<HistoryID>(row_id);

686 DCHECK(history_info_map_.find(history_id) == history_info_map_.end());

687 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());	142 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

688	143

689 // Add the row for quick lookup in the history info store.	144 // Add the row for quick lookup in the history info store.

690 URLRow new_row(GURL(url), row_id);	145 URLRow new_row(GURL(url), row_id);

691 new_row.set_visit_count(row.visit_count());	146 new_row.set_visit_count(row.visit_count());

692 new_row.set_typed_count(row.typed_count());	147 new_row.set_typed_count(row.typed_count());

693 new_row.set_last_visit(row.last_visit());	148 new_row.set_last_visit(row.last_visit());

694 new_row.set_title(row.title());	149 new_row.set_title(row.title());

695

696 CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);

697 history_info_map_[history_id] = new_row;	150 history_info_map_[history_id] = new_row;

698 if (cache_enabled())

699 cache_db_->AddHistoryToURLs(history_id, row);

700	151

701 // Index the words contained in the URL and title of the row.	152 // Index the words contained in the URL and title of the row.

702 RowWordStarts word_starts;	153 RowWordStarts word_starts;

703 AddRowWordsToIndex(new_row, &word_starts);	154 AddRowWordsToIndex(new_row, &word_starts, languages);

704 word_starts_map_[history_id] = word_starts;	155 word_starts_map_[history_id] = word_starts;

705 if (cache_enabled())

706 cache_db_->AddRowWordStarts(history_id, word_starts);

707 return true;	156 return true;

708 }	157 }

709	158

710 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,	159 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,

711 RowWordStarts* word_starts) {	160 RowWordStarts* word_starts,

	161 const std::string& languages) {

712 HistoryID history_id = static_cast<HistoryID>(row.id());	162 HistoryID history_id = static_cast<HistoryID>(row.id());

713 // Split URL into individual, unique words then add in the title words.	163 // Split URL into individual, unique words then add in the title words.

714 const GURL& gurl(row.url());	164 const GURL& gurl(row.url());

715 string16 url(net::FormatUrl(gurl, languages_,	165 string16 url(net::FormatUrl(gurl, languages,

716 net::kFormatUrlOmitUsernamePassword,	166 net::kFormatUrlOmitUsernamePassword,

717 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,	167 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

718 NULL, NULL, NULL));	168 NULL, NULL, NULL));

719 url = base::i18n::ToLower(url);	169 url = base::i18n::ToLower(url);

720 String16Set url_words = String16SetFromString16(url,	170 String16Set url_words = String16SetFromString16(url,

721 word_starts ? &word_starts->url_word_starts_ : NULL);	171 word_starts ? &word_starts->url_word_starts_ : NULL);

722 String16Set title_words = String16SetFromString16(row.title(),	172 String16Set title_words = String16SetFromString16(row.title(),

723 word_starts ? &word_starts->title_word_starts_ : NULL);	173 word_starts ? &word_starts->title_word_starts_ : NULL);

724 String16Set words;	174 String16Set words;

725 std::set_union(url_words.begin(), url_words.end(),	175 std::set_union(url_words.begin(), url_words.end(),

(...skipping 12 matching lines...) Expand all Loading...
738 if (word_pos != word_map_.end())	188 if (word_pos != word_map_.end())

739 UpdateWordHistory(word_pos->second, history_id);	189 UpdateWordHistory(word_pos->second, history_id);

740 else	190 else

741 AddWordHistory(term, history_id);	191 AddWordHistory(term, history_id);

742 }	192 }

743	193

744 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,	194 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,

745 HistoryID history_id) {	195 HistoryID history_id) {

746 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);	196 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);

747 DCHECK(history_pos != word_id_history_map_.end());	197 DCHECK(history_pos != word_id_history_map_.end());

748 // There is no need to record changes to the word_id_history_map_ in the cache

749 // because it can be rebuilt from the history_id_word_map_'s table.

750 HistoryIDSet& history_id_set(history_pos->second);	198 HistoryIDSet& history_id_set(history_pos->second);

751 history_id_set.insert(history_id);	199 history_id_set.insert(history_id);

752 AddToHistoryIDWordMap(history_id, word_id);	200 AddToHistoryIDWordMap(history_id, word_id);

753

754 // Add word_id/history_id entry to the word_history table

755 if (cache_enabled())

756 cache_db_->AddHistoryToWordHistory(word_id, history_id);

757 }	201 }

758	202

759 void URLIndexPrivateData::AddWordHistory(const string16& term,	203 void URLIndexPrivateData::AddWordHistory(const string16& term,

760 HistoryID history_id) {	204 HistoryID history_id) {

761 WordID word_id = word_list_.size();	205 WordID word_id = word_list_.size();

762 if (available_words_.empty()) {	206 if (available_words_.empty()) {

763 word_list_.push_back(term);	207 word_list_.push_back(term);

764 } else {	208 } else {

765 word_id = *(available_words_.begin());	209 word_id = *(available_words_.begin());

766 word_list_[word_id] = term;	210 word_list_[word_id] = term;

767 available_words_.erase(word_id);	211 available_words_.erase(word_id);

768 }	212 }

769 word_map_[term] = word_id;	213 word_map_[term] = word_id;

770	214

771 // Add word_id/term entry to the words table;

772 if (cache_enabled())

773 cache_db_->AddWordToWords(word_id, term);

774

775 HistoryIDSet history_id_set;	215 HistoryIDSet history_id_set;

776 history_id_set.insert(history_id);	216 history_id_set.insert(history_id);

777 word_id_history_map_[word_id] = history_id_set;	217 word_id_history_map_[word_id] = history_id_set;

778 AddToHistoryIDWordMap(history_id, word_id);	218 AddToHistoryIDWordMap(history_id, word_id);

779	219

780 // Add word_id/history_id entry to the word_history table

781 if (cache_enabled())

782 cache_db_->AddHistoryToWordHistory(word_id, history_id);

783

784 // For each character in the newly added word (i.e. a word that is not	220 // For each character in the newly added word (i.e. a word that is not

785 // already in the word index), add the word to the character index.	221 // already in the word index), add the word to the character index.

786 Char16Set characters = Char16SetFromString16(term);	222 Char16Set characters = Char16SetFromString16(term);

787 for (Char16Set::iterator uni_char_iter = characters.begin();	223 for (Char16Set::iterator uni_char_iter = characters.begin();

788 uni_char_iter != characters.end(); ++uni_char_iter) {	224 uni_char_iter != characters.end(); ++uni_char_iter) {

789 char16 uni_char = *uni_char_iter;	225 char16 uni_char = *uni_char_iter;

790 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);	226 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);

791 if (char_iter != char_word_map_.end()) {	227 if (char_iter != char_word_map_.end()) {

792 // Update existing entry in the char/word index.	228 // Update existing entry in the char/word index.

793 WordIDSet& word_id_set(char_iter->second);	229 WordIDSet& word_id_set(char_iter->second);

794 word_id_set.insert(word_id);	230 word_id_set.insert(word_id);

795 } else {	231 } else {

796 // Create a new entry in the char/word index.	232 // Create a new entry in the char/word index.

797 WordIDSet word_id_set;	233 WordIDSet word_id_set;

798 word_id_set.insert(word_id);	234 word_id_set.insert(word_id);

799 char_word_map_[uni_char] = word_id_set;	235 char_word_map_[uni_char] = word_id_set;

800 }	236 }

801 // Add uni_char/word_id entry to the char_words database table.

802 if (cache_enabled())

803 cache_db_->AddWordToCharWords(uni_char, word_id);

804 }	237 }

805 }	238 }

806	239

807 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {	240 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {

808 CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);

809 RemoveRowWordsFromIndex(row);	241 RemoveRowWordsFromIndex(row);

810 HistoryID history_id = static_cast<HistoryID>(row.id());	242 HistoryID history_id = static_cast<HistoryID>(row.id());

811 history_info_map_.erase(history_id);	243 history_info_map_.erase(history_id);

812 word_starts_map_.erase(history_id);	244 word_starts_map_.erase(history_id);

813 if (cache_enabled())

814 cache_db_->RemoveHistoryIDFromURLs(history_id);

815 }	245 }

816	246

817 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {	247 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {

818 // Remove the entries in history_id_word_map_ and word_id_history_map_ for	248 // Remove the entries in history_id_word_map_ and word_id_history_map_ for

819 // this row.	249 // this row.

820 HistoryID history_id = static_cast<HistoryID>(row.id());	250 HistoryID history_id = static_cast<HistoryID>(row.id());

821 WordIDSet word_id_set = history_id_word_map_[history_id];	251 WordIDSet word_id_set = history_id_word_map_[history_id];

822 history_id_word_map_.erase(history_id);	252 history_id_word_map_.erase(history_id);

823	253

824 // Reconcile any changes to word usage.	254 // Reconcile any changes to word usage.

(...skipping 13 matching lines...) Expand all Loading...
838 char_word_map_[uni_char].erase(word_id);	268 char_word_map_[uni_char].erase(word_id);

839 if (char_word_map_[uni_char].empty())	269 if (char_word_map_[uni_char].empty())

840 char_word_map_.erase(uni_char); // No longer in use.	270 char_word_map_.erase(uni_char); // No longer in use.

841 }	271 }

842	272

843 // Complete the removal of references to the word.	273 // Complete the removal of references to the word.

844 word_id_history_map_.erase(word_id);	274 word_id_history_map_.erase(word_id);

845 word_map_.erase(word);	275 word_map_.erase(word);

846 word_list_[word_id] = string16();	276 word_list_[word_id] = string16();

847 available_words_.insert(word_id);	277 available_words_.insert(word_id);

848 if (cache_enabled())

849 cache_db_->RemoveWordFromWords(word_id);

850 }	278 }

851 if (cache_enabled())

852 cache_db_->RemoveHistoryIDFromWordHistory(history_id);

853 }	279 }

854	280

855 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,	281 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,

856 WordID word_id) {	282 WordID word_id) {

857 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);	283 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);

858 if (iter != history_id_word_map_.end()) {	284 if (iter != history_id_word_map_.end()) {

859 WordIDSet& word_id_set(iter->second);	285 WordIDSet& word_id_set(iter->second);

860 word_id_set.insert(word_id);	286 word_id_set.insert(word_id);

861 } else {	287 } else {

862 WordIDSet word_id_set;	288 WordIDSet word_id_set;

863 word_id_set.insert(word_id);	289 word_id_set.insert(word_id);

864 history_id_word_map_[history_id] = word_id_set;	290 history_id_word_map_[history_id] = word_id_set;

865 }	291 }

866 }	292 }

867	293

	294 bool URLIndexPrivateData::UpdateURL(

	295 const URLRow& row,

	296 const std::string& languages,

	297 const std::set<std::string>& scheme_whitelist) {

	298 // The row may or may not already be in our index. If it is not already

	299 // indexed and it qualifies then it gets indexed. If it is already

	300 // indexed and still qualifies then it gets updated, otherwise it

	301 // is deleted from the index.

	302 bool row_was_updated = false;

	303 URLID row_id = row.id();

	304 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

	305 if (row_pos == history_info_map_.end()) {

	306 // This new row should be indexed if it qualifies.

	307 URLRow new_row(row);

	308 new_row.set_id(row_id);

	309 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

	310 IndexRow(new_row, languages, scheme_whitelist);

	311 } else if (RowQualifiesAsSignificant(row, base::Time())) {

	312 // This indexed row still qualifies and will be re-indexed.

	313 // The url won't have changed but the title, visit count, etc.

	314 // might have changed.

	315 URLRow& row_to_update = row_pos->second;

	316 bool title_updated = row_to_update.title() != row.title();

	317 if (row_to_update.visit_count() != row.visit_count() \|\|

	318 row_to_update.typed_count() != row.typed_count() \|\|

	319 row_to_update.last_visit() != row.last_visit() \|\| title_updated) {

	320 row_to_update.set_visit_count(row.visit_count());

	321 row_to_update.set_typed_count(row.typed_count());

	322 row_to_update.set_last_visit(row.last_visit());

	323 // While the URL is guaranteed to remain stable, the title may have

	324 // changed. If so, then update the index with the changed words.

	325 if (title_updated) {

	326 // Clear all words associated with this row and re-index both the

	327 // URL and title.

	328 RemoveRowWordsFromIndex(row_to_update);

	329 row_to_update.set_title(row.title());

	330 RowWordStarts word_starts;

	331 AddRowWordsToIndex(row_to_update, &word_starts, languages);

	332 word_starts_map_[row_id] = word_starts;

	333 }

	334 row_was_updated = true;

	335 }

	336 } else {

	337 // This indexed row no longer qualifies and will be de-indexed by

	338 // clearing all words associated with this row.

	339 RemoveRowFromIndex(row);

	340 row_was_updated = true;

	341 }

	342 if (row_was_updated)

	343 search_term_cache_.clear(); // This invalidates the cache.

	344 return row_was_updated;

	345 }

	346

	347 // Helper functor for DeleteURL.

	348 class HistoryInfoMapItemHasURL {

	349 public:

	350 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

	351

	352 bool operator()(const std::pair<const HistoryID, URLRow>& item) {

	353 return item.second.url() == url_;

	354 }

	355

	356 private:

	357 const GURL& url_;

	358 };

	359

	360 bool URLIndexPrivateData::DeleteURL(const GURL& url) {

	361 // Find the matching entry in the history_info_map_.

	362 HistoryInfoMap::iterator pos = std::find_if(

	363 history_info_map_.begin(),

	364 history_info_map_.end(),

	365 HistoryInfoMapItemHasURL(url));

	366 if (pos == history_info_map_.end())

	367 return false;

	368 RemoveRowFromIndex(pos->second);

	369 search_term_cache_.clear(); // This invalidates the cache.

	370 return true;

	371 }

	372

	373 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

	374

	375 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

	376 const HistoryInfoMap& history_info_map)

	377 : history_info_map_(history_info_map) {

	378 }

	379

	380 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}

	381

	382 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

	383 const HistoryID h1,

	384 const HistoryID h2) {

	385 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));

	386 if (entry1 == history_info_map_.end())

	387 return false;

	388 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));

	389 if (entry2 == history_info_map_.end())

	390 return true;

	391 const URLRow& r1(entry1->second);

	392 const URLRow& r2(entry2->second);

	393 // First cut: typed count, visit count, recency.

	394 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

	395 // recently visited (within the last 12/24 hours) as highly important. Get

	396 // input from mpearson.

	397 if (r1.typed_count() != r2.typed_count())

	398 return (r1.typed_count() > r2.typed_count());

	399 if (r1.visit_count() != r2.visit_count())

	400 return (r1.visit_count() > r2.visit_count());

	401 return (r1.last_visit() > r2.last_visit());

	402 }

	403

	404 // Cache Searching -------------------------------------------------------------

	405

	406 // NOTE: This is the main public search function.

	407 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

	408 const string16& search_string) {

	409 pre_filter_item_count_ = 0;

	410 post_filter_item_count_ = 0;

	411 post_scoring_item_count_ = 0;

	412 // The search string we receive may contain escaped characters. For reducing

	413 // the index we need individual, lower-cased words, ignoring escapings. For

	414 // the final filtering we need whitespace separated substrings possibly

	415 // containing escaped characters.

	416 string16 lower_raw_string(base::i18n::ToLower(search_string));

	417 string16 lower_unescaped_string =

	418 net::UnescapeURLComponent(lower_raw_string,

	419 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS);

	420 // Extract individual 'words' (as opposed to 'terms'; see below) from the

	421 // search string. When the user types "colspec=ID%20Mstone Release" we get

	422 // four 'words': "colspec", "id", "mstone" and "release".

	423 String16Vector lower_words(

	424 history::String16VectorFromString16(lower_unescaped_string, false, NULL));

	425 ScoredHistoryMatches scored_items;

	426

	427 // Do nothing if we have indexed no words (probably because we've not been

	428 // initialized yet) or the search string has no words.

	429 if (word_list_.empty() \|\| lower_words.empty()) {

	430 search_term_cache_.clear(); // Invalidate the term cache.

	431 return scored_items;

	432 }

	433

	434 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

	435 // approach.

	436 ResetSearchTermCache();

	437

	438 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);

	439

	440 // Trim the candidate pool if it is large. Note that we do not filter out

	441 // items that do not contain the search terms as proper substrings -- doing

	442 // so is the performance-costly operation we are trying to avoid in order

	443 // to maintain omnibox responsiveness.

	444 const size_t kItemsToScoreLimit = 500;

	445 pre_filter_item_count_ = history_id_set.size();

	446 // If we trim the results set we do not want to cache the results for next

	447 // time as the user's ultimately desired result could easily be eliminated

	448 // in this early rough filter.

	449 bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

	450 if (was_trimmed) {

	451 HistoryIDVector history_ids;

	452 std::copy(history_id_set.begin(), history_id_set.end(),

	453 std::back_inserter(history_ids));

	454 // Trim down the set by sorting by typed-count, visit-count, and last

	455 // visit.

	456 HistoryItemFactorGreater

	457 item_factor_functor(history_info_map_);

	458 std::partial_sort(history_ids.begin(),

	459 history_ids.begin() + kItemsToScoreLimit,

	460 history_ids.end(),

	461 item_factor_functor);

	462 history_id_set.clear();

	463 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

	464 std::inserter(history_id_set, history_id_set.end()));

	465 post_filter_item_count_ = history_id_set.size();

	466 }

	467

	468 // Pass over all of the candidates filtering out any without a proper

	469 // substring match, inserting those which pass in order by score. Note that

	470 // in this step we are using the raw search string complete with escaped

	471 // URL elements. When the user has specifically typed something akin to

	472 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that

	473 // specific substring appears in the URL or page title.

	474

	475 // We call these 'terms' (as opposed to 'words'; see above) as in this case

	476 // we only want to break up the search string on 'true' whitespace rather than

	477 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

	478 // get two 'terms': "colspec=id%20mstone" and "release".

	479 history::String16Vector lower_raw_terms;

	480 Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);

	481 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),

	482 AddHistoryMatch(*this, lower_raw_string,

	483 lower_raw_terms, base::Time::Now())).ScoredMatches();

	484

	485 // Select and sort only the top kMaxMatches results.

	486 if (scored_items.size() > AutocompleteProvider::kMaxMatches) {

	487 std::partial_sort(scored_items.begin(),

	488 scored_items.begin() +

	489 AutocompleteProvider::kMaxMatches,

	490 scored_items.end(),

	491 ScoredHistoryMatch::MatchScoreGreater);

	492 scored_items.resize(AutocompleteProvider::kMaxMatches);

	493 } else {

	494 std::sort(scored_items.begin(), scored_items.end(),

	495 ScoredHistoryMatch::MatchScoreGreater);

	496 }

	497 post_scoring_item_count_ = scored_items.size();

	498

	499 if (was_trimmed) {

	500 search_term_cache_.clear(); // Invalidate the term cache.

	501 } else {

	502 // Remove any stale SearchTermCacheItems.

	503 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

	504 cache_iter != search_term_cache_.end(); ) {

	505 if (!cache_iter->second.used_)

	506 search_term_cache_.erase(cache_iter++);

	507 else

	508 ++cache_iter;

	509 }

	510 }

	511

	512 return scored_items;

	513 }

	514

	515 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------

	516

	517 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

	518 const URLIndexPrivateData& private_data,

	519 const string16& lower_string,

	520 const String16Vector& lower_terms,

	521 const base::Time now)

	522 : private_data_(private_data),

	523 lower_string_(lower_string),

	524 lower_terms_(lower_terms),

	525 now_(now) {}

	526

	527 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}

	528

	529 void URLIndexPrivateData::AddHistoryMatch::operator()(

	530 const HistoryID history_id) {

	531 HistoryInfoMap::const_iterator hist_pos =

	532 private_data_.history_info_map_.find(history_id);

	533 if (hist_pos != private_data_.history_info_map_.end()) {

	534 const URLRow& hist_item = hist_pos->second;

	535 WordStartsMap::const_iterator starts_pos =

	536 private_data_.word_starts_map_.find(history_id);

	537 DCHECK(starts_pos != private_data_.word_starts_map_.end());

	538 ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,

	539 starts_pos->second, now_);

	540 if (match.raw_score > 0)

	541 scored_matches_.push_back(match);

	542 }

	543 }

	544

868 void URLIndexPrivateData::ResetSearchTermCache() {	545 void URLIndexPrivateData::ResetSearchTermCache() {

869 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();	546 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();

870 iter != search_term_cache_.end(); ++iter)	547 iter != search_term_cache_.end(); ++iter)

871 iter->second.used_ = false;	548 iter->second.used_ = false;

872 }	549 }

873	550

874 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(	551 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(

875 const String16Vector& unsorted_words) {	552 const String16Vector& unsorted_words) {

876 // Break the terms down into individual terms (words), get the candidate	553 // Break the terms down into individual terms (words), get the candidate

877 // set for each term, and intersect each to get a final candidate list.	554 // set for each term, and intersect each to get a final candidate list.

(...skipping 168 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1046 std::set_intersection(word_id_set.begin(), word_id_set.end(),	723 std::set_intersection(word_id_set.begin(), word_id_set.end(),

1047 char_word_id_set.begin(), char_word_id_set.end(),	724 char_word_id_set.begin(), char_word_id_set.end(),

1048 std::inserter(new_word_id_set,	725 std::inserter(new_word_id_set,

1049 new_word_id_set.begin()));	726 new_word_id_set.begin()));

1050 word_id_set.swap(new_word_id_set);	727 word_id_set.swap(new_word_id_set);

1051 }	728 }

1052 }	729 }

1053 return word_id_set;	730 return word_id_set;

1054 }	731 }

1055	732

1056 bool URLIndexPrivateData::RestoreFromCache(InMemoryURLCacheDatabase* cache_db) {	733 // Cache Saving ----------------------------------------------------------------

1057 DCHECK(cache_db);	734

1058 if (!cache_db->RestorePrivateData(this)) {	735 // static

1059 cache_db->Reset();	736 void URLIndexPrivateData::WritePrivateDataToCacheFileTask(

1060 return false;	737 scoped_refptr<URLIndexPrivateData> private_data,

1061 }	738 const FilePath& file_path,

1062 return !Empty() && ValidateConsistency();	739 scoped_refptr<RefCountedBool> succeeded) {

1063 }	740 DCHECK(private_data.get());

1064	741 DCHECK(!file_path.empty());

1065 void URLIndexPrivateData::DeleteOldVersionCacheFile() const {	742 succeeded->set_value(private_data->SaveToFile(file_path));

1066 if (history_dir_.empty())	743 }

1067 return;	744

1068 FilePath path = history_dir_.Append(chrome::kHQPCacheFilename);	745 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {

1069 file_util::Delete(path, false);	746 base::TimeTicks beginning_time = base::TimeTicks::Now();

1070 }	747 InMemoryURLIndexCacheItem index_cache;

1071	748 SavePrivateData(&index_cache);

1072 bool URLIndexPrivateData::GetCacheDBPath(FilePath* file_path) {	749 std::string data;

1073 DCHECK(file_path);	750 if (!index_cache.SerializeToString(&data)) {

1074 if (history_dir_.empty())	751 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";

1075 return false;	752 return false;

1076 *file_path = history_dir_.Append(chrome::kHQPCacheDBFilename);	753 }

1077 return true;	754

1078 }	755 int size = data.size();

1079	756 if (file_util::WriteFile(file_path, data.c_str(), size) != size) {

1080 void URLIndexPrivateData::SetCacheDatabaseForTesting(	757 LOG(WARNING) << "Failed to write " << file_path.value();

1081 InMemoryURLCacheDatabase* test_db) {	758 return false;

1082 cache_db_ = test_db;	759 }

	760 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",

	761 base::TimeTicks::Now() - beginning_time);

	762 return true;

	763 }

	764

	765 void URLIndexPrivateData::SavePrivateData(

	766 InMemoryURLIndexCacheItem* cache) const {

	767 DCHECK(cache);

	768 cache->set_timestamp(base::Time::Now().ToInternalValue());

	769 cache->set_version(saved_cache_version_);

	770 // history_item_count_ is no longer used but rather than change the protobuf

	771 // definition use a placeholder. This will go away with the switch to SQLite.

	772 cache->set_history_item_count(0);

	773 SaveWordList(cache);

	774 SaveWordMap(cache);

	775 SaveCharWordMap(cache);

	776 SaveWordIDHistoryMap(cache);

	777 SaveHistoryInfoMap(cache);

	778 SaveWordStartsMap(cache);

	779 }

	780

	781 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {

	782 if (word_list_.empty())

	783 return;

	784 WordListItem* list_item = cache->mutable_word_list();

	785 list_item->set_word_count(word_list_.size());

	786 for (String16Vector::const_iterator iter = word_list_.begin();

	787 iter != word_list_.end(); ++iter)

	788 list_item->add_word(UTF16ToUTF8(*iter));

	789 }

	790

	791 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {

	792 if (word_map_.empty())

	793 return;

	794 WordMapItem* map_item = cache->mutable_word_map();

	795 map_item->set_item_count(word_map_.size());

	796 for (WordMap::const_iterator iter = word_map_.begin();

	797 iter != word_map_.end(); ++iter) {

	798 WordMapEntry* map_entry = map_item->add_word_map_entry();

	799 map_entry->set_word(UTF16ToUTF8(iter->first));

	800 map_entry->set_word_id(iter->second);

	801 }

	802 }

	803

	804 void URLIndexPrivateData::SaveCharWordMap(

	805 InMemoryURLIndexCacheItem* cache) const {

	806 if (char_word_map_.empty())

	807 return;

	808 CharWordMapItem* map_item = cache->mutable_char_word_map();

	809 map_item->set_item_count(char_word_map_.size());

	810 for (CharWordIDMap::const_iterator iter = char_word_map_.begin();

	811 iter != char_word_map_.end(); ++iter) {

	812 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();

	813 map_entry->set_char_16(iter->first);

	814 const WordIDSet& word_id_set(iter->second);

	815 map_entry->set_item_count(word_id_set.size());

	816 for (WordIDSet::const_iterator set_iter = word_id_set.begin();

	817 set_iter != word_id_set.end(); ++set_iter)

	818 map_entry->add_word_id(*set_iter);

	819 }

	820 }

	821

	822 void URLIndexPrivateData::SaveWordIDHistoryMap(

	823 InMemoryURLIndexCacheItem* cache) const {

	824 if (word_id_history_map_.empty())

	825 return;

	826 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();

	827 map_item->set_item_count(word_id_history_map_.size());

	828 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();

	829 iter != word_id_history_map_.end(); ++iter) {

	830 WordIDHistoryMapEntry* map_entry =

	831 map_item->add_word_id_history_map_entry();

	832 map_entry->set_word_id(iter->first);

	833 const HistoryIDSet& history_id_set(iter->second);

	834 map_entry->set_item_count(history_id_set.size());

	835 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();

	836 set_iter != history_id_set.end(); ++set_iter)

	837 map_entry->add_history_id(*set_iter);

	838 }

	839 }

	840

	841 void URLIndexPrivateData::SaveHistoryInfoMap(

	842 InMemoryURLIndexCacheItem* cache) const {

	843 if (history_info_map_.empty())

	844 return;

	845 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();

	846 map_item->set_item_count(history_info_map_.size());

	847 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

	848 iter != history_info_map_.end(); ++iter) {

	849 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();

	850 map_entry->set_history_id(iter->first);

	851 const URLRow& url_row(iter->second);

	852 // Note: We only save information that contributes to the index so there

	853 // is no need to save search_term_cache_ (not persistent).

	854 map_entry->set_visit_count(url_row.visit_count());

	855 map_entry->set_typed_count(url_row.typed_count());

	856 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

	857 map_entry->set_url(url_row.url().spec());

	858 map_entry->set_title(UTF16ToUTF8(url_row.title()));

	859 }

	860 }

	861

	862 void URLIndexPrivateData::SaveWordStartsMap(

	863 InMemoryURLIndexCacheItem* cache) const {

	864 if (word_starts_map_.empty())

	865 return;

	866 // For unit testing: Enable saving of the cache as an earlier version to

	867 // allow testing of cache file upgrading in ReadFromFile().

	868 // TODO(mrossetti): Instead of intruding on production code with this kind of

	869 // test harness, save a copy of an older version cache with known results.

	870 // Implement this when switching the caching over to SQLite.

	871 if (saved_cache_version_ < 1)

	872 return;

	873

	874 WordStartsMapItem* map_item = cache->mutable_word_starts_map();

	875 map_item->set_item_count(word_starts_map_.size());

	876 for (WordStartsMap::const_iterator iter = word_starts_map_.begin();

	877 iter != word_starts_map_.end(); ++iter) {

	878 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();

	879 map_entry->set_history_id(iter->first);

	880 const RowWordStarts& word_starts(iter->second);

	881 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();

	882 i != word_starts.url_word_starts_.end(); ++i)

	883 map_entry->add_url_word_starts(*i);

	884 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();

	885 i != word_starts.title_word_starts_.end(); ++i)

	886 map_entry->add_title_word_starts(*i);

	887 }

	888 }

	889

	890 // Cache Restoring -------------------------------------------------------------

	891

	892 // static

	893 void URLIndexPrivateData::RestoreFromFileTask(

	894 const FilePath& file_path,

	895 scoped_refptr<URLIndexPrivateData> private_data,

	896 const std::string& languages) {

	897 private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages);

	898 }

	899

	900 // static

	901 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(

	902 const FilePath& file_path,

	903 const std::string& languages) {

	904 base::TimeTicks beginning_time = base::TimeTicks::Now();

	905 if (!file_util::PathExists(file_path))

	906 return NULL;

	907 std::string data;

	908 // If there is no cache file then simply give up. This will cause us to

	909 // attempt to rebuild from the history database.

	910 if (!file_util::ReadFileToString(file_path, &data))

	911 return NULL;

	912

	913 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);

	914 InMemoryURLIndexCacheItem index_cache;

	915 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {

	916 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "

	917 << file_path.value();

	918 return restored_data;

	919 }

	920

	921 if (!restored_data->RestorePrivateData(index_cache, languages))

	922 return NULL;

	923

	924 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

	925 base::TimeTicks::Now() - beginning_time);

	926 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

	927 restored_data->history_id_word_map_.size());

	928 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

	929 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

	930 restored_data->word_map_.size());

	931 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

	932 restored_data->char_word_map_.size());

	933 if (restored_data->Empty())

	934 return NULL; // 'No data' is the same as a failed reload.

	935 return restored_data;

	936 }

	937

	938 // static

	939 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

	940 HistoryDatabase* history_db,

	941 const std::string& languages,

	942 const std::set<std::string>& scheme_whitelist) {

	943 if (!history_db)

	944 return NULL;

	945

	946 base::TimeTicks beginning_time = base::TimeTicks::Now();

	947

	948 scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);

	949 URLDatabase::URLEnumerator history_enum;

	950 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

	951 return NULL;

	952 for (URLRow row; history_enum.GetNextURL(&row); )

	953 rebuilt_data->IndexRow(row, languages, scheme_whitelist);

	954

	955 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

	956 base::TimeTicks::Now() - beginning_time);

	957 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

	958 rebuilt_data->history_id_word_map_.size());

	959 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

	960 rebuilt_data->word_map_.size());

	961 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

	962 rebuilt_data->char_word_map_.size());

	963 return rebuilt_data;

	964 }

	965

	966 bool URLIndexPrivateData::RestorePrivateData(

	967 const InMemoryURLIndexCacheItem& cache,

	968 const std::string& languages) {

	969 if (cache.has_version())

	970 restored_cache_version_ = cache.version();

	971 return RestoreWordList(cache) && RestoreWordMap(cache) &&

	972 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

	973 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);

	974 }

	975

	976 bool URLIndexPrivateData::RestoreWordList(

	977 const InMemoryURLIndexCacheItem& cache) {

	978 if (!cache.has_word_list())

	979 return false;

	980 const WordListItem& list_item(cache.word_list());

	981 uint32 expected_item_count = list_item.word_count();

	982 uint32 actual_item_count = list_item.word_size();

	983 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	984 return false;

	985 const RepeatedPtrField<std::string>& words(list_item.word());

	986 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();

	987 iter != words.end(); ++iter)

	988 word_list_.push_back(UTF8ToUTF16(*iter));

	989 return true;

	990 }

	991

	992 bool URLIndexPrivateData::RestoreWordMap(

	993 const InMemoryURLIndexCacheItem& cache) {

	994 if (!cache.has_word_map())

	995 return false;

	996 const WordMapItem& list_item(cache.word_map());

	997 uint32 expected_item_count = list_item.item_count();

	998 uint32 actual_item_count = list_item.word_map_entry_size();

	999 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	1000 return false;

	1001 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());

	1002 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();

	1003 iter != entries.end(); ++iter)

	1004 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();

	1005 return true;

	1006 }

	1007

	1008 bool URLIndexPrivateData::RestoreCharWordMap(

	1009 const InMemoryURLIndexCacheItem& cache) {

	1010 if (!cache.has_char_word_map())

	1011 return false;

	1012 const CharWordMapItem& list_item(cache.char_word_map());

	1013 uint32 expected_item_count = list_item.item_count();

	1014 uint32 actual_item_count = list_item.char_word_map_entry_size();

	1015 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	1016 return false;

	1017 const RepeatedPtrField<CharWordMapEntry>&

	1018 entries(list_item.char_word_map_entry());

	1019 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =

	1020 entries.begin(); iter != entries.end(); ++iter) {

	1021 expected_item_count = iter->item_count();

	1022 actual_item_count = iter->word_id_size();

	1023 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	1024 return false;

	1025 char16 uni_char = static_cast<char16>(iter->char_16());

	1026 WordIDSet word_id_set;

	1027 const RepeatedField<int32>& word_ids(iter->word_id());

	1028 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();

	1029 jiter != word_ids.end(); ++jiter)

	1030 word_id_set.insert(*jiter);

	1031 char_word_map_[uni_char] = word_id_set;

	1032 }

	1033 return true;

	1034 }

	1035

	1036 bool URLIndexPrivateData::RestoreWordIDHistoryMap(

	1037 const InMemoryURLIndexCacheItem& cache) {

	1038 if (!cache.has_word_id_history_map())

	1039 return false;

	1040 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());

	1041 uint32 expected_item_count = list_item.item_count();

	1042 uint32 actual_item_count = list_item.word_id_history_map_entry_size();

	1043 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	1044 return false;

	1045 const RepeatedPtrField<WordIDHistoryMapEntry>&

	1046 entries(list_item.word_id_history_map_entry());

	1047 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =

	1048 entries.begin(); iter != entries.end(); ++iter) {

	1049 expected_item_count = iter->item_count();

	1050 actual_item_count = iter->history_id_size();

	1051 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	1052 return false;

	1053 WordID word_id = iter->word_id();

	1054 HistoryIDSet history_id_set;

	1055 const RepeatedField<int64>& history_ids(iter->history_id());

	1056 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();

	1057 jiter != history_ids.end(); ++jiter) {

	1058 history_id_set.insert(*jiter);

	1059 AddToHistoryIDWordMap(*jiter, word_id);

	1060 }

	1061 word_id_history_map_[word_id] = history_id_set;

	1062 }

	1063 return true;

	1064 }

	1065

	1066 bool URLIndexPrivateData::RestoreHistoryInfoMap(

	1067 const InMemoryURLIndexCacheItem& cache) {

	1068 if (!cache.has_history_info_map())

	1069 return false;

	1070 const HistoryInfoMapItem& list_item(cache.history_info_map());

	1071 uint32 expected_item_count = list_item.item_count();

	1072 uint32 actual_item_count = list_item.history_info_map_entry_size();

	1073 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	1074 return false;

	1075 const RepeatedPtrField<HistoryInfoMapEntry>&

	1076 entries(list_item.history_info_map_entry());

	1077 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =

	1078 entries.begin(); iter != entries.end(); ++iter) {

	1079 HistoryID history_id = iter->history_id();

	1080 GURL url(iter->url());

	1081 URLRow url_row(url, history_id);

	1082 url_row.set_visit_count(iter->visit_count());

	1083 url_row.set_typed_count(iter->typed_count());

	1084 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));

	1085 if (iter->has_title()) {

	1086 string16 title(UTF8ToUTF16(iter->title()));

	1087 url_row.set_title(title);

	1088 }

	1089 history_info_map_[history_id] = url_row;

	1090 }

	1091 return true;

	1092 }

	1093

	1094 bool URLIndexPrivateData::RestoreWordStartsMap(

	1095 const InMemoryURLIndexCacheItem& cache,

	1096 const std::string& languages) {

	1097 // Note that this function must be called after RestoreHistoryInfoMap() has

	1098 // been run as the word starts may have to be recalculated from the urls and

	1099 // page titles.

	1100 if (cache.has_word_starts_map()) {

	1101 const WordStartsMapItem& list_item(cache.word_starts_map());

	1102 uint32 expected_item_count = list_item.item_count();

	1103 uint32 actual_item_count = list_item.word_starts_map_entry_size();

	1104 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

	1105 return false;

	1106 const RepeatedPtrField<WordStartsMapEntry>&

	1107 entries(list_item.word_starts_map_entry());

	1108 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =

	1109 entries.begin(); iter != entries.end(); ++iter) {

	1110 HistoryID history_id = iter->history_id();

	1111 RowWordStarts word_starts;

	1112 // Restore the URL word starts.

	1113 const RepeatedField<int32>& url_starts(iter->url_word_starts());

	1114 for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();

	1115 jiter != url_starts.end(); ++jiter)

	1116 word_starts.url_word_starts_.push_back(*jiter);

	1117 // Restore the page title word starts.

	1118 const RepeatedField<int32>& title_starts(iter->title_word_starts());

	1119 for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();

	1120 jiter != title_starts.end(); ++jiter)

	1121 word_starts.title_word_starts_.push_back(*jiter);

	1122 word_starts_map_[history_id] = word_starts;

	1123 }

	1124 } else {

	1125 // Since the cache did not contain any word starts we must rebuild then from

	1126 // the URL and page titles.

	1127 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

	1128 iter != history_info_map_.end(); ++iter) {

	1129 RowWordStarts word_starts;

	1130 const URLRow& row(iter->second);

	1131 string16 url(net::FormatUrl(row.url(), languages,

	1132 net::kFormatUrlOmitUsernamePassword,

	1133 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

	1134 NULL, NULL, NULL));

	1135 url = base::i18n::ToLower(url);

	1136 String16VectorFromString16(url, false, &word_starts.url_word_starts_);

	1137 String16VectorFromString16(

	1138 row.title(), false, &word_starts.title_word_starts_);

	1139 word_starts_map_[iter->first] = word_starts;

	1140 }

	1141 }

	1142 return true;

1083 }	1143 }

1084	1144

1085 // static	1145 // static

1086 bool URLIndexPrivateData::URLSchemeIsWhitelisted(	1146 bool URLIndexPrivateData::URLSchemeIsWhitelisted(

1087 const GURL& gurl,	1147 const GURL& gurl,

1088 const std::set<std::string>& whitelist) {	1148 const std::set<std::string>& whitelist) {

1089 return whitelist.find(gurl.scheme()) != whitelist.end();	1149 return whitelist.find(gurl.scheme()) != whitelist.end();

1090 }	1150 }

1091	1151

1092 } // namespace history	1152 } // namespace history

OLD	NEW

« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | chrome/browser/history/url_index_private_data_unittest.cc » ('j') | no next file with comments »