Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(394)

Side by Side Diff: chrome/browser/history/text_database_manager.cc

Issue 16951015: Remove TextDatabase from the history service. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@replace_fts
Patch Set: Sync and rebase. Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/history/text_database_manager.h"
6
7 #include <algorithm>
8 #include <functional>
9
10 #include "base/bind.h"
11 #include "base/compiler_specific.h"
12 #include "base/files/file_enumerator.h"
13 #include "base/logging.h"
14 #include "base/message_loop/message_loop.h"
15 #include "base/metrics/histogram.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "chrome/browser/history/history_publisher.h"
19 #include "chrome/browser/history/visit_database.h"
20
21 using base::Time;
22 using base::TimeDelta;
23 using base::TimeTicks;
24
25 namespace history {
26
27 namespace {
28
29 // The number of database files we will be attached to at once.
30 const int kCacheDBSize = 5;
31
32 std::string ConvertStringForIndexer(const string16& input) {
33 // TODO(evanm): other transformations here?
34 return UTF16ToUTF8(CollapseWhitespace(input, false));
35 }
36
37 // Data older than this will be committed to the full text index even if we
38 // haven't gotten a title and/or body.
39 const int kExpirationSeconds = 20;
40
41 } // namespace
42
43 // TextDatabaseManager::ChangeSet ----------------------------------------------
44
45 TextDatabaseManager::ChangeSet::ChangeSet() {}
46
47 TextDatabaseManager::ChangeSet::~ChangeSet() {}
48
49 // TextDatabaseManager::PageInfo -----------------------------------------------
50
51 TextDatabaseManager::PageInfo::PageInfo(URLID url_id,
52 VisitID visit_id,
53 Time visit_time)
54 : url_id_(url_id),
55 visit_id_(visit_id),
56 visit_time_(visit_time) {
57 added_time_ = TimeTicks::Now();
58 }
59
60 TextDatabaseManager::PageInfo::~PageInfo() {}
61
62 void TextDatabaseManager::PageInfo::set_title(const string16& ttl) {
63 if (ttl.empty()) // Make the title nonempty when we set it for EverybodySet.
64 title_ = ASCIIToUTF16(" ");
65 else
66 title_ = ttl;
67 }
68
69 void TextDatabaseManager::PageInfo::set_body(const string16& bdy) {
70 if (bdy.empty()) // Make the body nonempty when we set it for EverybodySet.
71 body_ = ASCIIToUTF16(" ");
72 else
73 body_ = bdy;
74 }
75
76 bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const {
77 return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds);
78 }
79
80 // TextDatabaseManager ---------------------------------------------------------
81
82 TextDatabaseManager::TextDatabaseManager(const base::FilePath& dir,
83 URLDatabase* url_database,
84 VisitDatabase* visit_database)
85 : dir_(dir),
86 url_database_(url_database),
87 visit_database_(visit_database),
88 recent_changes_(RecentChangeList::NO_AUTO_EVICT),
89 transaction_nesting_(0),
90 db_cache_(DBCache::NO_AUTO_EVICT),
91 present_databases_loaded_(false),
92 weak_factory_(this),
93 history_publisher_(NULL) {
94 }
95
96 TextDatabaseManager::~TextDatabaseManager() {
97 if (transaction_nesting_)
98 CommitTransaction();
99 }
100
101 // static
102 TextDatabase::DBIdent TextDatabaseManager::TimeToID(Time time) {
103 Time::Exploded exploded;
104 time.UTCExplode(&exploded);
105
106 // We combine the month and year into a 6-digit number (200801 for
107 // January, 2008). The month is 1-based.
108 return exploded.year * 100 + exploded.month;
109 }
110
111 // static
112 Time TextDatabaseManager::IDToTime(TextDatabase::DBIdent id) {
113 Time::Exploded exploded;
114 memset(&exploded, 0, sizeof(Time::Exploded));
115 exploded.year = id / 100;
116 exploded.month = id % 100;
117 return Time::FromUTCExploded(exploded);
118 }
119
120 bool TextDatabaseManager::Init(const HistoryPublisher* history_publisher) {
121 history_publisher_ = history_publisher;
122
123 // Start checking recent changes and committing them.
124 ScheduleFlushOldChanges();
125 return true;
126 }
127
128 void TextDatabaseManager::BeginTransaction() {
129 transaction_nesting_++;
130 }
131
132 void TextDatabaseManager::CommitTransaction() {
133 DCHECK(transaction_nesting_);
134 transaction_nesting_--;
135 if (transaction_nesting_)
136 return; // Still more nesting of transactions before committing.
137
138 // Commit all databases with open transactions on them.
139 for (DBIdentSet::const_iterator i = open_transactions_.begin();
140 i != open_transactions_.end(); ++i) {
141 DBCache::iterator iter = db_cache_.Get(*i);
142 if (iter == db_cache_.end()) {
143 NOTREACHED() << "All open transactions should be cached.";
144 continue;
145 }
146 iter->second->CommitTransaction();
147 }
148 open_transactions_.clear();
149
150 // Now that the transaction is over, we can expire old connections.
151 db_cache_.ShrinkToSize(kCacheDBSize);
152 }
153
154 void TextDatabaseManager::InitDBList() {
155 if (present_databases_loaded_)
156 return;
157
158 present_databases_loaded_ = true;
159
160 // Find files on disk matching our pattern so we can quickly test for them.
161 base::FilePath::StringType filepattern(TextDatabase::file_base());
162 filepattern.append(FILE_PATH_LITERAL("*"));
163 base::FileEnumerator enumerator(
164 dir_, false, base::FileEnumerator::FILES, filepattern);
165 base::FilePath cur_file;
166 while (!(cur_file = enumerator.Next()).empty()) {
167 // Convert to the number representing this file.
168 TextDatabase::DBIdent id = TextDatabase::FileNameToID(cur_file);
169 if (id) // Will be 0 on error.
170 present_databases_.insert(id);
171 }
172 }
173
174 void TextDatabaseManager::AddPageURL(const GURL& url,
175 URLID url_id,
176 VisitID visit_id,
177 Time time) {
178 // Delete any existing page info.
179 RecentChangeList::iterator found = recent_changes_.Peek(url);
180 if (found != recent_changes_.end())
181 recent_changes_.Erase(found);
182
183 // Just save this info for later. We will save it when it expires or when all
184 // the data is complete.
185 recent_changes_.Put(url, PageInfo(url_id, visit_id, time));
186 }
187
188 void TextDatabaseManager::AddPageTitle(const GURL& url,
189 const string16& title) {
190 RecentChangeList::iterator found = recent_changes_.Peek(url);
191 if (found == recent_changes_.end()) {
192 // This page is not in our cache of recent pages. This is very much an edge
193 // case as normally a title will come in <20 seconds after the page commits,
194 // and WebContents will avoid spamming us with >1 title per page. However,
195 // it could come up if your connection is unhappy, and we don't want to
196 // miss anything.
197 //
198 // To solve this problem, we'll just associate the most recent visit with
199 // the new title and index that using the regular code path.
200 URLRow url_row;
201 if (!url_database_->GetRowForURL(url, &url_row))
202 return; // URL is unknown, give up.
203 VisitRow visit;
204 if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
205 return; // No recent visit, give up.
206
207 if (visit.is_indexed) {
208 // If this page was already indexed, we could have a body that came in
209 // first and we don't want to overwrite it. We could go query for the
210 // current body, or have a special setter for only the title, but this is
211 // not worth it for this edge case.
212 //
213 // It will be almost impossible for the title to take longer than
214 // kExpirationSeconds yet we got a body in less than that time, since
215 // the title should always come in first.
216 return;
217 }
218
219 AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
220 title, string16());
221 return; // We don't know about this page, give up.
222 }
223
224 PageInfo& info = found->second;
225 if (info.has_body()) {
226 // This info is complete, write to the database.
227 AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
228 title, info.body());
229 recent_changes_.Erase(found);
230 return;
231 }
232
233 info.set_title(title);
234 }
235
236 void TextDatabaseManager::AddPageContents(const GURL& url,
237 const string16& body) {
238 RecentChangeList::iterator found = recent_changes_.Peek(url);
239 if (found == recent_changes_.end()) {
240 // This page is not in our cache of recent pages. This means that the page
241 // took more than kExpirationSeconds to load. Often, this will be the result
242 // of a very slow iframe or other resource on the page that makes us think
243 // it's still loading.
244 //
245 // As a fallback, set the most recent visit's contents using the input, and
246 // use the last set title in the URL table as the title to index.
247 URLRow url_row;
248 if (!url_database_->GetRowForURL(url, &url_row))
249 return; // URL is unknown, give up.
250 VisitRow visit;
251 if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
252 return; // No recent visit, give up.
253
254 // Use the title from the URL row as the title for the indexing.
255 AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
256 url_row.title(), body);
257 return;
258 }
259
260 PageInfo& info = found->second;
261 if (info.has_title()) {
262 // This info is complete, write to the database.
263 AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
264 info.title(), body);
265 recent_changes_.Erase(found);
266 return;
267 }
268
269 info.set_body(body);
270 }
271
272 bool TextDatabaseManager::AddPageData(const GURL& url,
273 URLID url_id,
274 VisitID visit_id,
275 Time visit_time,
276 const string16& title,
277 const string16& body) {
278 TextDatabase* db = GetDBForTime(visit_time, true);
279 if (!db)
280 return false;
281
282 TimeTicks beginning_time = TimeTicks::Now();
283
284 // First delete any recently-indexed data for this page. This will delete
285 // anything in the main database, but we don't bother looking through the
286 // archived database.
287 VisitVector visits;
288 visit_database_->GetIndexedVisitsForURL(url_id, &visits);
289 for (size_t i = 0; i < visits.size(); i++) {
290 visits[i].is_indexed = false;
291 visit_database_->UpdateVisitRow(visits[i]);
292 DeletePageData(visits[i].visit_time, url, NULL);
293 }
294
295 if (visit_id) {
296 // We're supposed to update the visit database, so load the visit.
297 VisitRow row;
298 if (!visit_database_->GetRowForVisit(visit_id, &row)) {
299 // This situation can occur if Chrome's history is in the process of
300 // being updated, and then the browsing history is deleted before all
301 // updates have been completely performed. In this case, a stale update
302 // to the database is attempted, leading to the warning below.
303 DLOG(WARNING) << "Could not find requested visit #" << visit_id;
304 return false;
305 }
306
307 DCHECK(visit_time == row.visit_time);
308
309 // Update the visit database to reference our addition.
310 row.is_indexed = true;
311 if (!visit_database_->UpdateVisitRow(row))
312 return false;
313 }
314
315 // Now index the data.
316 std::string url_str = URLDatabase::GURLToDatabaseURL(url);
317 bool success = db->AddPageData(visit_time, url_str,
318 ConvertStringForIndexer(title),
319 ConvertStringForIndexer(body));
320
321 UMA_HISTOGRAM_TIMES("History.AddFTSData",
322 TimeTicks::Now() - beginning_time);
323
324 if (history_publisher_)
325 history_publisher_->PublishPageContent(visit_time, url, title, body);
326
327 return success;
328 }
329
330 void TextDatabaseManager::DeletePageData(Time time, const GURL& url,
331 ChangeSet* change_set) {
332 TextDatabase::DBIdent db_ident = TimeToID(time);
333
334 // We want to open the database for writing, but only if it exists. To
335 // achieve this, we check whether it exists by saying we're not going to
336 // write to it (avoiding the autocreation code normally called when writing)
337 // and then access it for writing only if it succeeds.
338 TextDatabase* db = GetDB(db_ident, false);
339 if (!db)
340 return;
341 db = GetDB(db_ident, true);
342
343 if (change_set)
344 change_set->Add(db_ident);
345
346 db->DeletePageData(time, URLDatabase::GURLToDatabaseURL(url));
347 }
348
349 void TextDatabaseManager::DeleteFromUncommitted(
350 const std::set<GURL>& restrict_urls, Time begin, Time end) {
351 // First find the beginning of the range to delete. Recall that the list
352 // has the most recent item at the beginning. There won't normally be very
353 // many items, so a brute-force search is fine.
354 RecentChangeList::iterator cur = recent_changes_.begin();
355 if (!end.is_null()) {
356 // Walk from the beginning of the list backwards in time to find the newest
357 // entry that should be deleted.
358 while (cur != recent_changes_.end() && cur->second.visit_time() >= end)
359 ++cur;
360 }
361
362 // Now delete all visits up to the oldest one we were supposed to delete.
363 // Note that if begin is_null, it will be less than or equal to any other
364 // time.
365 if (restrict_urls.empty()) {
366 while (cur != recent_changes_.end() && cur->second.visit_time() >= begin)
367 cur = recent_changes_.Erase(cur);
368 } else {
369 while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) {
370 if (restrict_urls.find(cur->first) != restrict_urls.end())
371 cur = recent_changes_.Erase(cur);
372 else
373 ++cur;
374 }
375 }
376 }
377
378 void TextDatabaseManager::DeleteFromUncommittedForTimes(
379 const std::vector<base::Time>& times) {
380 // |times| must be in reverse chronological order, i.e. each member
381 // must be earlier than or the same as the one before it.
382 DCHECK(
383 std::adjacent_find(
384 times.begin(), times.end(), std::less<base::Time>()) ==
385 times.end());
386
387 // Both |recent_changes_| and |times| are in reverse chronological order.
388 RecentChangeList::iterator it = recent_changes_.begin();
389 std::vector<base::Time>::const_iterator time_it = times.begin();
390 while (it != recent_changes_.end() && time_it != times.end()) {
391 base::Time visit_time = it->second.visit_time();
392 if (visit_time == *time_it) {
393 it = recent_changes_.Erase(it);
394 } else if (visit_time < *time_it) {
395 ++time_it;
396 } else /* if (visit_time > *time_it) */ {
397 ++it;
398 }
399 }
400 }
401
402 void TextDatabaseManager::DeleteAll() {
403 DCHECK_EQ(0, transaction_nesting_) << "Calling deleteAll in a transaction.";
404
405 InitDBList();
406
407 // Delete uncommitted entries.
408 recent_changes_.Clear();
409
410 // Close all open databases.
411 db_cache_.Clear();
412
413 // Now go through and delete all the files.
414 for (DBIdentSet::iterator i = present_databases_.begin();
415 i != present_databases_.end(); ++i) {
416 base::FilePath file_name = dir_.Append(TextDatabase::IDToFileName(*i));
417 sql::Connection::Delete(file_name);
418 }
419 }
420
421 void TextDatabaseManager::OptimizeChangedDatabases(
422 const ChangeSet& change_set) {
423 for (ChangeSet::DBSet::const_iterator i =
424 change_set.changed_databases_.begin();
425 i != change_set.changed_databases_.end(); ++i) {
426 // We want to open the database for writing, but only if it exists. To
427 // achieve this, we check whether it exists by saying we're not going to
428 // write to it (avoiding the autocreation code normally called when writing)
429 // and then access it for writing only if it succeeds.
430 TextDatabase* db = GetDB(*i, false);
431 if (!db)
432 continue;
433 db = GetDB(*i, true);
434 if (!db)
435 continue; // The file may have changed or something.
436 db->Optimize();
437 }
438 }
439
440 void TextDatabaseManager::GetTextMatches(
441 const string16& query,
442 const QueryOptions& options,
443 std::vector<TextDatabase::Match>* results,
444 Time* first_time_searched) {
445 results->clear();
446
447 *first_time_searched = options.begin_time;
448
449 InitDBList();
450 if (present_databases_.empty())
451 return; // Nothing to search.
452
453 // Get the query into the proper format for the individual DBs.
454 string16 fts_query16;
455 query_parser_.ParseQuery(query, &fts_query16);
456 std::string fts_query = UTF16ToUTF8(fts_query16);
457
458 // Need a copy of the options so we can modify the max count for each call
459 // to the individual databases.
460 QueryOptions cur_options(options);
461
462 // Compute the minimum and maximum values for the identifiers that could
463 // encompass the input time range.
464 TextDatabase::DBIdent min_ident = options.begin_time.is_null() ?
465 *present_databases_.begin() :
466 TimeToID(options.begin_time);
467 TextDatabase::DBIdent max_ident = options.end_time.is_null() ?
468 *present_databases_.rbegin() :
469 TimeToID(options.end_time);
470
471 // Iterate over the databases from the most recent backwards.
472 TextDatabase::URLSet found_urls;
473 for (DBIdentSet::reverse_iterator i = present_databases_.rbegin();
474 i != present_databases_.rend();
475 ++i) {
476 // TODO(brettw) allow canceling the query in the middle.
477 // if (canceled_or_something)
478 // break;
479
480 // This code is stupid, we just loop until we find the correct starting
481 // time range rather than search in an intelligent way. Users will have a
482 // few dozen files at most, so this should not be an issue.
483 if (*i > max_ident)
484 continue; // Haven't gotten to the time range yet.
485 if (*i < min_ident)
486 break; // Covered all the time range.
487
488 TextDatabase* cur_db = GetDB(*i, false);
489 if (!cur_db)
490 continue;
491
492 // Adjust the max count according to how many results we've already got.
493 if (options.max_count) {
494 cur_options.max_count = options.max_count -
495 static_cast<int>(results->size());
496 }
497
498 bool has_more_results = cur_db->GetTextMatches(
499 fts_query, cur_options, results, &found_urls);
500
501 DCHECK(static_cast<int>(results->size()) <= options.EffectiveMaxCount());
502
503 if (has_more_results ||
504 static_cast<int>(results->size()) == options.EffectiveMaxCount()) {
505 // Since the search proceeds backwards in time, the last result we have
506 // gives the first time searched.
507 *first_time_searched = results->back().time;
508 break;
509 }
510 }
511 }
512
513 size_t TextDatabaseManager::GetUncommittedEntryCountForTest() const {
514 return recent_changes_.size();
515 }
516
517 TextDatabase* TextDatabaseManager::GetDB(TextDatabase::DBIdent id,
518 bool for_writing) {
519 DBCache::iterator found_db = db_cache_.Get(id);
520 if (found_db != db_cache_.end()) {
521 if (transaction_nesting_ && for_writing &&
522 open_transactions_.find(id) == open_transactions_.end()) {
523 // If we currently have an open transaction, that database is not yet
524 // part of the transaction, and the database will be written to, it needs
525 // to be part of our transaction.
526 found_db->second->BeginTransaction();
527 open_transactions_.insert(id);
528 }
529 return found_db->second;
530 }
531
532 // Need to make the database.
533 TextDatabase* new_db = new TextDatabase(dir_, id, for_writing);
534 if (!new_db->Init()) {
535 delete new_db;
536 return NULL;
537 }
538 db_cache_.Put(id, new_db);
539 present_databases_.insert(id);
540
541 if (transaction_nesting_ && for_writing) {
542 // If we currently have an open transaction and the new database will be
543 // written to, it needs to be part of our transaction.
544 new_db->BeginTransaction();
545 open_transactions_.insert(id);
546 }
547
548 // When no transaction is open, allow this new one to kick out an old one.
549 if (!transaction_nesting_)
550 db_cache_.ShrinkToSize(kCacheDBSize);
551
552 return new_db;
553 }
554
555 TextDatabase* TextDatabaseManager::GetDBForTime(Time time,
556 bool create_if_necessary) {
557 return GetDB(TimeToID(time), create_if_necessary);
558 }
559
560 void TextDatabaseManager::ScheduleFlushOldChanges() {
561 weak_factory_.InvalidateWeakPtrs();
562 base::MessageLoop::current()->PostDelayedTask(
563 FROM_HERE,
564 base::Bind(&TextDatabaseManager::FlushOldChanges,
565 weak_factory_.GetWeakPtr()),
566 base::TimeDelta::FromSeconds(kExpirationSeconds));
567 }
568
569 void TextDatabaseManager::FlushOldChanges() {
570 FlushOldChangesForTime(TimeTicks::Now());
571 }
572
573 void TextDatabaseManager::FlushOldChangesForTime(TimeTicks now) {
574 // The end of the list is the oldest, so we just start from there committing
575 // things until we get something too new.
576 RecentChangeList::reverse_iterator i = recent_changes_.rbegin();
577 while (i != recent_changes_.rend() && i->second.Expired(now)) {
578 AddPageData(i->first, i->second.url_id(), i->second.visit_id(),
579 i->second.visit_time(), i->second.title(), i->second.body());
580 i = recent_changes_.Erase(i);
581 }
582
583 ScheduleFlushOldChanges();
584 }
585
586 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/history/text_database_manager.h ('k') | chrome/browser/history/text_database_manager_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698