Chromium Code Reviews

Unified Diff: chrome/browser/history/text_database_manager.cc

Issue 16951015: Remove TextDatabase from the history service. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@replace_fts
Patch Set: Sync and rebase. Created 7 years, 5 months ago
Index: chrome/browser/history/text_database_manager.cc
diff --git a/chrome/browser/history/text_database_manager.cc b/chrome/browser/history/text_database_manager.cc
deleted file mode 100644
index c43a5fcece1e034ae29c7bf64d53ab3899ba3d1f..0000000000000000000000000000000000000000
--- a/chrome/browser/history/text_database_manager.cc
+++ /dev/null
@@ -1,586 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/browser/history/text_database_manager.h"
-
-#include <algorithm>
-#include <functional>
-
-#include "base/bind.h"
-#include "base/compiler_specific.h"
-#include "base/files/file_enumerator.h"
-#include "base/logging.h"
-#include "base/message_loop/message_loop.h"
-#include "base/metrics/histogram.h"
-#include "base/strings/string_util.h"
-#include "base/strings/utf_string_conversions.h"
-#include "chrome/browser/history/history_publisher.h"
-#include "chrome/browser/history/visit_database.h"
-
-using base::Time;
-using base::TimeDelta;
-using base::TimeTicks;
-
-namespace history {
-
-namespace {
-
-// The number of database files we will be attached to at once.
-const int kCacheDBSize = 5;
-
-std::string ConvertStringForIndexer(const string16& input) {
- // TODO(evanm): other transformations here?
- return UTF16ToUTF8(CollapseWhitespace(input, false));
-}
-
-// Data older than this will be committed to the full text index even if we
-// haven't gotten a title and/or body.
-const int kExpirationSeconds = 20;
-
-} // namespace
-
-// TextDatabaseManager::ChangeSet ----------------------------------------------
-
-TextDatabaseManager::ChangeSet::ChangeSet() {}
-
-TextDatabaseManager::ChangeSet::~ChangeSet() {}
-
-// TextDatabaseManager::PageInfo -----------------------------------------------
-
-TextDatabaseManager::PageInfo::PageInfo(URLID url_id,
- VisitID visit_id,
- Time visit_time)
- : url_id_(url_id),
- visit_id_(visit_id),
- visit_time_(visit_time) {
- added_time_ = TimeTicks::Now();
-}
-
-TextDatabaseManager::PageInfo::~PageInfo() {}
-
-void TextDatabaseManager::PageInfo::set_title(const string16& ttl) {
- if (ttl.empty()) // Make the title nonempty when we set it for EverybodySet.
- title_ = ASCIIToUTF16(" ");
- else
- title_ = ttl;
-}
-
-void TextDatabaseManager::PageInfo::set_body(const string16& bdy) {
- if (bdy.empty()) // Make the body nonempty when we set it for EverybodySet.
- body_ = ASCIIToUTF16(" ");
- else
- body_ = bdy;
-}
-
-bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const {
- return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds);
-}
-
-// TextDatabaseManager ---------------------------------------------------------
-
-TextDatabaseManager::TextDatabaseManager(const base::FilePath& dir,
- URLDatabase* url_database,
- VisitDatabase* visit_database)
- : dir_(dir),
- url_database_(url_database),
- visit_database_(visit_database),
- recent_changes_(RecentChangeList::NO_AUTO_EVICT),
- transaction_nesting_(0),
- db_cache_(DBCache::NO_AUTO_EVICT),
- present_databases_loaded_(false),
- weak_factory_(this),
- history_publisher_(NULL) {
-}
-
-TextDatabaseManager::~TextDatabaseManager() {
- if (transaction_nesting_)
- CommitTransaction();
-}
-
-// static
-TextDatabase::DBIdent TextDatabaseManager::TimeToID(Time time) {
- Time::Exploded exploded;
- time.UTCExplode(&exploded);
-
- // We combine the month and year into a 6-digit number (200801 for
- // January, 2008). The month is 1-based.
- return exploded.year * 100 + exploded.month;
-}
-
-// static
-Time TextDatabaseManager::IDToTime(TextDatabase::DBIdent id) {
- Time::Exploded exploded;
- memset(&exploded, 0, sizeof(Time::Exploded));
- exploded.year = id / 100;
- exploded.month = id % 100;
- return Time::FromUTCExploded(exploded);
-}
-
-bool TextDatabaseManager::Init(const HistoryPublisher* history_publisher) {
- history_publisher_ = history_publisher;
-
- // Start checking recent changes and committing them.
- ScheduleFlushOldChanges();
- return true;
-}
-
-void TextDatabaseManager::BeginTransaction() {
- transaction_nesting_++;
-}
-
-void TextDatabaseManager::CommitTransaction() {
- DCHECK(transaction_nesting_);
- transaction_nesting_--;
- if (transaction_nesting_)
- return; // Still more nesting of transactions before committing.
-
- // Commit all databases with open transactions on them.
- for (DBIdentSet::const_iterator i = open_transactions_.begin();
- i != open_transactions_.end(); ++i) {
- DBCache::iterator iter = db_cache_.Get(*i);
- if (iter == db_cache_.end()) {
- NOTREACHED() << "All open transactions should be cached.";
- continue;
- }
- iter->second->CommitTransaction();
- }
- open_transactions_.clear();
-
- // Now that the transaction is over, we can expire old connections.
- db_cache_.ShrinkToSize(kCacheDBSize);
-}
-
-void TextDatabaseManager::InitDBList() {
- if (present_databases_loaded_)
- return;
-
- present_databases_loaded_ = true;
-
- // Find files on disk matching our pattern so we can quickly test for them.
- base::FilePath::StringType filepattern(TextDatabase::file_base());
- filepattern.append(FILE_PATH_LITERAL("*"));
- base::FileEnumerator enumerator(
- dir_, false, base::FileEnumerator::FILES, filepattern);
- base::FilePath cur_file;
- while (!(cur_file = enumerator.Next()).empty()) {
- // Convert to the number representing this file.
- TextDatabase::DBIdent id = TextDatabase::FileNameToID(cur_file);
- if (id) // Will be 0 on error.
- present_databases_.insert(id);
- }
-}
-
-void TextDatabaseManager::AddPageURL(const GURL& url,
- URLID url_id,
- VisitID visit_id,
- Time time) {
- // Delete any existing page info.
- RecentChangeList::iterator found = recent_changes_.Peek(url);
- if (found != recent_changes_.end())
- recent_changes_.Erase(found);
-
- // Just save this info for later. We will save it when it expires or when all
- // the data is complete.
- recent_changes_.Put(url, PageInfo(url_id, visit_id, time));
-}
-
-void TextDatabaseManager::AddPageTitle(const GURL& url,
- const string16& title) {
- RecentChangeList::iterator found = recent_changes_.Peek(url);
- if (found == recent_changes_.end()) {
- // This page is not in our cache of recent pages. This is very much an edge
- // case as normally a title will come in <20 seconds after the page commits,
- // and WebContents will avoid spamming us with >1 title per page. However,
- // it could come up if your connection is unhappy, and we don't want to
- // miss anything.
- //
- // To solve this problem, we'll just associate the most recent visit with
- // the new title and index that using the regular code path.
- URLRow url_row;
- if (!url_database_->GetRowForURL(url, &url_row))
- return; // URL is unknown, give up.
- VisitRow visit;
- if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
- return; // No recent visit, give up.
-
- if (visit.is_indexed) {
- // If this page was already indexed, we could have a body that came in
- // first and we don't want to overwrite it. We could go query for the
- // current body, or have a special setter for only the title, but this is
- // not worth it for this edge case.
- //
- // It will be almost impossible for the title to take longer than
- // kExpirationSeconds yet we got a body in less than that time, since
- // the title should always come in first.
- return;
- }
-
- AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
- title, string16());
- return; // We don't know about this page, give up.
- }
-
- PageInfo& info = found->second;
- if (info.has_body()) {
- // This info is complete, write to the database.
- AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
- title, info.body());
- recent_changes_.Erase(found);
- return;
- }
-
- info.set_title(title);
-}
-
-void TextDatabaseManager::AddPageContents(const GURL& url,
- const string16& body) {
- RecentChangeList::iterator found = recent_changes_.Peek(url);
- if (found == recent_changes_.end()) {
- // This page is not in our cache of recent pages. This means that the page
- // took more than kExpirationSeconds to load. Often, this will be the result
- // of a very slow iframe or other resource on the page that makes us think
- // it's still loading.
- //
- // As a fallback, set the most recent visit's contents using the input, and
- // use the last set title in the URL table as the title to index.
- URLRow url_row;
- if (!url_database_->GetRowForURL(url, &url_row))
- return; // URL is unknown, give up.
- VisitRow visit;
- if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit))
- return; // No recent visit, give up.
-
- // Use the title from the URL row as the title for the indexing.
- AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
- url_row.title(), body);
- return;
- }
-
- PageInfo& info = found->second;
- if (info.has_title()) {
- // This info is complete, write to the database.
- AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
- info.title(), body);
- recent_changes_.Erase(found);
- return;
- }
-
- info.set_body(body);
-}
-
-bool TextDatabaseManager::AddPageData(const GURL& url,
- URLID url_id,
- VisitID visit_id,
- Time visit_time,
- const string16& title,
- const string16& body) {
- TextDatabase* db = GetDBForTime(visit_time, true);
- if (!db)
- return false;
-
- TimeTicks beginning_time = TimeTicks::Now();
-
- // First delete any recently-indexed data for this page. This will delete
- // anything in the main database, but we don't bother looking through the
- // archived database.
- VisitVector visits;
- visit_database_->GetIndexedVisitsForURL(url_id, &visits);
- for (size_t i = 0; i < visits.size(); i++) {
- visits[i].is_indexed = false;
- visit_database_->UpdateVisitRow(visits[i]);
- DeletePageData(visits[i].visit_time, url, NULL);
- }
-
- if (visit_id) {
- // We're supposed to update the visit database, so load the visit.
- VisitRow row;
- if (!visit_database_->GetRowForVisit(visit_id, &row)) {
- // This situation can occur if Chrome's history is in the process of
- // being updated, and then the browsing history is deleted before all
- // updates have been completely performed. In this case, a stale update
- // to the database is attempted, leading to the warning below.
- DLOG(WARNING) << "Could not find requested visit #" << visit_id;
- return false;
- }
-
- DCHECK(visit_time == row.visit_time);
-
- // Update the visit database to reference our addition.
- row.is_indexed = true;
- if (!visit_database_->UpdateVisitRow(row))
- return false;
- }
-
- // Now index the data.
- std::string url_str = URLDatabase::GURLToDatabaseURL(url);
- bool success = db->AddPageData(visit_time, url_str,
- ConvertStringForIndexer(title),
- ConvertStringForIndexer(body));
-
- UMA_HISTOGRAM_TIMES("History.AddFTSData",
- TimeTicks::Now() - beginning_time);
-
- if (history_publisher_)
- history_publisher_->PublishPageContent(visit_time, url, title, body);
-
- return success;
-}
-
-void TextDatabaseManager::DeletePageData(Time time, const GURL& url,
- ChangeSet* change_set) {
- TextDatabase::DBIdent db_ident = TimeToID(time);
-
- // We want to open the database for writing, but only if it exists. To
- // achieve this, we check whether it exists by saying we're not going to
- // write to it (avoiding the autocreation code normally called when writing)
- // and then access it for writing only if it succeeds.
- TextDatabase* db = GetDB(db_ident, false);
- if (!db)
- return;
- db = GetDB(db_ident, true);
-
- if (change_set)
- change_set->Add(db_ident);
-
- db->DeletePageData(time, URLDatabase::GURLToDatabaseURL(url));
-}
-
-void TextDatabaseManager::DeleteFromUncommitted(
- const std::set<GURL>& restrict_urls, Time begin, Time end) {
- // First find the beginning of the range to delete. Recall that the list
- // has the most recent item at the beginning. There won't normally be very
- // many items, so a brute-force search is fine.
- RecentChangeList::iterator cur = recent_changes_.begin();
- if (!end.is_null()) {
- // Walk from the beginning of the list backwards in time to find the newest
- // entry that should be deleted.
- while (cur != recent_changes_.end() && cur->second.visit_time() >= end)
- ++cur;
- }
-
- // Now delete all visits up to the oldest one we were supposed to delete.
- // Note that if begin is_null, it will be less than or equal to any other
- // time.
- if (restrict_urls.empty()) {
- while (cur != recent_changes_.end() && cur->second.visit_time() >= begin)
- cur = recent_changes_.Erase(cur);
- } else {
- while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) {
- if (restrict_urls.find(cur->first) != restrict_urls.end())
- cur = recent_changes_.Erase(cur);
- else
- ++cur;
- }
- }
-}
-
-void TextDatabaseManager::DeleteFromUncommittedForTimes(
- const std::vector<base::Time>& times) {
- // |times| must be in reverse chronological order, i.e. each member
- // must be earlier than or the same as the one before it.
- DCHECK(
- std::adjacent_find(
- times.begin(), times.end(), std::less<base::Time>()) ==
- times.end());
-
- // Both |recent_changes_| and |times| are in reverse chronological order.
- RecentChangeList::iterator it = recent_changes_.begin();
- std::vector<base::Time>::const_iterator time_it = times.begin();
- while (it != recent_changes_.end() && time_it != times.end()) {
- base::Time visit_time = it->second.visit_time();
- if (visit_time == *time_it) {
- it = recent_changes_.Erase(it);
- } else if (visit_time < *time_it) {
- ++time_it;
- } else /* if (visit_time > *time_it) */ {
- ++it;
- }
- }
-}
-
-void TextDatabaseManager::DeleteAll() {
- DCHECK_EQ(0, transaction_nesting_) << "Calling deleteAll in a transaction.";
-
- InitDBList();
-
- // Delete uncommitted entries.
- recent_changes_.Clear();
-
- // Close all open databases.
- db_cache_.Clear();
-
- // Now go through and delete all the files.
- for (DBIdentSet::iterator i = present_databases_.begin();
- i != present_databases_.end(); ++i) {
- base::FilePath file_name = dir_.Append(TextDatabase::IDToFileName(*i));
- sql::Connection::Delete(file_name);
- }
-}
-
-void TextDatabaseManager::OptimizeChangedDatabases(
- const ChangeSet& change_set) {
- for (ChangeSet::DBSet::const_iterator i =
- change_set.changed_databases_.begin();
- i != change_set.changed_databases_.end(); ++i) {
- // We want to open the database for writing, but only if it exists. To
- // achieve this, we check whether it exists by saying we're not going to
- // write to it (avoiding the autocreation code normally called when writing)
- // and then access it for writing only if it succeeds.
- TextDatabase* db = GetDB(*i, false);
- if (!db)
- continue;
- db = GetDB(*i, true);
- if (!db)
- continue; // The file may have changed or something.
- db->Optimize();
- }
-}
-
-void TextDatabaseManager::GetTextMatches(
- const string16& query,
- const QueryOptions& options,
- std::vector<TextDatabase::Match>* results,
- Time* first_time_searched) {
- results->clear();
-
- *first_time_searched = options.begin_time;
-
- InitDBList();
- if (present_databases_.empty())
- return; // Nothing to search.
-
- // Get the query into the proper format for the individual DBs.
- string16 fts_query16;
- query_parser_.ParseQuery(query, &fts_query16);
- std::string fts_query = UTF16ToUTF8(fts_query16);
-
- // Need a copy of the options so we can modify the max count for each call
- // to the individual databases.
- QueryOptions cur_options(options);
-
- // Compute the minimum and maximum values for the identifiers that could
- // encompass the input time range.
- TextDatabase::DBIdent min_ident = options.begin_time.is_null() ?
- *present_databases_.begin() :
- TimeToID(options.begin_time);
- TextDatabase::DBIdent max_ident = options.end_time.is_null() ?
- *present_databases_.rbegin() :
- TimeToID(options.end_time);
-
- // Iterate over the databases from the most recent backwards.
- TextDatabase::URLSet found_urls;
- for (DBIdentSet::reverse_iterator i = present_databases_.rbegin();
- i != present_databases_.rend();
- ++i) {
- // TODO(brettw) allow canceling the query in the middle.
- // if (canceled_or_something)
- // break;
-
- // This code is stupid, we just loop until we find the correct starting
- // time range rather than search in an intelligent way. Users will have a
- // few dozen files at most, so this should not be an issue.
- if (*i > max_ident)
- continue; // Haven't gotten to the time range yet.
- if (*i < min_ident)
- break; // Covered all the time range.
-
- TextDatabase* cur_db = GetDB(*i, false);
- if (!cur_db)
- continue;
-
- // Adjust the max count according to how many results we've already got.
- if (options.max_count) {
- cur_options.max_count = options.max_count -
- static_cast<int>(results->size());
- }
-
- bool has_more_results = cur_db->GetTextMatches(
- fts_query, cur_options, results, &found_urls);
-
- DCHECK(static_cast<int>(results->size()) <= options.EffectiveMaxCount());
-
- if (has_more_results ||
- static_cast<int>(results->size()) == options.EffectiveMaxCount()) {
- // Since the search proceeds backwards in time, the last result we have
- // gives the first time searched.
- *first_time_searched = results->back().time;
- break;
- }
- }
-}
-
-size_t TextDatabaseManager::GetUncommittedEntryCountForTest() const {
- return recent_changes_.size();
-}
-
-TextDatabase* TextDatabaseManager::GetDB(TextDatabase::DBIdent id,
- bool for_writing) {
- DBCache::iterator found_db = db_cache_.Get(id);
- if (found_db != db_cache_.end()) {
- if (transaction_nesting_ && for_writing &&
- open_transactions_.find(id) == open_transactions_.end()) {
- // If we currently have an open transaction, that database is not yet
- // part of the transaction, and the database will be written to, it needs
- // to be part of our transaction.
- found_db->second->BeginTransaction();
- open_transactions_.insert(id);
- }
- return found_db->second;
- }
-
- // Need to make the database.
- TextDatabase* new_db = new TextDatabase(dir_, id, for_writing);
- if (!new_db->Init()) {
- delete new_db;
- return NULL;
- }
- db_cache_.Put(id, new_db);
- present_databases_.insert(id);
-
- if (transaction_nesting_ && for_writing) {
- // If we currently have an open transaction and the new database will be
- // written to, it needs to be part of our transaction.
- new_db->BeginTransaction();
- open_transactions_.insert(id);
- }
-
- // When no transaction is open, allow this new one to kick out an old one.
- if (!transaction_nesting_)
- db_cache_.ShrinkToSize(kCacheDBSize);
-
- return new_db;
-}
-
-TextDatabase* TextDatabaseManager::GetDBForTime(Time time,
- bool create_if_necessary) {
- return GetDB(TimeToID(time), create_if_necessary);
-}
-
-void TextDatabaseManager::ScheduleFlushOldChanges() {
- weak_factory_.InvalidateWeakPtrs();
- base::MessageLoop::current()->PostDelayedTask(
- FROM_HERE,
- base::Bind(&TextDatabaseManager::FlushOldChanges,
- weak_factory_.GetWeakPtr()),
- base::TimeDelta::FromSeconds(kExpirationSeconds));
-}
-
-void TextDatabaseManager::FlushOldChanges() {
- FlushOldChangesForTime(TimeTicks::Now());
-}
-
-void TextDatabaseManager::FlushOldChangesForTime(TimeTicks now) {
- // The end of the list is the oldest, so we just start from there committing
- // things until we get something too new.
- RecentChangeList::reverse_iterator i = recent_changes_.rbegin();
- while (i != recent_changes_.rend() && i->second.Expired(now)) {
- AddPageData(i->first, i->second.url_id(), i->second.visit_id(),
- i->second.visit_time(), i->second.title(), i->second.body());
- i = recent_changes_.Erase(i);
- }
-
- ScheduleFlushOldChanges();
-}
-
-} // namespace history
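
For readers skimming the removed file: the manager sharded its full-text index into one database file per calendar month, keyed by the YYYYMM integer computed in TimeToID()/IDToTime() above. Below is a minimal standalone sketch of that keying scheme, using plain C/POSIX time APIs rather than the base::Time types in the original (gmtime_r and timegm are assumed available; the function names are illustrative, not from the deleted file).

// Sketch of the per-month database identifier scheme, assuming POSIX time APIs.
#include <cassert>
#include <ctime>

// One database file per calendar month, identified by year*100 + month,
// e.g. January 2008 -> 200801 (month is 1-based, as in the original).
int TimeToMonthId(std::time_t t) {
  std::tm exploded;
  gmtime_r(&t, &exploded);  // UTC, matching UTCExplode() in the original.
  return (exploded.tm_year + 1900) * 100 + (exploded.tm_mon + 1);
}

// Inverse mapping: recover the first instant of the identified month.
std::time_t MonthIdToTime(int id) {
  std::tm exploded = {};
  exploded.tm_year = id / 100 - 1900;
  exploded.tm_mon = id % 100 - 1;
  exploded.tm_mday = 1;
  return timegm(&exploded);
}

int main() {
  const std::time_t now = std::time(nullptr);
  const int id = TimeToMonthId(now);
  // Round-tripping an identifier lands back in the same month bucket.
  assert(TimeToMonthId(MonthIdToTime(id)) == id);
  return 0;
}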