Index: chrome/browser/history/text_database_manager.cc |
diff --git a/chrome/browser/history/text_database_manager.cc b/chrome/browser/history/text_database_manager.cc |
deleted file mode 100644 |
index c43a5fcece1e034ae29c7bf64d53ab3899ba3d1f..0000000000000000000000000000000000000000 |
--- a/chrome/browser/history/text_database_manager.cc |
+++ /dev/null |
@@ -1,586 +0,0 @@ |
-// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include "chrome/browser/history/text_database_manager.h" |
- |
-#include <algorithm> |
-#include <functional> |
- |
-#include "base/bind.h" |
-#include "base/compiler_specific.h" |
-#include "base/files/file_enumerator.h" |
-#include "base/logging.h" |
-#include "base/message_loop/message_loop.h" |
-#include "base/metrics/histogram.h" |
-#include "base/strings/string_util.h" |
-#include "base/strings/utf_string_conversions.h" |
-#include "chrome/browser/history/history_publisher.h" |
-#include "chrome/browser/history/visit_database.h" |
- |
-using base::Time; |
-using base::TimeDelta; |
-using base::TimeTicks; |
- |
-namespace history { |
- |
-namespace { |
- |
-// The number of database files we will be attached to at once. |
-const int kCacheDBSize = 5; |
- |
-std::string ConvertStringForIndexer(const string16& input) { |
- // TODO(evanm): other transformations here? |
- return UTF16ToUTF8(CollapseWhitespace(input, false)); |
-} |
- |
-// Data older than this will be committed to the full text index even if we |
-// haven't gotten a title and/or body. |
-const int kExpirationSeconds = 20; |
- |
-} // namespace |
- |
-// TextDatabaseManager::ChangeSet ---------------------------------------------- |
- |
-TextDatabaseManager::ChangeSet::ChangeSet() {} |
- |
-TextDatabaseManager::ChangeSet::~ChangeSet() {} |
- |
-// TextDatabaseManager::PageInfo ----------------------------------------------- |
- |
-TextDatabaseManager::PageInfo::PageInfo(URLID url_id, |
- VisitID visit_id, |
- Time visit_time) |
- : url_id_(url_id), |
- visit_id_(visit_id), |
- visit_time_(visit_time) { |
- added_time_ = TimeTicks::Now(); |
-} |
- |
-TextDatabaseManager::PageInfo::~PageInfo() {} |
- |
-void TextDatabaseManager::PageInfo::set_title(const string16& ttl) { |
- if (ttl.empty()) // Make the title nonempty when we set it for EverybodySet. |
- title_ = ASCIIToUTF16(" "); |
- else |
- title_ = ttl; |
-} |
- |
-void TextDatabaseManager::PageInfo::set_body(const string16& bdy) { |
- if (bdy.empty()) // Make the body nonempty when we set it for EverybodySet. |
- body_ = ASCIIToUTF16(" "); |
- else |
- body_ = bdy; |
-} |
- |
-bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const { |
- return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds); |
-} |
- |
-// TextDatabaseManager --------------------------------------------------------- |
- |
-TextDatabaseManager::TextDatabaseManager(const base::FilePath& dir, |
- URLDatabase* url_database, |
- VisitDatabase* visit_database) |
- : dir_(dir), |
- url_database_(url_database), |
- visit_database_(visit_database), |
- recent_changes_(RecentChangeList::NO_AUTO_EVICT), |
- transaction_nesting_(0), |
- db_cache_(DBCache::NO_AUTO_EVICT), |
- present_databases_loaded_(false), |
- weak_factory_(this), |
- history_publisher_(NULL) { |
-} |
- |
-TextDatabaseManager::~TextDatabaseManager() { |
- if (transaction_nesting_) |
- CommitTransaction(); |
-} |
- |
-// static |
-TextDatabase::DBIdent TextDatabaseManager::TimeToID(Time time) { |
- Time::Exploded exploded; |
- time.UTCExplode(&exploded); |
- |
- // We combine the month and year into a 6-digit number (200801 for |
- // January, 2008). The month is 1-based. |
- return exploded.year * 100 + exploded.month; |
-} |
- |
-// static |
-Time TextDatabaseManager::IDToTime(TextDatabase::DBIdent id) { |
- Time::Exploded exploded; |
- memset(&exploded, 0, sizeof(Time::Exploded)); |
- exploded.year = id / 100; |
- exploded.month = id % 100; |
- return Time::FromUTCExploded(exploded); |
-} |
- |
-bool TextDatabaseManager::Init(const HistoryPublisher* history_publisher) { |
- history_publisher_ = history_publisher; |
- |
- // Start checking recent changes and committing them. |
- ScheduleFlushOldChanges(); |
- return true; |
-} |
- |
-void TextDatabaseManager::BeginTransaction() { |
- transaction_nesting_++; |
-} |
- |
-void TextDatabaseManager::CommitTransaction() { |
- DCHECK(transaction_nesting_); |
- transaction_nesting_--; |
- if (transaction_nesting_) |
- return; // Still more nesting of transactions before committing. |
- |
- // Commit all databases with open transactions on them. |
- for (DBIdentSet::const_iterator i = open_transactions_.begin(); |
- i != open_transactions_.end(); ++i) { |
- DBCache::iterator iter = db_cache_.Get(*i); |
- if (iter == db_cache_.end()) { |
- NOTREACHED() << "All open transactions should be cached."; |
- continue; |
- } |
- iter->second->CommitTransaction(); |
- } |
- open_transactions_.clear(); |
- |
- // Now that the transaction is over, we can expire old connections. |
- db_cache_.ShrinkToSize(kCacheDBSize); |
-} |
- |
-void TextDatabaseManager::InitDBList() { |
- if (present_databases_loaded_) |
- return; |
- |
- present_databases_loaded_ = true; |
- |
- // Find files on disk matching our pattern so we can quickly test for them. |
- base::FilePath::StringType filepattern(TextDatabase::file_base()); |
- filepattern.append(FILE_PATH_LITERAL("*")); |
- base::FileEnumerator enumerator( |
- dir_, false, base::FileEnumerator::FILES, filepattern); |
- base::FilePath cur_file; |
- while (!(cur_file = enumerator.Next()).empty()) { |
- // Convert to the number representing this file. |
- TextDatabase::DBIdent id = TextDatabase::FileNameToID(cur_file); |
- if (id) // Will be 0 on error. |
- present_databases_.insert(id); |
- } |
-} |
- |
-void TextDatabaseManager::AddPageURL(const GURL& url, |
- URLID url_id, |
- VisitID visit_id, |
- Time time) { |
- // Delete any existing page info. |
- RecentChangeList::iterator found = recent_changes_.Peek(url); |
- if (found != recent_changes_.end()) |
- recent_changes_.Erase(found); |
- |
- // Just save this info for later. We will save it when it expires or when all |
- // the data is complete. |
- recent_changes_.Put(url, PageInfo(url_id, visit_id, time)); |
-} |
- |
-void TextDatabaseManager::AddPageTitle(const GURL& url, |
- const string16& title) { |
- RecentChangeList::iterator found = recent_changes_.Peek(url); |
- if (found == recent_changes_.end()) { |
- // This page is not in our cache of recent pages. This is very much an edge |
- // case as normally a title will come in <20 seconds after the page commits, |
- // and WebContents will avoid spamming us with >1 title per page. However, |
- // it could come up if your connection is unhappy, and we don't want to |
- // miss anything. |
- // |
- // To solve this problem, we'll just associate the most recent visit with |
- // the new title and index that using the regular code path. |
- URLRow url_row; |
- if (!url_database_->GetRowForURL(url, &url_row)) |
- return; // URL is unknown, give up. |
- VisitRow visit; |
- if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit)) |
- return; // No recent visit, give up. |
- |
- if (visit.is_indexed) { |
- // If this page was already indexed, we could have a body that came in |
- // first and we don't want to overwrite it. We could go query for the |
- // current body, or have a special setter for only the title, but this is |
- // not worth it for this edge case. |
- // |
- // It will be almost impossible for the title to take longer than |
- // kExpirationSeconds yet we got a body in less than that time, since |
- // the title should always come in first. |
- return; |
- } |
- |
- AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time, |
- title, string16()); |
- return; // We don't know about this page, give up. |
- } |
- |
- PageInfo& info = found->second; |
- if (info.has_body()) { |
- // This info is complete, write to the database. |
- AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(), |
- title, info.body()); |
- recent_changes_.Erase(found); |
- return; |
- } |
- |
- info.set_title(title); |
-} |
- |
-void TextDatabaseManager::AddPageContents(const GURL& url, |
- const string16& body) { |
- RecentChangeList::iterator found = recent_changes_.Peek(url); |
- if (found == recent_changes_.end()) { |
- // This page is not in our cache of recent pages. This means that the page |
- // took more than kExpirationSeconds to load. Often, this will be the result |
- // of a very slow iframe or other resource on the page that makes us think |
- // it's still loading. |
- // |
- // As a fallback, set the most recent visit's contents using the input, and |
- // use the last set title in the URL table as the title to index. |
- URLRow url_row; |
- if (!url_database_->GetRowForURL(url, &url_row)) |
- return; // URL is unknown, give up. |
- VisitRow visit; |
- if (!visit_database_->GetMostRecentVisitForURL(url_row.id(), &visit)) |
- return; // No recent visit, give up. |
- |
- // Use the title from the URL row as the title for the indexing. |
- AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time, |
- url_row.title(), body); |
- return; |
- } |
- |
- PageInfo& info = found->second; |
- if (info.has_title()) { |
- // This info is complete, write to the database. |
- AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(), |
- info.title(), body); |
- recent_changes_.Erase(found); |
- return; |
- } |
- |
- info.set_body(body); |
-} |
- |
-bool TextDatabaseManager::AddPageData(const GURL& url, |
- URLID url_id, |
- VisitID visit_id, |
- Time visit_time, |
- const string16& title, |
- const string16& body) { |
- TextDatabase* db = GetDBForTime(visit_time, true); |
- if (!db) |
- return false; |
- |
- TimeTicks beginning_time = TimeTicks::Now(); |
- |
- // First delete any recently-indexed data for this page. This will delete |
- // anything in the main database, but we don't bother looking through the |
- // archived database. |
- VisitVector visits; |
- visit_database_->GetIndexedVisitsForURL(url_id, &visits); |
- for (size_t i = 0; i < visits.size(); i++) { |
- visits[i].is_indexed = false; |
- visit_database_->UpdateVisitRow(visits[i]); |
- DeletePageData(visits[i].visit_time, url, NULL); |
- } |
- |
- if (visit_id) { |
- // We're supposed to update the visit database, so load the visit. |
- VisitRow row; |
- if (!visit_database_->GetRowForVisit(visit_id, &row)) { |
- // This situation can occur if Chrome's history is in the process of |
- // being updated, and then the browsing history is deleted before all |
- // updates have been completely performed. In this case, a stale update |
- // to the database is attempted, leading to the warning below. |
- DLOG(WARNING) << "Could not find requested visit #" << visit_id; |
- return false; |
- } |
- |
- DCHECK(visit_time == row.visit_time); |
- |
- // Update the visit database to reference our addition. |
- row.is_indexed = true; |
- if (!visit_database_->UpdateVisitRow(row)) |
- return false; |
- } |
- |
- // Now index the data. |
- std::string url_str = URLDatabase::GURLToDatabaseURL(url); |
- bool success = db->AddPageData(visit_time, url_str, |
- ConvertStringForIndexer(title), |
- ConvertStringForIndexer(body)); |
- |
- UMA_HISTOGRAM_TIMES("History.AddFTSData", |
- TimeTicks::Now() - beginning_time); |
- |
- if (history_publisher_) |
- history_publisher_->PublishPageContent(visit_time, url, title, body); |
- |
- return success; |
-} |
- |
-void TextDatabaseManager::DeletePageData(Time time, const GURL& url, |
- ChangeSet* change_set) { |
- TextDatabase::DBIdent db_ident = TimeToID(time); |
- |
- // We want to open the database for writing, but only if it exists. To |
- // achieve this, we check whether it exists by saying we're not going to |
- // write to it (avoiding the autocreation code normally called when writing) |
- // and then access it for writing only if it succeeds. |
- TextDatabase* db = GetDB(db_ident, false); |
- if (!db) |
- return; |
- db = GetDB(db_ident, true); |
- |
- if (change_set) |
- change_set->Add(db_ident); |
- |
- db->DeletePageData(time, URLDatabase::GURLToDatabaseURL(url)); |
-} |
- |
-void TextDatabaseManager::DeleteFromUncommitted( |
- const std::set<GURL>& restrict_urls, Time begin, Time end) { |
- // First find the beginning of the range to delete. Recall that the list |
- // has the most recent item at the beginning. There won't normally be very |
- // many items, so a brute-force search is fine. |
- RecentChangeList::iterator cur = recent_changes_.begin(); |
- if (!end.is_null()) { |
- // Walk from the beginning of the list backwards in time to find the newest |
- // entry that should be deleted. |
- while (cur != recent_changes_.end() && cur->second.visit_time() >= end) |
- ++cur; |
- } |
- |
- // Now delete all visits up to the oldest one we were supposed to delete. |
- // Note that if begin is_null, it will be less than or equal to any other |
- // time. |
- if (restrict_urls.empty()) { |
- while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) |
- cur = recent_changes_.Erase(cur); |
- } else { |
- while (cur != recent_changes_.end() && cur->second.visit_time() >= begin) { |
- if (restrict_urls.find(cur->first) != restrict_urls.end()) |
- cur = recent_changes_.Erase(cur); |
- else |
- ++cur; |
- } |
- } |
-} |
- |
-void TextDatabaseManager::DeleteFromUncommittedForTimes( |
- const std::vector<base::Time>& times) { |
- // |times| must be in reverse chronological order, i.e. each member |
- // must be earlier than or the same as the one before it. |
- DCHECK( |
- std::adjacent_find( |
- times.begin(), times.end(), std::less<base::Time>()) == |
- times.end()); |
- |
- // Both |recent_changes_| and |times| are in reverse chronological order. |
- RecentChangeList::iterator it = recent_changes_.begin(); |
- std::vector<base::Time>::const_iterator time_it = times.begin(); |
- while (it != recent_changes_.end() && time_it != times.end()) { |
- base::Time visit_time = it->second.visit_time(); |
- if (visit_time == *time_it) { |
- it = recent_changes_.Erase(it); |
- } else if (visit_time < *time_it) { |
- ++time_it; |
- } else /* if (visit_time > *time_it) */ { |
- ++it; |
- } |
- } |
-} |
- |
-void TextDatabaseManager::DeleteAll() { |
- DCHECK_EQ(0, transaction_nesting_) << "Calling deleteAll in a transaction."; |
- |
- InitDBList(); |
- |
- // Delete uncommitted entries. |
- recent_changes_.Clear(); |
- |
- // Close all open databases. |
- db_cache_.Clear(); |
- |
- // Now go through and delete all the files. |
- for (DBIdentSet::iterator i = present_databases_.begin(); |
- i != present_databases_.end(); ++i) { |
- base::FilePath file_name = dir_.Append(TextDatabase::IDToFileName(*i)); |
- sql::Connection::Delete(file_name); |
- } |
-} |
- |
-void TextDatabaseManager::OptimizeChangedDatabases( |
- const ChangeSet& change_set) { |
- for (ChangeSet::DBSet::const_iterator i = |
- change_set.changed_databases_.begin(); |
- i != change_set.changed_databases_.end(); ++i) { |
- // We want to open the database for writing, but only if it exists. To |
- // achieve this, we check whether it exists by saying we're not going to |
- // write to it (avoiding the autocreation code normally called when writing) |
- // and then access it for writing only if it succeeds. |
- TextDatabase* db = GetDB(*i, false); |
- if (!db) |
- continue; |
- db = GetDB(*i, true); |
- if (!db) |
- continue; // The file may have changed or something. |
- db->Optimize(); |
- } |
-} |
- |
-void TextDatabaseManager::GetTextMatches( |
- const string16& query, |
- const QueryOptions& options, |
- std::vector<TextDatabase::Match>* results, |
- Time* first_time_searched) { |
- results->clear(); |
- |
- *first_time_searched = options.begin_time; |
- |
- InitDBList(); |
- if (present_databases_.empty()) |
- return; // Nothing to search. |
- |
- // Get the query into the proper format for the individual DBs. |
- string16 fts_query16; |
- query_parser_.ParseQuery(query, &fts_query16); |
- std::string fts_query = UTF16ToUTF8(fts_query16); |
- |
- // Need a copy of the options so we can modify the max count for each call |
- // to the individual databases. |
- QueryOptions cur_options(options); |
- |
- // Compute the minimum and maximum values for the identifiers that could |
- // encompass the input time range. |
- TextDatabase::DBIdent min_ident = options.begin_time.is_null() ? |
- *present_databases_.begin() : |
- TimeToID(options.begin_time); |
- TextDatabase::DBIdent max_ident = options.end_time.is_null() ? |
- *present_databases_.rbegin() : |
- TimeToID(options.end_time); |
- |
- // Iterate over the databases from the most recent backwards. |
- TextDatabase::URLSet found_urls; |
- for (DBIdentSet::reverse_iterator i = present_databases_.rbegin(); |
- i != present_databases_.rend(); |
- ++i) { |
- // TODO(brettw) allow canceling the query in the middle. |
- // if (canceled_or_something) |
- // break; |
- |
- // This code is stupid, we just loop until we find the correct starting |
- // time range rather than search in an intelligent way. Users will have a |
- // few dozen files at most, so this should not be an issue. |
- if (*i > max_ident) |
- continue; // Haven't gotten to the time range yet. |
- if (*i < min_ident) |
- break; // Covered all the time range. |
- |
- TextDatabase* cur_db = GetDB(*i, false); |
- if (!cur_db) |
- continue; |
- |
- // Adjust the max count according to how many results we've already got. |
- if (options.max_count) { |
- cur_options.max_count = options.max_count - |
- static_cast<int>(results->size()); |
- } |
- |
- bool has_more_results = cur_db->GetTextMatches( |
- fts_query, cur_options, results, &found_urls); |
- |
- DCHECK(static_cast<int>(results->size()) <= options.EffectiveMaxCount()); |
- |
- if (has_more_results || |
- static_cast<int>(results->size()) == options.EffectiveMaxCount()) { |
- // Since the search proceeds backwards in time, the last result we have |
- // gives the first time searched. |
- *first_time_searched = results->back().time; |
- break; |
- } |
- } |
-} |
- |
-size_t TextDatabaseManager::GetUncommittedEntryCountForTest() const { |
- return recent_changes_.size(); |
-} |
- |
-TextDatabase* TextDatabaseManager::GetDB(TextDatabase::DBIdent id, |
- bool for_writing) { |
- DBCache::iterator found_db = db_cache_.Get(id); |
- if (found_db != db_cache_.end()) { |
- if (transaction_nesting_ && for_writing && |
- open_transactions_.find(id) == open_transactions_.end()) { |
- // If we currently have an open transaction, that database is not yet |
- // part of the transaction, and the database will be written to, it needs |
- // to be part of our transaction. |
- found_db->second->BeginTransaction(); |
- open_transactions_.insert(id); |
- } |
- return found_db->second; |
- } |
- |
- // Need to make the database. |
- TextDatabase* new_db = new TextDatabase(dir_, id, for_writing); |
- if (!new_db->Init()) { |
- delete new_db; |
- return NULL; |
- } |
- db_cache_.Put(id, new_db); |
- present_databases_.insert(id); |
- |
- if (transaction_nesting_ && for_writing) { |
- // If we currently have an open transaction and the new database will be |
- // written to, it needs to be part of our transaction. |
- new_db->BeginTransaction(); |
- open_transactions_.insert(id); |
- } |
- |
- // When no transaction is open, allow this new one to kick out an old one. |
- if (!transaction_nesting_) |
- db_cache_.ShrinkToSize(kCacheDBSize); |
- |
- return new_db; |
-} |
- |
-TextDatabase* TextDatabaseManager::GetDBForTime(Time time, |
- bool create_if_necessary) { |
- return GetDB(TimeToID(time), create_if_necessary); |
-} |
- |
-void TextDatabaseManager::ScheduleFlushOldChanges() { |
- weak_factory_.InvalidateWeakPtrs(); |
- base::MessageLoop::current()->PostDelayedTask( |
- FROM_HERE, |
- base::Bind(&TextDatabaseManager::FlushOldChanges, |
- weak_factory_.GetWeakPtr()), |
- base::TimeDelta::FromSeconds(kExpirationSeconds)); |
-} |
- |
-void TextDatabaseManager::FlushOldChanges() { |
- FlushOldChangesForTime(TimeTicks::Now()); |
-} |
- |
-void TextDatabaseManager::FlushOldChangesForTime(TimeTicks now) { |
- // The end of the list is the oldest, so we just start from there committing |
- // things until we get something too new. |
- RecentChangeList::reverse_iterator i = recent_changes_.rbegin(); |
- while (i != recent_changes_.rend() && i->second.Expired(now)) { |
- AddPageData(i->first, i->second.url_id(), i->second.visit_id(), |
- i->second.visit_time(), i->second.title(), i->second.body()); |
- i = recent_changes_.Erase(i); |
- } |
- |
- ScheduleFlushOldChanges(); |
-} |
- |
-} // namespace history |