chrome/browser/history/text_database.cc - Issue 16951015: Remove TextDatabase from the history service.

Unified Diff: chrome/browser/history/text_database.cc

Issue 16951015: Remove TextDatabase from the history service. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@replace_fts

Patch Set: Sync and rebase. Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/browser/history/text_database.cc

diff --git a/chrome/browser/history/text_database.cc b/chrome/browser/history/text_database.cc

deleted file mode 100644

index 0f5db891caf6cf0315aeb3e068817cb443bba1ab..0000000000000000000000000000000000000000

--- a/chrome/browser/history/text_database.cc

+++ /dev/null

@@ -1,353 +0,0 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-#include <limits>

-#include <set>

-#include <string>

-#include "chrome/browser/history/text_database.h"

-#include "base/file_util.h"

-#include "base/logging.h"

-#include "base/metrics/histogram.h"

-#include "base/strings/string_number_conversions.h"

-#include "base/strings/stringprintf.h"

-#include "base/strings/utf_string_conversions.h"

-#include "sql/statement.h"

-#include "sql/transaction.h"

-// There are two tables in each database, one full-text search (FTS) table which

-// indexes the contents and title of the pages. The other is a regular SQLITE

-// table which contains non-indexed information about the page. All columns of

-// a FTS table are indexed using the text search algorithm, which isn't what we

-// want for things like times. If this were in the FTS table, there would be

-// different words in the index for each time number.

-//

-// "pages" FTS table:

-// url URL of the page so searches will match the URL.

-// title Title of the page.

-// body Body of the page.

-//

-// "info" regular table:

-// time Time the corresponding FTS entry was visited.

-//

-// We do joins across these two tables by using their internal rowids, which we

-// keep in sync between the two tables. The internal rowid is the only part of

-// an FTS table that is indexed like a normal table, and the index over it is

-// free since sqlite always indexes the internal rowid.

-namespace history {

-namespace {

-// Version 1 uses FTS2 for index files.

-// Version 2 uses FTS3.

-static const int kCurrentVersionNumber = 2;

-static const int kCompatibleVersionNumber = 2;

-// Snippet computation relies on the index of the columns in the original

-// create statement. These are the 0-based indices (as strings) of the

-// corresponding columns.

-const char kTitleColumnIndex[] = "1";

-const char kBodyColumnIndex[] = "2";

-// The string prepended to the database identifier to generate the filename.

-const base::FilePath::CharType kFilePrefix[] =

- FILE_PATH_LITERAL("History Index ");

-} // namespace

-TextDatabase::Match::Match() {}

-TextDatabase::Match::~Match() {}

-TextDatabase::TextDatabase(const base::FilePath& path,

- DBIdent id,

- bool allow_create)

- : path_(path),

- ident_(id),

- allow_create_(allow_create) {

- // Compute the file name.

- file_name_ = path_.Append(IDToFileName(ident_));

-TextDatabase::~TextDatabase() {

-// static

-const base::FilePath::CharType* TextDatabase::file_base() {

- return kFilePrefix;

-// static

-base::FilePath TextDatabase::IDToFileName(DBIdent id) {

- // Identifiers are intended to be a combination of the year and month, for

- // example, 200801 for January 2008. We convert this to

- // "History Index 2008-01". However, we don't make assumptions about this

- // scheme: the caller should assign IDs as it feels fit with the knowledge

- // that they will apppear on disk in this form.

- base::FilePath::StringType filename(file_base());

- base::StringAppendF(&filename, FILE_PATH_LITERAL("%d-%02d"),

- id / 100, id % 100);

- return base::FilePath(filename);

-// static

-TextDatabase::DBIdent TextDatabase::FileNameToID(

- const base::FilePath& file_path) {

- base::FilePath::StringType file_name = file_path.BaseName().value();

- // We don't actually check the prefix here. Since the file system could

- // be case insensitive in ways we can't predict (NTFS), checking could

- // potentially be the wrong thing to do. Instead, we just look for a suffix.

- static const size_t kIDStringLength = 7; // Room for "xxxx-xx".

- if (file_name.length() < kIDStringLength)

- return 0;

- const base::FilePath::StringType suffix(

- &file_name[file_name.length() - kIDStringLength]);

- if (suffix.length() != kIDStringLength ||

- suffix[4] != FILE_PATH_LITERAL('-')) {

- return 0;

- }

- // TODO: Once StringPiece supports a templated interface over the

- // underlying string type, use it here instead of substr, since that

- // will avoid needless string copies. StringPiece cannot be used

- // right now because base::FilePath::StringType could use either 8 or 16 bit

- // characters, depending on the OS.

- int year, month;

- base::StringToInt(suffix.substr(0, 4), &year);

- base::StringToInt(suffix.substr(5, 2), &month);

- return year * 100 + month;

-bool TextDatabase::Init() {

- // Make sure, if we're not allowed to create the file, that it exists.

- if (!allow_create_) {

- if (!base::PathExists(file_name_))

- return false;

- }

- db_.set_histogram_tag("Text");

- // Set the database page size to something a little larger to give us

- // better performance (we're typically seek rather than bandwidth limited).

- // This only has an effect before any tables have been created, otherwise

- // this is a NOP. Must be a power of 2 and a max of 8192.

- db_.set_page_size(4096);

- // The default cache size is 2000 which give >8MB of data. Since we will often

- // have 2-3 of these objects, each with their own 8MB, this adds up very fast.

- // We therefore reduce the size so when there are multiple objects, we're not

- // too big.

- db_.set_cache_size(512);

- // Run the database in exclusive mode. Nobody else should be accessing the

- // database while we're running, and this will give somewhat improved perf.

- db_.set_exclusive_locking();

- // Attach the database to our index file.

- if (!db_.Open(file_name_))

- return false;

- // Meta table tracking version information.

- if (!meta_table_.Init(&db_, kCurrentVersionNumber, kCompatibleVersionNumber))

- return false;

- if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {

- // This version is too new. We don't bother notifying the user on this

- // error, and just fail to use the file. Normally if they have version skew,

- // they will get it for the main history file and it won't be necessary

- // here. If that's not the case, since this is only indexed data, it's

- // probably better to just not give FTS results than strange errors when

- // everything else is working OK.

- LOG(WARNING) << "Text database is too new.";

- return false;

- }

- return CreateTables();

-void TextDatabase::BeginTransaction() {

- db_.BeginTransaction();

-void TextDatabase::CommitTransaction() {

- db_.CommitTransaction();

-bool TextDatabase::CreateTables() {

- // FTS table of page contents.

- if (!db_.DoesTableExist("pages")) {

- if (!db_.Execute("CREATE VIRTUAL TABLE pages USING fts3("

- "TOKENIZE icu,"

- "url LONGVARCHAR,"

- "title LONGVARCHAR,"

- "body LONGVARCHAR)"))

- return false;

- }

- // Non-FTS table containing URLs and times so we can efficiently find them

- // using a regular index (all FTS columns are special and are treated as

- // full-text-search, which is not what we want when retrieving this data).

- if (!db_.DoesTableExist("info")) {

- // Note that there is no point in creating an index over time. Since

- // we must always query the entire FTS table (it can not efficiently do

- // subsets), we will always end up doing that first, and joining the info

- // table off of that.

- if (!db_.Execute("CREATE TABLE info(time INTEGER NOT NULL)"))

- return false;

- }

- // Create the index.

- return db_.Execute("CREATE INDEX IF NOT EXISTS info_time ON info(time)");

-bool TextDatabase::AddPageData(base::Time time,

- const std::string& url,

- const std::string& title,

- const std::string& contents) {

- sql::Transaction committer(&db_);

- if (!committer.Begin())

- return false;

- // Add to the pages table.

- sql::Statement add_to_pages(db_.GetCachedStatement(SQL_FROM_HERE,

- "INSERT INTO pages (url, title, body) VALUES (?,?,?)"));

- add_to_pages.BindString(0, url);

- add_to_pages.BindString(1, title);

- add_to_pages.BindString(2, contents);

- if (!add_to_pages.Run())

- return false;

- int64 rowid = db_.GetLastInsertRowId();

- // Add to the info table with the same rowid.

- sql::Statement add_to_info(db_.GetCachedStatement(SQL_FROM_HERE,

- "INSERT INTO info (rowid, time) VALUES (?,?)"));

- add_to_info.BindInt64(0, rowid);

- add_to_info.BindInt64(1, time.ToInternalValue());

- if (!add_to_info.Run())

- return false;

- return committer.Commit();

-void TextDatabase::DeletePageData(base::Time time, const std::string& url) {

- // First get all rows that match. Selecing on time (which has an index) allows

- // us to avoid brute-force searches on the full-text-index table (there will

- // generally be only one match per time).

- sql::Statement select_ids(db_.GetCachedStatement(SQL_FROM_HERE,

- "SELECT info.rowid "

- "FROM info JOIN pages ON info.rowid = pages.rowid "

- "WHERE info.time=? AND pages.url=?"));

- select_ids.BindInt64(0, time.ToInternalValue());

- select_ids.BindString(1, url);

- std::set<int64> rows_to_delete;

- while (select_ids.Step())

- rows_to_delete.insert(select_ids.ColumnInt64(0));

- // Delete from the pages table.

- sql::Statement delete_page(db_.GetCachedStatement(SQL_FROM_HERE,

- "DELETE FROM pages WHERE rowid=?"));

- for (std::set<int64>::const_iterator i = rows_to_delete.begin();

- i != rows_to_delete.end(); ++i) {

- delete_page.BindInt64(0, *i);

- if (!delete_page.Run())

- return;

- delete_page.Reset(true);

- }

- // Delete from the info table.

- sql::Statement delete_info(db_.GetCachedStatement(SQL_FROM_HERE,

- "DELETE FROM info WHERE rowid=?"));

- for (std::set<int64>::const_iterator i = rows_to_delete.begin();

- i != rows_to_delete.end(); ++i) {

- delete_info.BindInt64(0, *i);

- if (!delete_info.Run())

- return;

- delete_info.Reset(true);

- }

-void TextDatabase::Optimize() {

- sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,

- "SELECT OPTIMIZE(pages) FROM pages LIMIT 1"));

- statement.Run();

-bool TextDatabase::GetTextMatches(const std::string& query,

- const QueryOptions& options,

- std::vector<Match>* results,

- URLSet* found_urls) {

- std::string sql = "SELECT url, title, time, offsets(pages), body FROM pages "

- "LEFT OUTER JOIN info ON pages.rowid = info.rowid WHERE ";

- sql += options.body_only ? "body " : "pages ";

- sql += "MATCH ? AND time >= ? AND time < ? ";

- // Times may not be unique, so also sort by rowid to ensure a stable order.

- sql += "ORDER BY time DESC, info.rowid DESC";

- // Generate unique IDs for the two possible variations of the statement,

- // so they don't share the same cached prepared statement.

- sql::StatementID body_only_id = SQL_FROM_HERE;

- sql::StatementID pages_id = SQL_FROM_HERE;

- sql::Statement statement(db_.GetCachedStatement(

- (options.body_only ? body_only_id : pages_id), sql.c_str()));

- statement.BindString(0, query);

- statement.BindInt64(1, options.EffectiveBeginTime());

- statement.BindInt64(2, options.EffectiveEndTime());

- // |results| may not be initially empty, so keep track of how many were added

- // by this call.

- int result_count = 0;

- while (statement.Step()) {

- // TODO(brettw) allow canceling the query in the middle.

- // if (canceled_or_something)

- // break;

- GURL url(statement.ColumnString(0));

- URLSet::const_iterator found_url = found_urls->find(url);

- if (found_url != found_urls->end())

- continue; // Don't add this duplicate.

- if (++result_count > options.EffectiveMaxCount())

- break;

- // Fill the results into the vector (avoid copying the URL with Swap()).

- results->resize(results->size() + 1);

- Match& match = results->at(results->size() - 1);

- match.url.Swap(&url);

- match.title = statement.ColumnString16(1);

- match.time = base::Time::FromInternalValue(statement.ColumnInt64(2));

- // Extract any matches in the title.

- std::string offsets_str = statement.ColumnString(3);

- Snippet::ExtractMatchPositions(offsets_str, kTitleColumnIndex,

- &match.title_match_positions);

- Snippet::ConvertMatchPositionsToWide(statement.ColumnString(1),

- &match.title_match_positions);

- // Extract the matches in the body.

- Snippet::MatchPositions match_positions;

- Snippet::ExtractMatchPositions(offsets_str, kBodyColumnIndex,

- &match_positions);

- // Compute the snippet based on those matches.

- std::string body = statement.ColumnString(4);

- match.snippet.ComputeSnippet(match_positions, body);

- }

- statement.Reset(true);

- return result_count > options.EffectiveMaxCount();

-} // namespace history

« no previous file with comments | « chrome/browser/history/text_database.h ('k') | chrome/browser/history/text_database_manager.h » ('j') | no next file with comments »