| Index: chrome/browser/history/url_index_private_data.h
|
| ===================================================================
|
| --- chrome/browser/history/url_index_private_data.h (revision 152962)
|
| +++ chrome/browser/history/url_index_private_data.h (working copy)
|
| @@ -11,9 +11,8 @@
|
| #include "base/file_path.h"
|
| #include "base/gtest_prod_util.h"
|
| #include "base/memory/ref_counted.h"
|
| -#include "base/synchronization/lock.h"
|
| -#include "base/threading/sequenced_worker_pool.h"
|
| #include "chrome/browser/history/in_memory_url_index_types.h"
|
| +#include "chrome/browser/history/in_memory_url_index_cache.pb.h"
|
| #include "chrome/browser/history/scored_history_match.h"
|
| #include "content/public/browser/notification_details.h"
|
|
|
| @@ -28,142 +27,34 @@
|
| namespace imui = in_memory_url_index;
|
|
|
| class HistoryDatabase;
|
| -class InMemoryURLCacheDatabase;
|
| class InMemoryURLIndex;
|
| class RefCountedBool;
|
|
|
| -// A structure private to InMemoryURLIndex describing its internal data and
|
| -// providing for restoring, rebuilding and updating that internal data. As
|
| -// this class is for exclusive use by the InMemoryURLIndex class there should
|
| -// be no calls from any other class.
|
| -//
|
| -// All public member functions are called on the main thread unless otherwise
|
| -// annotated.
|
| +// Current version of the cache file.
|
| +static const int kCurrentCacheFileVersion = 1;
|
| +
|
| +// A structure describing the InMemoryURLIndex's internal data and providing for
|
| +// restoring, rebuilding and updating that internal data.
|
| class URLIndexPrivateData
|
| : public base::RefCountedThreadSafe<URLIndexPrivateData> {
|
| public:
|
| - // Creates a new instance of private data, creating or opening the cache
|
| - // database located in |history_dir|. |languages| is used to break down
|
| - // search terms, URLs, and page titles into words and characters.
|
| - URLIndexPrivateData(const FilePath& history_dir,
|
| - const std::string& languages);
|
| + URLIndexPrivateData();
|
|
|
| - // Initializes the private data and its cache database. Returns true if the
|
| - // database is successfully initialized. Any failures will mark the cache
|
| - // database as not enabled. |sequence_token| is used to coordinate all
|
| - // future database operations (not including those performed during this
|
| - // initialization). Called on the DB thread.
|
| - bool Init(base::SequencedWorkerPool::SequenceToken sequence_token);
|
| -
|
| - // Performs a Clear() and then erases the cache database. Called on the
|
| - // worker pool sequenced by InMemoryURLIndex's |sequence_token_|.
|
| - void Reset();
|
| -
|
| - // Returns true if there is no data in the index.
|
| - bool Empty() const;
|
| -
|
| - // Returns a copy of the private data for archiving purposes.
|
| - URLIndexPrivateData* Snapshot() const;
|
| -
|
| - // Closes the database.
|
| - void Shutdown();
|
| -
|
| - // Verifies that the private data is consistent.
|
| - bool ValidateConsistency() const;
|
| -
|
| - // Given a string16 in |search_string|, scans the history index and returns a
|
| - // vector with all scored, matching history items. The |search_string| is
|
| - // broken down into individual terms (words), each of which must occur in the
|
| - // candidate history item's URL or page title for the item to qualify;
|
| - // however, the terms do not necessarily have to be adjacent. Once we have
|
| - // a set of candidates, they are filtered to insure that all |search_string|
|
| - // terms, as separated by whitespace, occur within the candidate's URL
|
| - // or page title. Scores are then calculated on no more than
|
| - // |kItemsToScoreLimit| candidates, as the scoring of such a large number of
|
| - // candidates may cause perceptible typing response delays in the omnibox.
|
| - // This is likely to occur for short omnibox terms such as 'h' and 'w' which
|
| - // will be found in nearly all history candidates. Results are sorted by
|
| - // descending score. The full results set (i.e. beyond the
|
| - // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls
|
| - // to this function.
|
| - ScoredHistoryMatches HistoryItemsForTerms(const string16& search_string);
|
| -
|
| - // Adds the history item in |row| to the index if it does not already already
|
| - // exist and it meets the minimum 'quick' criteria. If the row already exists
|
| - // in the index then the index will be updated if the row still meets the
|
| - // criteria, otherwise the row will be removed from the index. Returns true
|
| - // if the index was actually updated. Posts updates to the cache database
|
| - // that are run on the worker pool sequenced by InMemoryURLIndex's
|
| - // |sequence_token_|.
|
| - bool UpdateURL(const URLRow& row);
|
| -
|
| - // Deletes index data for the history item with the given |url|.
|
| - // The item may not have actually been indexed, which is the case if it did
|
| - // not previously meet minimum 'quick' criteria. Returns true if the index
|
| - // was actually updated. Posts updates to the cache database that are run on
|
| - // the worker pool sequenced by InMemoryURLIndex's |sequence_token_|.
|
| - bool DeleteURL(const GURL& url);
|
| -
|
| - // Sets if the cache database is enabled.
|
| - void set_cache_enabled(bool enabled) { cache_enabled_ = enabled; }
|
| -
|
| - // Returns the cache database.
|
| - InMemoryURLCacheDatabase* cache_db() { return cache_db_.get(); }
|
| -
|
| - // Restores the index data from the contents of the cache database. This is
|
| - // called on the DB thread during profile startup and returns true upon a
|
| - // successful restoration. Restoration will fail if there is no cache
|
| - // database or the cache database has been corrupted. All other database
|
| - // operations (i.e. updates from site visits, etc.) will be postponed while
|
| - // this task is being run.
|
| - bool RestoreFromCacheTask();
|
| -
|
| - // Constructs a new private data object by rebuilding its contents from the
|
| - // history database in |history_db|. Returns the new URLIndexPrivateData which
|
| - // on success will contain the rebuilt data but upon failure will be empty.
|
| - // |history_dir| points to the directory in which the cache database will be
|
| - // created. |old_data| provides the cache database and the languages to be
|
| - // used for breaking down search terms, URLs and page titles. This is called
|
| - // on the DB thread during profile startup iff restoring from the cache
|
| - // database fails (see also RestoreFromCacheTask()). All other database
|
| - // operations (i.e. updates from site visits, etc.) will be postponed while
|
| - // this task is being run.
|
| - static scoped_refptr<URLIndexPrivateData> RebuildFromHistory(
|
| - HistoryDatabase* history_db,
|
| - scoped_refptr<URLIndexPrivateData> old_data);
|
| -
|
| - // Completely refreshes the contents of the cache database using the contents
|
| - // of the in-memory index data. This task is performed on the sequenced
|
| - // blocking pool using the sequence_token with which this instance was
|
| - // Init'ed. A refresh will occur 1) during profile startup if a
|
| - // RebuildFromHistory(...) is required, or 2) at any time database corruption
|
| - // is detected while updating the database in an attempt to repair the
|
| - // corruption.
|
| - void RefreshCacheTask();
|
| -
|
| - static void InitializeSchemeWhitelistForTesting(
|
| - std::set<std::string>* whitelist);
|
| -
|
| private:
|
| friend class base::RefCountedThreadSafe<URLIndexPrivateData>;
|
| + ~URLIndexPrivateData();
|
| +
|
| friend class AddHistoryMatch;
|
| friend class ::HistoryQuickProviderTest;
|
| - friend class InMemoryURLCacheDatabase;
|
| - friend class InMemoryURLIndexCacheTest;
|
| + friend class InMemoryURLIndex;
|
| friend class InMemoryURLIndexTest;
|
| - friend class InMemoryURLIndexBaseTest;
|
| - friend class IntercessionaryIndexTest;
|
| - friend class URLIndexOldCacheTest;
|
| - friend class URLIndexPrivateDataTest;
|
| + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore);
|
| FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet);
|
| + FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring);
|
| FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch);
|
| FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching);
|
| FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs);
|
| - FRIEND_TEST_ALL_PREFIXES(IntercessionaryIndexTest, CacheDatabaseFailure);
|
| - FRIEND_TEST_ALL_PREFIXES(IntercessionaryIndexTest,
|
| - ShutdownDuringCacheRefresh);
|
| FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
|
| - FRIEND_TEST_ALL_PREFIXES(URLIndexPrivateDataTest, CacheFetch);
|
|
|
| // Support caching of term results so that we can optimize searches which
|
| // build upon a previous search. Each entry in this map represents one
|
| @@ -232,41 +123,118 @@
|
| const history::HistoryInfoMap& history_info_map_;
|
| };
|
|
|
| - // Creates a new instance of private data for purposes of rebuilding from
|
| - // the history database while simultaneously allowing continued use of an
|
| - // older private data |old_data|. The old data will still be used for
|
| - // providing search results. Any updates to the private data will be queued
|
| - // for application to the new data once it has been successfully rebuilt.
|
| - URLIndexPrivateData(const URLIndexPrivateData& old_data);
|
| + // Given a string16 in |term_string|, scans the history index and returns a
|
| + // vector with all scored, matching history items. The |term_string| is
|
| + // broken down into individual terms (words), each of which must occur in the
|
| + // candidate history item's URL or page title for the item to qualify;
|
| + // however, the terms do not necessarily have to be adjacent. Once we have
|
| + // a set of candidates, they are filtered to ensure that all |term_string|
|
| + // terms, as separated by whitespace, occur within the candidate's URL
|
| + // or page title. Scores are then calculated on no more than
|
| + // |kItemsToScoreLimit| candidates, as the scoring of such a large number of
|
| + // candidates may cause perceptible typing response delays in the omnibox.
|
| + // This is likely to occur for short omnibox terms such as 'h' and 'w' which
|
| + // will be found in nearly all history candidates. Results are sorted by
|
| + // descending score. The full results set (i.e. beyond the
|
| + // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls
|
| + // to this function.
|
| + ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string);
|
|
|
| - // The following constructor is for unit testing purposes only.
|
| - URLIndexPrivateData();
|
| + // Creates a new URLIndexPrivateData object, populates it from the contents
|
| + // of the cache file stored in |file_path|, and assigns it to |private_data|.
|
| + // |languages| will be used to break URLs and page titles into words.
|
| + static void RestoreFromFileTask(
|
| + const FilePath& file_path,
|
| + scoped_refptr<URLIndexPrivateData> private_data,
|
| + const std::string& languages);
|
|
|
| - virtual ~URLIndexPrivateData();
|
| + // Constructs a new object by restoring its contents from the file at |path|.
|
| + // Returns the new URLIndexPrivateData which on success will contain the
|
| + // restored data but upon failure will be empty. |languages| will be used to
|
| + // break URLs and page titles into words.
|
| + static scoped_refptr<URLIndexPrivateData> RestoreFromFile(
|
| + const FilePath& path,
|
| + const std::string& languages);
|
|
|
| - // Returns true if the profile is shutting down. Thread-safe.
|
| - bool IsShutdown() const;
|
| + // Constructs a new object by rebuilding its contents from the history
|
| + // database in |history_db|. Returns the new URLIndexPrivateData which on
|
| + // success will contain the rebuilt data but upon failure will be empty.
|
| + // |languages| gives a list of language encodings by which the URLs and page
|
| + // titles are broken down into words and characters.
|
| + static scoped_refptr<URLIndexPrivateData> RebuildFromHistory(
|
| + HistoryDatabase* history_db,
|
| + const std::string& languages,
|
| + const std::set<std::string>& scheme_whitelist);
|
|
|
| - // Gets if the cache database is enabled.
|
| - bool cache_enabled() const { return cache_enabled_ && cache_db_; }
|
| + // Writes |private_data| as a cache file to |file_path| and returns success
|
| + // via |succeeded|.
|
| + static void WritePrivateDataToCacheFileTask(
|
| + scoped_refptr<URLIndexPrivateData> private_data,
|
| + const FilePath& file_path,
|
| + scoped_refptr<RefCountedBool> succeeded);
|
|
|
| - // Initializes all index private data members in preparation for restoring,
|
| - // rebuilding or resetting the index.
|
| + // Caches the index private data and writes the cache file to the profile
|
| + // directory. Called by WritePrivateDataToCacheFileTask.
|
| + bool SaveToFile(const FilePath& file_path);
|
| +
|
| + // Initializes all index data members in preparation for restoring the index
|
| + // from the cache or a complete rebuild from the history database.
|
| void Clear();
|
|
|
| + // Returns true if there is no data in the index.
|
| + bool Empty() const;
|
| +
|
| + // Creates a copy of ourself.
|
| + scoped_refptr<URLIndexPrivateData> Duplicate() const;
|
| +
|
| + // Adds |word_id| to |history_id|'s entry in the history/word map,
|
| + // creating a new entry if one does not already exist.
|
| + void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id);
|
| +
|
| + // Given a set of Char16s, finds words containing those characters.
|
| + WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
|
| +
|
| // URL History indexing support functions.
|
|
|
| // Indexes one URL history item as described by |row|. Returns true if the
|
| - // row was actually indexed.
|
| - bool IndexRow(const URLRow& row);
|
| + // row was actually indexed. |languages| gives a list of language encodings by
|
| + // which the URLs and page titles are broken down into words and characters.
|
| + // |scheme_whitelist| is used to filter non-qualifying schemes.
|
| + bool IndexRow(const URLRow& row,
|
| + const std::string& languages,
|
| + const std::set<std::string>& scheme_whitelist);
|
|
|
| + // Adds the history item in |row| to the index if it does not already
|
| + // exist and it meets the minimum 'quick' criteria. If the row already exists
|
| + // in the index then the index will be updated if the row still meets the
|
| + // criteria, otherwise the row will be removed from the index. Returns true
|
| + // if the index was actually updated. |languages| gives a list of language
|
| + // encodings by which the URLs and page titles are broken down into words and
|
| + // characters. |scheme_whitelist| is used to filter non-qualifying schemes.
|
| + bool UpdateURL(const URLRow& row,
|
| + const std::string& languages,
|
| + const std::set<std::string>& scheme_whitelist);
|
| +
|
| + // Deletes index data for the history item with the given |url|.
|
| + // The item may not have actually been indexed, which is the case if it did
|
| + // not previously meet minimum 'quick' criteria. Returns true if the index
|
| + // was actually updated.
|
| + bool DeleteURL(const GURL& url);
|
| +
|
| // Parses and indexes the words in the URL and page title of |row| and
|
| // calculate the word starts in each, saving the starts in |word_starts|.
|
| // |languages| gives a list of language encodings by which the URLs and page
|
| // titles are broken down into words and characters.
|
| void AddRowWordsToIndex(const URLRow& row,
|
| - RowWordStarts* word_starts);
|
| + RowWordStarts* word_starts,
|
| + const std::string& languages);
|
|
|
| + // Removes |row| and all associated words and characters from the index.
|
| + void RemoveRowFromIndex(const URLRow& row);
|
| +
|
| + // Removes all words and characters associated with |row| from the index.
|
| + void RemoveRowWordsFromIndex(const URLRow& row);
|
| +
|
| // Given a single word in |uni_word|, adds a reference for the containing
|
| // history item identified by |history_id| to the index.
|
| void AddWordToIndex(const string16& uni_word, HistoryID history_id);
|
| @@ -279,16 +247,6 @@
|
| // |history_id| as the initial element of the word's set.
|
| void AddWordHistory(const string16& uni_word, HistoryID history_id);
|
|
|
| - // Removes |row| and all associated words and characters from the index.
|
| - void RemoveRowFromIndex(const URLRow& row);
|
| -
|
| - // Removes all words and characters associated with |row| from the index.
|
| - void RemoveRowWordsFromIndex(const URLRow& row);
|
| -
|
| - // Adds |word_id| to |history_id|'s entry in the history/word map,
|
| - // creating a new entry if one does not already exist.
|
| - void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id);
|
| -
|
| // Clears |used_| for each item in the search term cache.
|
| void ResetSearchTermCache();
|
|
|
| @@ -300,52 +258,42 @@
|
| // ids for the given term given in |term|.
|
| HistoryIDSet HistoryIDsForTerm(const string16& term);
|
|
|
| - // Given a set of Char16s, finds words containing those characters.
|
| - WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
|
| + // Encode a data structure into the protobuf |cache|.
|
| + void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const;
|
| + void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const;
|
| + void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
|
| + void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const;
|
| + void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const;
|
| + void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const;
|
| + void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const;
|
|
|
| - // Restores our contents from the cache database |cache_db|.
|
| - bool RestoreFromCache(InMemoryURLCacheDatabase* cache_db);
|
| + // Decode a data structure from the protobuf |cache|. Return false if there
|
| + // is any kind of failure. |languages| will be used to break URLs and page
|
| + // titles into words.
|
| + bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache,
|
| + const std::string& languages);
|
| + bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache);
|
| + bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache);
|
| + bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache);
|
| + bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache);
|
| + bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache);
|
| + bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache,
|
| + const std::string& languages);
|
|
|
| - // Deletes any old style protobuf-based cache file.
|
| - void DeleteOldVersionCacheFile() const;
|
| -
|
| - // Constructs a file path for the cache database within the same directory
|
| - // where the history database is kept and saves that path to |file_path|.
|
| - // Returns true if |file_path| can be successfully constructed.
|
| - bool GetCacheDBPath(FilePath* file_path);
|
| -
|
| - // Sets the cache database for testing. Takes ownership of |test_db|.
|
| - void SetCacheDatabaseForTesting(InMemoryURLCacheDatabase* test_db);
|
| -
|
| // Determines if |gurl| has a whitelisted scheme and returns true if so.
|
| static bool URLSchemeIsWhitelisted(const GURL& gurl,
|
| const std::set<std::string>& whitelist);
|
|
|
| - // Directory where cache database (and older protobuf-based cache file)
|
| - // resides. Except when unit testing, this is the same directory in which
|
| - // the profile's history database is found.
|
| - FilePath history_dir_;
|
| -
|
| - // Languages used during the word-breaking process during indexing.
|
| - std::string languages_;
|
| -
|
| // Cache of search terms.
|
| SearchTermCacheMap search_term_cache_;
|
|
|
| - // The cache database.
|
| - scoped_refptr<InMemoryURLCacheDatabase> cache_db_;
|
| -
|
| - // true if the cache is enabled.
|
| - bool cache_enabled_;
|
| -
|
| - // true once the shutdown process has begun.
|
| - bool shutdown_;
|
| -
|
| - // Guard that prevents simultaneous, cross-thread access to |shutdown_|.
|
| - mutable base::Lock lock_;
|
| -
|
| // Start of data members that are cached -------------------------------------
|
|
|
| + // The version of the cache file most recently used to restore this instance
|
| + // of the private data. If the private data was rebuilt from the history
|
| + // database this will be 0.
|
| + int restored_cache_version_;
|
| +
|
| // A list of all of indexed words. The index of a word in this list is the
|
| // ID of the word in the word_map_. It reduces the memory overhead by
|
| // replacing a potentially long and repeated string with a simple index.
|
| @@ -388,8 +336,10 @@
|
|
|
| // End of data members that are cached ---------------------------------------
|
|
|
| - // Only URLs with a whitelisted scheme are indexed.
|
| - std::set<std::string> scheme_whitelist_;
|
| + // For unit testing only. Specifies the version of the cache file to be saved.
|
| + // Used only for testing upgrading of an older version of the cache upon
|
| + // restore.
|
| + int saved_cache_version_;
|
|
|
| // Used for unit testing only. Records the number of candidate history items
|
| // at three stages in the index searching process.
|
|
|