OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
7 | 7 |
8 #include <set> | 8 #include <set> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/file_path.h" | 11 #include "base/file_path.h" |
12 #include "base/gtest_prod_util.h" | 12 #include "base/gtest_prod_util.h" |
13 #include "base/memory/ref_counted.h" | 13 #include "base/memory/ref_counted.h" |
| 14 #include "base/synchronization/lock.h" |
| 15 #include "base/threading/sequenced_worker_pool.h" |
14 #include "chrome/browser/history/in_memory_url_index_types.h" | 16 #include "chrome/browser/history/in_memory_url_index_types.h" |
15 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | |
16 #include "chrome/browser/history/scored_history_match.h" | 17 #include "chrome/browser/history/scored_history_match.h" |
17 #include "content/public/browser/notification_details.h" | 18 #include "content/public/browser/notification_details.h" |
18 | 19 |
19 class HistoryQuickProviderTest; | 20 class HistoryQuickProviderTest; |
20 | 21 |
21 namespace in_memory_url_index { | 22 namespace in_memory_url_index { |
22 class InMemoryURLIndexCacheItem; | 23 class InMemoryURLIndexCacheItem; |
23 } | 24 } |
24 | 25 |
25 namespace history { | 26 namespace history { |
26 | 27 |
27 namespace imui = in_memory_url_index; | 28 namespace imui = in_memory_url_index; |
28 | 29 |
29 class HistoryDatabase; | 30 class HistoryDatabase; |
| 31 class InMemoryURLCacheDatabase; |
30 class InMemoryURLIndex; | 32 class InMemoryURLIndex; |
31 class RefCountedBool; | 33 class RefCountedBool; |
32 | 34 |
33 // Current version of the cache file. | 35 // A structure private to InMemoryURLIndex describing its internal data and |
34 static const int kCurrentCacheFileVersion = 1; | 36 // providing for restoring, rebuilding and updating that internal data. As |
35 | 37 // this class is for exclusive use by the InMemoryURLIndex class there should |
36 // A structure describing the InMemoryURLIndex's internal data and providing for | 38 // be no calls from any other class. |
37 // restoring, rebuilding and updating that internal data. | 39 // |
| 40 // All public member functions are called on the main thread unless otherwise |
| 41 // annotated. |
38 class URLIndexPrivateData | 42 class URLIndexPrivateData |
39 : public base::RefCountedThreadSafe<URLIndexPrivateData> { | 43 : public base::RefCountedThreadSafe<URLIndexPrivateData> { |
40 public: | 44 public: |
41 URLIndexPrivateData(); | 45 // Creates a new instance of private data, creating or opening the cache |
| 46 // database located in |history_dir|. |languages| is used to break down |
| 47 // search terms, URLs, and page titles into words and characters. |
| 48 URLIndexPrivateData(const FilePath& history_dir, |
| 49 const std::string& languages); |
| 50 |
| 51 // Initializes the private data and its cache database. Returns true if the |
| 52 // database is successfully initialized. Any failures will mark the cache |
| 53 // database as not enabled. |sequence_token| is used to coordinate all |
| 54 // future database operations (not including those performed during this |
| 55 // initialization). Called on the DB thread. |
| 56 bool Init(base::SequencedWorkerPool::SequenceToken sequence_token); |
| 57 |
| 58 // Performs a Clear() and then erases the cache database. Called on the |
| 59 // worker pool sequenced by InMemoryURLIndex's |sequence_token_|. |
| 60 void Reset(); |
| 61 |
| 62 // Returns true if there is no data in the index. |
| 63 bool Empty() const; |
| 64 |
| 65 // Returns a copy of the private data for archiving purposes. |
| 66 URLIndexPrivateData* Snapshot() const; |
| 67 |
| 68 // Closes the database. |
| 69 void Shutdown(); |
| 70 |
| 71 // Verifies that the private data is consistent. |
| 72 bool ValidateConsistency() const; |
| 73 |
| 74 // Given a string16 in |search_string|, scans the history index and returns a |
| 75 // vector with all scored, matching history items. The |search_string| is |
| 76 // broken down into individual terms (words), each of which must occur in the |
| 77 // candidate history item's URL or page title for the item to qualify; |
| 78 // however, the terms do not necessarily have to be adjacent. Once we have |
| 79 // a set of candidates, they are filtered to ensure that all |search_string| |
| 80 // terms, as separated by whitespace, occur within the candidate's URL |
| 81 // or page title. Scores are then calculated on no more than |
| 82 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of |
| 83 // candidates may cause perceptible typing response delays in the omnibox. |
| 84 // This is likely to occur for short omnibox terms such as 'h' and 'w' which |
| 85 // will be found in nearly all history candidates. Results are sorted by |
| 86 // descending score. The full results set (i.e. beyond the |
| 87 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
| 88 // to this function. |
| 89 ScoredHistoryMatches HistoryItemsForTerms(const string16& search_string); |
| 90 |
| 91 // Adds the history item in |row| to the index if it does not already |
| 92 // exist and it meets the minimum 'quick' criteria. If the row already exists |
| 93 // in the index then the index will be updated if the row still meets the |
| 94 // criteria, otherwise the row will be removed from the index. Returns true |
| 95 // if the index was actually updated. Posts updates to the cache database |
| 96 // that are run on the worker pool sequenced by InMemoryURLIndex's |
| 97 // |sequence_token_|. |
| 98 bool UpdateURL(const URLRow& row); |
| 99 |
| 100 // Deletes index data for the history item with the given |url|. |
| 101 // The item may not have actually been indexed, which is the case if it did |
| 102 // not previously meet minimum 'quick' criteria. Returns true if the index |
| 103 // was actually updated. Posts updates to the cache database that are run on |
| 104 // the worker pool sequenced by InMemoryURLIndex's |sequence_token_|. |
| 105 bool DeleteURL(const GURL& url); |
| 106 |
| 107 // Sets whether the cache database is enabled. |
| 108 void set_cache_enabled(bool enabled) { cache_enabled_ = enabled; } |
| 109 |
| 110 // Returns the cache database. |
| 111 InMemoryURLCacheDatabase* cache_db() { return cache_db_.get(); } |
| 112 |
| 113 // Restores the index data from the contents of the cache database. This is |
| 114 // called on the DB thread during profile startup and returns true upon a |
| 115 // successful restoration. Restoration will fail if there is no cache |
| 116 // database or the cache database has been corrupted. All other database |
| 117 // operations (i.e. updates from site visits, etc.) will be postponed while |
| 118 // this task is being run. |
| 119 bool RestoreFromCacheTask(); |
| 120 |
| 121 // Constructs a new private data object by rebuilding its contents from the |
| 122 // history database in |history_db|. Returns the new URLIndexPrivateData which |
| 123 // on success will contain the rebuilt data but upon failure will be empty. |
| 124 // |history_dir| points to the directory in which the cache database will be |
| 125 // created. |old_data| provides the cache database and the languages to be |
| 126 // used for breaking down search terms, URLs and page titles. This is called |
| 127 // on the DB thread during profile startup iff restoring from the cache |
| 128 // database fails (see also RestoreFromCacheTask()). All other database |
| 129 // operations (i.e. updates from site visits, etc.) will be postponed while |
| 130 // this task is being run. |
| 131 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( |
| 132 HistoryDatabase* history_db, |
| 133 scoped_refptr<URLIndexPrivateData> old_data); |
| 134 |
| 135 // Completely refreshes the contents of the cache database using the contents |
| 136 // of the in-memory index data. This task is performed on the sequenced |
| 137 // blocking pool using the sequence_token with which this instance was |
| 138 // Init'ed. A refresh will occur 1) during profile startup if a |
| 139 // RebuildFromHistory(...) is required, or 2) at any time database corruption |
| 140 // is detected while updating the database in an attempt to repair the |
| 141 // corruption. |
| 142 void RefreshCacheTask(); |
| 143 |
| 144 static void InitializeSchemeWhitelistForTesting( |
| 145 std::set<std::string>* whitelist); |
42 | 146 |
43 private: | 147 private: |
44 friend class base::RefCountedThreadSafe<URLIndexPrivateData>; | 148 friend class base::RefCountedThreadSafe<URLIndexPrivateData>; |
45 ~URLIndexPrivateData(); | |
46 | |
47 friend class AddHistoryMatch; | 149 friend class AddHistoryMatch; |
48 friend class ::HistoryQuickProviderTest; | 150 friend class ::HistoryQuickProviderTest; |
49 friend class InMemoryURLIndex; | 151 friend class InMemoryURLCacheDatabase; |
| 152 friend class InMemoryURLIndexCacheTest; |
50 friend class InMemoryURLIndexTest; | 153 friend class InMemoryURLIndexTest; |
51 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 154 friend class InMemoryURLIndexBaseTest; |
| 155 friend class IntercessionaryIndexTest; |
| 156 friend class URLIndexOldCacheTest; |
| 157 friend class URLIndexPrivateDataTest; |
52 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); | 158 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
53 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | |
54 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 159 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
55 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 160 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
56 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 161 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
| 162 FRIEND_TEST_ALL_PREFIXES(IntercessionaryIndexTest, CacheDatabaseFailure); |
| 163 FRIEND_TEST_ALL_PREFIXES(IntercessionaryIndexTest, |
| 164 ShutdownDuringCacheRefresh); |
57 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 165 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
| 166 FRIEND_TEST_ALL_PREFIXES(URLIndexPrivateDataTest, CacheFetch); |
58 | 167 |
59 // Support caching of term results so that we can optimize searches which | 168 // Support caching of term results so that we can optimize searches which |
60 // build upon a previous search. Each entry in this map represents one | 169 // build upon a previous search. Each entry in this map represents one |
61 // search term from the most recent search. For example, if the user had | 170 // search term from the most recent search. For example, if the user had |
62 // typed "google blog trans" and then typed an additional 'l' (at the end, | 171 // typed "google blog trans" and then typed an additional 'l' (at the end, |
63 // of course) then there would be four items in the cache: 'blog', 'google', | 172 // of course) then there would be four items in the cache: 'blog', 'google', |
64 // 'trans', and 'transl'. All would be marked as being in use except for the | 173 // 'trans', and 'transl'. All would be marked as being in use except for the |
65 // 'trans' item; its cached data would have been used when optimizing the | 174 // 'trans' item; its cached data would have been used when optimizing the |
66 // construction of the search results candidates for 'transl' but then would | 175 // construction of the search results candidates for 'transl' but then would |
67 // no longer be needed. | 176 // no longer be needed. |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
116 public: | 225 public: |
117 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); | 226 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); |
118 ~HistoryItemFactorGreater(); | 227 ~HistoryItemFactorGreater(); |
119 | 228 |
120 bool operator()(const HistoryID h1, const HistoryID h2); | 229 bool operator()(const HistoryID h1, const HistoryID h2); |
121 | 230 |
122 private: | 231 private: |
123 const history::HistoryInfoMap& history_info_map_; | 232 const history::HistoryInfoMap& history_info_map_; |
124 }; | 233 }; |
125 | 234 |
126 // Given a string16 in |term_string|, scans the history index and returns a | 235 // Creates a new instance of private data for purposes of rebuilding from |
127 // vector with all scored, matching history items. The |term_string| is | 236 // the history database while simultaneously allowing continued use of an |
128 // broken down into individual terms (words), each of which must occur in the | 237 // older private data |old_data|. The old data will still be used for |
129 // candidate history item's URL or page title for the item to qualify; | 238 // providing search results. Any updates to the private data will be queued |
130 // however, the terms do not necessarily have to be adjacent. Once we have | 239 // for application to the new data once it has been successfully rebuilt. |
131 // a set of candidates, they are filtered to insure that all |term_string| | 240 URLIndexPrivateData(const URLIndexPrivateData& old_data); |
132 // terms, as separated by whitespace, occur within the candidate's URL | |
133 // or page title. Scores are then calculated on no more than | |
134 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of | |
135 // candidates may cause perceptible typing response delays in the omnibox. | |
136 // This is likely to occur for short omnibox terms such as 'h' and 'w' which | |
137 // will be found in nearly all history candidates. Results are sorted by | |
138 // descending score. The full results set (i.e. beyond the | |
139 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | |
140 // to this function. | |
141 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); | |
142 | 241 |
143 // Creates a new URLIndexPrivateData object, populates it from the contents | 242 // The following constructor is for unit testing purposes only. |
144 // of the cache file stored in |file_path|, and assigns it to |private_data|. | 243 URLIndexPrivateData(); |
145 // |languages| will be used to break URLs and page titles into words. | |
146 static void RestoreFromFileTask( | |
147 const FilePath& file_path, | |
148 scoped_refptr<URLIndexPrivateData> private_data, | |
149 const std::string& languages); | |
150 | 244 |
151 // Constructs a new object by restoring its contents from the file at |path|. | 245 virtual ~URLIndexPrivateData(); |
152 // Returns the new URLIndexPrivateData which on success will contain the | |
153 // restored data but upon failure will be empty. |languages| will be used to | |
154 // break URLs and page titles into words | |
155 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( | |
156 const FilePath& path, | |
157 const std::string& languages); | |
158 | 246 |
159 // Constructs a new object by rebuilding its contents from the history | 247 // Returns true if the profile is shutting down. Thread-safe. |
160 // database in |history_db|. Returns the new URLIndexPrivateData which on | 248 bool IsShutdown() const; |
161 // success will contain the rebuilt data but upon failure will be empty. | |
162 // |languages| gives a list of language encodings by which the URLs and page | |
163 // titles are broken down into words and characters. | |
164 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( | |
165 HistoryDatabase* history_db, | |
166 const std::string& languages, | |
167 const std::set<std::string>& scheme_whitelist); | |
168 | 249 |
169 // Writes |private_data| as a cache file to |file_path| and returns success | 250 // Returns true if the cache database exists and is enabled. |
170 // via |succeeded|. | 251 bool cache_enabled() const { return cache_enabled_ && cache_db_; } |
171 static void WritePrivateDataToCacheFileTask( | |
172 scoped_refptr<URLIndexPrivateData> private_data, | |
173 const FilePath& file_path, | |
174 scoped_refptr<RefCountedBool> succeeded); | |
175 | 252 |
176 // Caches the index private data and writes the cache file to the profile | 253 // Initializes all index private data members in preparation for restoring, |
177 // directory. Called by WritePrivateDataToCacheFileTask. | 254 // rebuilding or resetting the index. |
178 bool SaveToFile(const FilePath& file_path); | |
179 | |
180 // Initializes all index data members in preparation for restoring the index | |
181 // from the cache or a complete rebuild from the history database. | |
182 void Clear(); | 255 void Clear(); |
183 | 256 |
184 // Returns true if there is no data in the index. | |
185 bool Empty() const; | |
186 | |
187 // Creates a copy of ourself. | |
188 scoped_refptr<URLIndexPrivateData> Duplicate() const; | |
189 | |
190 // Adds |word_id| to |history_id|'s entry in the history/word map, | |
191 // creating a new entry if one does not already exist. | |
192 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | |
193 | |
194 // Given a set of Char16s, finds words containing those characters. | |
195 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | |
196 | |
197 // URL History indexing support functions. | 257 // URL History indexing support functions. |
198 | 258 |
199 // Indexes one URL history item as described by |row|. Returns true if the | 259 // Indexes one URL history item as described by |row|. Returns true if the |
200 // row was actually indexed. |languages| gives a list of language encodings by | 260 // row was actually indexed. |
201 // which the URLs and page titles are broken down into words and characters. | 261 bool IndexRow(const URLRow& row); |
202 // |scheme_whitelist| is used to filter non-qualifying schemes. | |
203 bool IndexRow(const URLRow& row, | |
204 const std::string& languages, | |
205 const std::set<std::string>& scheme_whitelist); | |
206 | |
207 // Adds the history item in |row| to the index if it does not already already | |
208 // exist and it meets the minimum 'quick' criteria. If the row already exists | |
209 // in the index then the index will be updated if the row still meets the | |
210 // criteria, otherwise the row will be removed from the index. Returns true | |
211 // if the index was actually updated. |languages| gives a list of language | |
212 // encodings by which the URLs and page titles are broken down into words and | |
213 // characters. |scheme_whitelist| is used to filter non-qualifying schemes. | |
214 bool UpdateURL(const URLRow& row, | |
215 const std::string& languages, | |
216 const std::set<std::string>& scheme_whitelist); | |
217 | |
218 // Deletes index data for the history item with the given |url|. | |
219 // The item may not have actually been indexed, which is the case if it did | |
220 // not previously meet minimum 'quick' criteria. Returns true if the index | |
221 // was actually updated. | |
222 bool DeleteURL(const GURL& url); | |
223 | 262 |
224 // Parses and indexes the words in the URL and page title of |row| and | 263 // Parses and indexes the words in the URL and page title of |row| and |
225 // calculates the word starts in each, saving the starts in |word_starts|. | 264 // calculates the word starts in each, saving the starts in |word_starts|. |
226 // |languages| gives a list of language encodings by which the URLs and page | 265 // |languages_| gives a list of language encodings by which the URLs and page |
227 // titles are broken down into words and characters. | 266 // titles are broken down into words and characters. |
228 void AddRowWordsToIndex(const URLRow& row, | 267 void AddRowWordsToIndex(const URLRow& row, |
229 RowWordStarts* word_starts, | 268 RowWordStarts* word_starts); |
230 const std::string& languages); | |
231 | |
232 // Removes |row| and all associated words and characters from the index. | |
233 void RemoveRowFromIndex(const URLRow& row); | |
234 | |
235 // Removes all words and characters associated with |row| from the index. | |
236 void RemoveRowWordsFromIndex(const URLRow& row); | |
237 | 269 |
238 // Given a single word in |uni_word|, adds a reference for the containing | 270 // Given a single word in |uni_word|, adds a reference for the containing |
239 // history item identified by |history_id| to the index. | 271 // history item identified by |history_id| to the index. |
240 void AddWordToIndex(const string16& uni_word, HistoryID history_id); | 272 void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
241 | 273 |
242 // Updates an existing entry in the word/history index by adding the | 274 // Updates an existing entry in the word/history index by adding the |
243 // |history_id| to set for |word_id| in the word_id_history_map_. | 275 // |history_id| to set for |word_id| in the word_id_history_map_. |
244 void UpdateWordHistory(WordID word_id, HistoryID history_id); | 276 void UpdateWordHistory(WordID word_id, HistoryID history_id); |
245 | 277 |
246 // Creates a new entry in the word/history map for |word_id| and adds | 278 // Creates a new entry in the word/history map for |word_id| and adds |
247 // |history_id| as the initial element of the word's set. | 279 // |history_id| as the initial element of the word's set. |
248 void AddWordHistory(const string16& uni_word, HistoryID history_id); | 280 void AddWordHistory(const string16& uni_word, HistoryID history_id); |
249 | 281 |
| 282 // Removes |row| and all associated words and characters from the index. |
| 283 void RemoveRowFromIndex(const URLRow& row); |
| 284 |
| 285 // Removes all words and characters associated with |row| from the index. |
| 286 void RemoveRowWordsFromIndex(const URLRow& row); |
| 287 |
| 288 // Adds |word_id| to |history_id|'s entry in the history/word map, |
| 289 // creating a new entry if one does not already exist. |
| 290 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
| 291 |
250 // Clears |used_| for each item in the search term cache. | 292 // Clears |used_| for each item in the search term cache. |
251 void ResetSearchTermCache(); | 293 void ResetSearchTermCache(); |
252 | 294 |
253 // Composes a set of history item IDs by intersecting the set for each word | 295 // Composes a set of history item IDs by intersecting the set for each word |
254 // in |unsorted_words|. | 296 // in |unsorted_words|. |
255 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); | 297 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |
256 | 298 |
257 // Helper function to HistoryIDSetFromWords which composes a set of history | 299 // Helper function to HistoryIDSetFromWords which composes a set of history |
258 // ids for the given term given in |term|. | 300 // ids for the given term given in |term|. |
259 HistoryIDSet HistoryIDsForTerm(const string16& term); | 301 HistoryIDSet HistoryIDsForTerm(const string16& term); |
260 | 302 |
261 // Encode a data structure into the protobuf |cache|. | 303 // Given a set of Char16s, finds words containing those characters. |
262 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 304 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
263 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | |
264 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | |
265 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | |
266 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | |
267 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | |
268 void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const; | |
269 | 305 |
270 // Decode a data structure from the protobuf |cache|. Return false if there | 306 // Restores our contents from the cache database |cache_db|. |
271 // is any kind of failure. |languages| will be used to break URLs and page | 307 bool RestoreFromCache(InMemoryURLCacheDatabase* cache_db); |
272 // titles into words | 308 |
273 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache, | 309 // Deletes any old style protobuf-based cache file. |
274 const std::string& languages); | 310 void DeleteOldVersionCacheFile() const; |
275 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 311 |
276 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 312 // Constructs a file path for the cache database within the same directory |
277 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 313 // where the history database is kept and saves that path to |file_path|. |
278 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 314 // Returns true if |file_path| can be successfully constructed. |
279 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 315 bool GetCacheDBPath(FilePath* file_path); |
280 bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache, | 316 |
281 const std::string& languages); | 317 // Sets the cache database for testing. Takes ownership of |test_db|. |
| 318 void SetCacheDatabaseForTesting(InMemoryURLCacheDatabase* test_db); |
282 | 319 |
283 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 320 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
284 static bool URLSchemeIsWhitelisted(const GURL& gurl, | 321 static bool URLSchemeIsWhitelisted(const GURL& gurl, |
285 const std::set<std::string>& whitelist); | 322 const std::set<std::string>& whitelist); |
286 | 323 |
| 324 // Directory where cache database (and older protobuf-based cache file) |
| 325 // resides. Except when unit testing, this is the same directory in which |
| 326 // the profile's history database is found. |
| 327 FilePath history_dir_; |
| 328 |
| 329 // Languages used during the word-breaking process during indexing. |
| 330 std::string languages_; |
| 331 |
287 // Cache of search terms. | 332 // Cache of search terms. |
288 SearchTermCacheMap search_term_cache_; | 333 SearchTermCacheMap search_term_cache_; |
289 | 334 |
| 335 // The cache database. |
| 336 scoped_refptr<InMemoryURLCacheDatabase> cache_db_; |
| 337 |
| 338 // true if the cache is enabled. |
| 339 bool cache_enabled_; |
| 340 |
| 341 // true once the shutdown process has begun. |
| 342 bool shutdown_; |
| 343 |
| 344 // Guard that prevents simultaneous, cross-thread access to |shutdown_|. |
| 345 mutable base::Lock lock_; |
| 346 |
290 // Start of data members that are cached ------------------------------------- | 347 // Start of data members that are cached ------------------------------------- |
291 | 348 |
292 // The version of the cache file most recently used to restore this instance | |
293 // of the private data. If the private data was rebuilt from the history | |
294 // database this will be 0. | |
295 int restored_cache_version_; | |
296 | |
297 // A list of all indexed words. The index of a word in this list is the | 349 // A list of all indexed words. The index of a word in this list is the |
298 // ID of the word in the word_map_. It reduces the memory overhead by | 350 // ID of the word in the word_map_. It reduces the memory overhead by |
299 // replacing a potentially long and repeated string with a simple index. | 351 // replacing a potentially long and repeated string with a simple index. |
300 String16Vector word_list_; | 352 String16Vector word_list_; |
301 | 353 |
302 // A list of available words slots in |word_list_|. An available word slot | 354 // A list of available words slots in |word_list_|. An available word slot |
303 // is the index of an unused word in word_list_ vector, also referred to as | 355 // is the index of an unused word in word_list_ vector, also referred to as |
304 // a WordID. As URL visits are added or modified new words may be added to | 356 // a WordID. As URL visits are added or modified new words may be added to |
305 // the index, in which case any available words are used, if any, and then | 357 // the index, in which case any available words are used, if any, and then |
306 // words are added to the end of the word_list_. When URL visits are | 358 // words are added to the end of the word_list_. When URL visits are |
(...skipping 22 matching lines...) Expand all Loading... |
329 // A one-to-one mapping from HistoryID to the history item data governing | 381 // A one-to-one mapping from HistoryID to the history item data governing |
330 // index inclusion and relevance scoring. | 382 // index inclusion and relevance scoring. |
331 HistoryInfoMap history_info_map_; | 383 HistoryInfoMap history_info_map_; |
332 | 384 |
333 // A one-to-one mapping from HistoryID to the word starts detected in each | 385 // A one-to-one mapping from HistoryID to the word starts detected in each |
334 // item's URL and page title. | 386 // item's URL and page title. |
335 WordStartsMap word_starts_map_; | 387 WordStartsMap word_starts_map_; |
336 | 388 |
337 // End of data members that are cached --------------------------------------- | 389 // End of data members that are cached --------------------------------------- |
338 | 390 |
339 // For unit testing only. Specifies the version of the cache file to be saved. | 391 // Only URLs with a whitelisted scheme are indexed. |
340 // Used only for testing upgrading of an older version of the cache upon | 392 std::set<std::string> scheme_whitelist_; |
341 // restore. | |
342 int saved_cache_version_; | |
343 | 393 |
344 // Used for unit testing only. Records the number of candidate history items | 394 // Used for unit testing only. Records the number of candidate history items |
345 // at three stages in the index searching process. | 395 // at three stages in the index searching process. |
346 size_t pre_filter_item_count_; // After word index is queried. | 396 size_t pre_filter_item_count_; // After word index is queried. |
347 size_t post_filter_item_count_; // After trimming large result set. | 397 size_t post_filter_item_count_; // After trimming large result set. |
348 size_t post_scoring_item_count_; // After performing final filter/scoring. | 398 size_t post_scoring_item_count_; // After performing final filter/scoring. |
349 }; | 399 }; |
350 | 400 |
351 } // namespace history | 401 } // namespace history |
352 | 402 |
353 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 403 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |