OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
7 | 7 |
8 #include <set> | 8 #include <set> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/file_path.h" | 11 #include "base/file_path.h" |
12 #include "base/gtest_prod_util.h" | 12 #include "base/gtest_prod_util.h" |
13 #include "base/memory/ref_counted.h" | 13 #include "base/memory/ref_counted.h" |
14 #include "base/synchronization/lock.h" | |
15 #include "base/threading/sequenced_worker_pool.h" | |
16 #include "chrome/browser/history/in_memory_url_index_types.h" | 14 #include "chrome/browser/history/in_memory_url_index_types.h" |
| 15 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
17 #include "chrome/browser/history/scored_history_match.h" | 16 #include "chrome/browser/history/scored_history_match.h" |
18 #include "content/public/browser/notification_details.h" | 17 #include "content/public/browser/notification_details.h" |
19 | 18 |
20 class HistoryQuickProviderTest; | 19 class HistoryQuickProviderTest; |
21 | 20 |
22 namespace in_memory_url_index { | 21 namespace in_memory_url_index { |
23 class InMemoryURLIndexCacheItem; | 22 class InMemoryURLIndexCacheItem; |
24 } | 23 } |
25 | 24 |
26 namespace history { | 25 namespace history { |
27 | 26 |
28 namespace imui = in_memory_url_index; | 27 namespace imui = in_memory_url_index; |
29 | 28 |
30 class HistoryDatabase; | 29 class HistoryDatabase; |
31 class InMemoryURLCacheDatabase; | |
32 class InMemoryURLIndex; | 30 class InMemoryURLIndex; |
33 class RefCountedBool; | 31 class RefCountedBool; |
34 | 32 |
35 // A structure private to InMemoryURLIndex describing its internal data and | 33 // Current version of the cache file. |
36 // providing for restoring, rebuilding and updating that internal data. As | 34 static const int kCurrentCacheFileVersion = 1; |
37 // this class is for exclusive use by the InMemoryURLIndex class there should | 35 |
38 // be no calls from any other class. | 36 // A structure describing the InMemoryURLIndex's internal data and providing for |
39 // | 37 // restoring, rebuilding and updating that internal data. |
40 // All public member functions are called on the main thread unless otherwise | |
41 // annotated. | |
42 class URLIndexPrivateData | 38 class URLIndexPrivateData |
43 : public base::RefCountedThreadSafe<URLIndexPrivateData> { | 39 : public base::RefCountedThreadSafe<URLIndexPrivateData> { |
44 public: | 40 public: |
45 // Creates a new instance of private data, creating or opening the cache | 41 URLIndexPrivateData(); |
46 // database located in |history_dir|. |languages| is used to break down | |
47 // search terms, URLs, and page titles into words and characters. | |
48 URLIndexPrivateData(const FilePath& history_dir, | |
49 const std::string& languages); | |
50 | |
51 // Initializes the private data and its cache database. Returns true if the | |
52 // database is successfully initialized. Any failures will mark the cache | |
53 // database as not enabled. |sequence_token| is used to coordinate all | |
54 // future database operations (not including those performed during this | |
55 // initialization). Called on the DB thread. | |
56 bool Init(base::SequencedWorkerPool::SequenceToken sequence_token); | |
57 | |
58 // Performs a Clear() and then erases the cache database. Called on the | |
59 // worker pool sequenced by InMemoryURLIndex's |sequence_token_|. | |
60 void Reset(); | |
61 | |
62 // Returns true if there is no data in the index. | |
63 bool Empty() const; | |
64 | |
65 // Returns a copy of the private data for archiving purposes. | |
66 URLIndexPrivateData* Snapshot() const; | |
67 | |
68 // Closes the database. | |
69 void Shutdown(); | |
70 | |
71 // Verifies that the private data is consistent. | |
72 bool ValidateConsistency() const; | |
73 | |
74 // Given a string16 in |search_string|, scans the history index and returns a | |
75 // vector with all scored, matching history items. The |search_string| is | |
76 // broken down into individual terms (words), each of which must occur in the | |
77 // candidate history item's URL or page title for the item to qualify; | |
78 // however, the terms do not necessarily have to be adjacent. Once we have | |
79 // a set of candidates, they are filtered to ensure that all |search_string| | |
80 // terms, as separated by whitespace, occur within the candidate's URL | |
81 // or page title. Scores are then calculated on no more than | |
82 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of | |
83 // candidates may cause perceptible typing response delays in the omnibox. | |
84 // This is likely to occur for short omnibox terms such as 'h' and 'w' which | |
85 // will be found in nearly all history candidates. Results are sorted by | |
86 // descending score. The full results set (i.e. beyond the | |
87 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | |
88 // to this function. | |
89 ScoredHistoryMatches HistoryItemsForTerms(const string16& search_string); | |
90 | |
91 // Adds the history item in |row| to the index if it does not already | |
92 // exist and it meets the minimum 'quick' criteria. If the row already exists | |
93 // in the index then the index will be updated if the row still meets the | |
94 // criteria, otherwise the row will be removed from the index. Returns true | |
95 // if the index was actually updated. Posts updates to the cache database | |
96 // that are run on the worker pool sequenced by InMemoryURLIndex's | |
97 // |sequence_token_|. | |
98 bool UpdateURL(const URLRow& row); | |
99 | |
100 // Deletes index data for the history item with the given |url|. | |
101 // The item may not have actually been indexed, which is the case if it did | |
102 // not previously meet minimum 'quick' criteria. Returns true if the index | |
103 // was actually updated. Posts updates to the cache database that are run on | |
104 // the worker pool sequenced by InMemoryURLIndex's |sequence_token_|. | |
105 bool DeleteURL(const GURL& url); | |
106 | |
107 // Sets if the cache database is enabled. | |
108 void set_cache_enabled(bool enabled) { cache_enabled_ = enabled; } | |
109 | |
110 // Returns the cache database. | |
111 InMemoryURLCacheDatabase* cache_db() { return cache_db_.get(); } | |
112 | |
113 // Restores the index data from the contents of the cache database. This is | |
114 // called on the DB thread during profile startup and returns true upon a | |
115 // successful restoration. Restoration will fail if there is no cache | |
116 // database or the cache database has been corrupted. All other database | |
117 // operations (i.e. updates from site visits, etc.) will be postponed while | |
118 // this task is being run. | |
119 bool RestoreFromCacheTask(); | |
120 | |
121 // Constructs a new private data object by rebuilding its contents from the | |
122 // history database in |history_db|. Returns the new URLIndexPrivateData which | |
123 // on success will contain the rebuilt data but upon failure will be empty. | |
124 // |history_dir| points to the directory in which the cache database will be | |
125 // created. |old_data| provides the cache database and the languages to be | |
126 // used for breaking down search terms, URLs and page titles. This is called | |
127 // on the DB thread during profile startup iff restoring from the cache | |
128 // database fails (see also RestoreFromCacheTask()). All other database | |
129 // operations (i.e. updates from site visits, etc.) will be postponed while | |
130 // this task is being run. | |
131 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( | |
132 HistoryDatabase* history_db, | |
133 scoped_refptr<URLIndexPrivateData> old_data); | |
134 | |
135 // Completely refreshes the contents of the cache database using the contents | |
136 // of the in-memory index data. This task is performed on the sequenced | |
137 // blocking pool using the sequence_token with which this instance was | |
138 // Init'ed. A refresh will occur 1) during profile startup if a | |
139 // RebuildFromHistory(...) is required, or 2) at any time database corruption | |
140 // is detected while updating the database in an attempt to repair the | |
141 // corruption. | |
142 void RefreshCacheTask(); | |
143 | |
144 static void InitializeSchemeWhitelistForTesting( | |
145 std::set<std::string>* whitelist); | |
146 | 42 |
147 private: | 43 private: |
148 friend class base::RefCountedThreadSafe<URLIndexPrivateData>; | 44 friend class base::RefCountedThreadSafe<URLIndexPrivateData>; |
| 45 ~URLIndexPrivateData(); |
| 46 |
149 friend class AddHistoryMatch; | 47 friend class AddHistoryMatch; |
150 friend class ::HistoryQuickProviderTest; | 48 friend class ::HistoryQuickProviderTest; |
151 friend class InMemoryURLCacheDatabase; | 49 friend class InMemoryURLIndex; |
152 friend class InMemoryURLIndexCacheTest; | |
153 friend class InMemoryURLIndexTest; | 50 friend class InMemoryURLIndexTest; |
154 friend class InMemoryURLIndexBaseTest; | 51 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
155 friend class IntercessionaryIndexTest; | |
156 friend class URLIndexOldCacheTest; | |
157 friend class URLIndexPrivateDataTest; | |
158 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); | 52 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
| 53 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
159 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 54 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
160 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 55 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
161 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 56 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
162 FRIEND_TEST_ALL_PREFIXES(IntercessionaryIndexTest, CacheDatabaseFailure); | |
163 FRIEND_TEST_ALL_PREFIXES(IntercessionaryIndexTest, | |
164 ShutdownDuringCacheRefresh); | |
165 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 57 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
166 FRIEND_TEST_ALL_PREFIXES(URLIndexPrivateDataTest, CacheFetch); | |
167 | 58 |
168 // Support caching of term results so that we can optimize searches which | 59 // Support caching of term results so that we can optimize searches which |
169 // build upon a previous search. Each entry in this map represents one | 60 // build upon a previous search. Each entry in this map represents one |
170 // search term from the most recent search. For example, if the user had | 61 // search term from the most recent search. For example, if the user had |
171 // typed "google blog trans" and then typed an additional 'l' (at the end, | 62 // typed "google blog trans" and then typed an additional 'l' (at the end, |
172 // of course) then there would be four items in the cache: 'blog', 'google', | 63 // of course) then there would be four items in the cache: 'blog', 'google', |
173 // 'trans', and 'transl'. All would be marked as being in use except for the | 64 // 'trans', and 'transl'. All would be marked as being in use except for the |
174 // 'trans' item; its cached data would have been used when optimizing the | 65 // 'trans' item; its cached data would have been used when optimizing the |
175 // construction of the search results candidates for 'transl' but then would | 66 // construction of the search results candidates for 'transl' but then would |
176 // no longer be needed. | 67 // no longer be needed. |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
225 public: | 116 public: |
226 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); | 117 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); |
227 ~HistoryItemFactorGreater(); | 118 ~HistoryItemFactorGreater(); |
228 | 119 |
229 bool operator()(const HistoryID h1, const HistoryID h2); | 120 bool operator()(const HistoryID h1, const HistoryID h2); |
230 | 121 |
231 private: | 122 private: |
232 const history::HistoryInfoMap& history_info_map_; | 123 const history::HistoryInfoMap& history_info_map_; |
233 }; | 124 }; |
234 | 125 |
235 // Creates a new instance of private data for purposes of rebuilding from | 126 // Given a string16 in |term_string|, scans the history index and returns a |
236 // the history database while simultaneously allowing continued use of an | 127 // vector with all scored, matching history items. The |term_string| is |
237 // older private data |old_data|. The old data will still be used for | 128 // broken down into individual terms (words), each of which must occur in the |
238 // providing search results. Any updates to the private data will be queued | 129 // candidate history item's URL or page title for the item to qualify; |
239 // for application to the new data once it has been successfully rebuilt. | 130 // however, the terms do not necessarily have to be adjacent. Once we have |
240 URLIndexPrivateData(const URLIndexPrivateData& old_data); | 131 // a set of candidates, they are filtered to ensure that all |term_string| |
| 132 // terms, as separated by whitespace, occur within the candidate's URL |
| 133 // or page title. Scores are then calculated on no more than |
| 134 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of |
| 135 // candidates may cause perceptible typing response delays in the omnibox. |
| 136 // This is likely to occur for short omnibox terms such as 'h' and 'w' which |
| 137 // will be found in nearly all history candidates. Results are sorted by |
| 138 // descending score. The full results set (i.e. beyond the |
| 139 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
| 140 // to this function. |
| 141 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); |
241 | 142 |
242 // The following constructor is for unit testing purposes only. | 143 // Creates a new URLIndexPrivateData object, populates it from the contents |
243 URLIndexPrivateData(); | 144 // of the cache file stored in |file_path|, and assigns it to |private_data|. |
| 145 // |languages| will be used to break URLs and page titles into words. |
| 146 static void RestoreFromFileTask( |
| 147 const FilePath& file_path, |
| 148 scoped_refptr<URLIndexPrivateData> private_data, |
| 149 const std::string& languages); |
244 | 150 |
245 virtual ~URLIndexPrivateData(); | 151 // Constructs a new object by restoring its contents from the file at |path|. |
| 152 // Returns the new URLIndexPrivateData which on success will contain the |
| 153 // restored data but upon failure will be empty. |languages| will be used to |
| 154 // break URLs and page titles into words. |
| 155 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( |
| 156 const FilePath& path, |
| 157 const std::string& languages); |
246 | 158 |
247 // Returns true if the profile is shutting down. Thread-safe. | 159 // Constructs a new object by rebuilding its contents from the history |
248 bool IsShutdown() const; | 160 // database in |history_db|. Returns the new URLIndexPrivateData which on |
| 161 // success will contain the rebuilt data but upon failure will be empty. |
| 162 // |languages| gives a list of language encodings by which the URLs and page |
| 163 // titles are broken down into words and characters. |
| 164 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( |
| 165 HistoryDatabase* history_db, |
| 166 const std::string& languages, |
| 167 const std::set<std::string>& scheme_whitelist); |
249 | 168 |
250 // Gets if the cache database is enabled. | 169 // Writes |private_data| as a cache file to |file_path| and returns success |
251 bool cache_enabled() const { return cache_enabled_ && cache_db_; } | 170 // via |succeeded|. |
| 171 static void WritePrivateDataToCacheFileTask( |
| 172 scoped_refptr<URLIndexPrivateData> private_data, |
| 173 const FilePath& file_path, |
| 174 scoped_refptr<RefCountedBool> succeeded); |
252 | 175 |
253 // Initializes all index private data members in preparation for restoring, | 176 // Caches the index private data and writes the cache file to the profile |
254 // rebuilding or resetting the index. | 177 // directory. Called by WritePrivateDataToCacheFileTask. |
| 178 bool SaveToFile(const FilePath& file_path); |
| 179 |
| 180 // Initializes all index data members in preparation for restoring the index |
| 181 // from the cache or a complete rebuild from the history database. |
255 void Clear(); | 182 void Clear(); |
256 | 183 |
| 184 // Returns true if there is no data in the index. |
| 185 bool Empty() const; |
| 186 |
| 187 // Creates a copy of ourself. |
| 188 scoped_refptr<URLIndexPrivateData> Duplicate() const; |
| 189 |
| 190 // Adds |word_id| to |history_id|'s entry in the history/word map, |
| 191 // creating a new entry if one does not already exist. |
| 192 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
| 193 |
| 194 // Given a set of Char16s, finds words containing those characters. |
| 195 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
| 196 |
257 // URL History indexing support functions. | 197 // URL History indexing support functions. |
258 | 198 |
259 // Indexes one URL history item as described by |row|. Returns true if the | 199 // Indexes one URL history item as described by |row|. Returns true if the |
260 // row was actually indexed. | 200 // row was actually indexed. |languages| gives a list of language encodings by |
261 bool IndexRow(const URLRow& row); | 201 // which the URLs and page titles are broken down into words and characters. |
| 202 // |scheme_whitelist| is used to filter non-qualifying schemes. |
| 203 bool IndexRow(const URLRow& row, |
| 204 const std::string& languages, |
| 205 const std::set<std::string>& scheme_whitelist); |
| 206 |
| 207 // Adds the history item in |row| to the index if it does not already |
| 208 // exist and it meets the minimum 'quick' criteria. If the row already exists |
| 209 // in the index then the index will be updated if the row still meets the |
| 210 // criteria, otherwise the row will be removed from the index. Returns true |
| 211 // if the index was actually updated. |languages| gives a list of language |
| 212 // encodings by which the URLs and page titles are broken down into words and |
| 213 // characters. |scheme_whitelist| is used to filter non-qualifying schemes. |
| 214 bool UpdateURL(const URLRow& row, |
| 215 const std::string& languages, |
| 216 const std::set<std::string>& scheme_whitelist); |
| 217 |
| 218 // Deletes index data for the history item with the given |url|. |
| 219 // The item may not have actually been indexed, which is the case if it did |
| 220 // not previously meet minimum 'quick' criteria. Returns true if the index |
| 221 // was actually updated. |
| 222 bool DeleteURL(const GURL& url); |
262 | 223 |
263 // Parses and indexes the words in the URL and page title of |row| and | 224 // Parses and indexes the words in the URL and page title of |row| and |
264 // calculates the word starts in each, saving the starts in |word_starts|. | 225 // calculates the word starts in each, saving the starts in |word_starts|. |
265 // |languages| gives a list of language encodings by which the URLs and page | 226 // |languages| gives a list of language encodings by which the URLs and page |
266 // titles are broken down into words and characters. | 227 // titles are broken down into words and characters. |
267 void AddRowWordsToIndex(const URLRow& row, | 228 void AddRowWordsToIndex(const URLRow& row, |
268 RowWordStarts* word_starts); | 229 RowWordStarts* word_starts, |
| 230 const std::string& languages); |
| 231 |
| 232 // Removes |row| and all associated words and characters from the index. |
| 233 void RemoveRowFromIndex(const URLRow& row); |
| 234 |
| 235 // Removes all words and characters associated with |row| from the index. |
| 236 void RemoveRowWordsFromIndex(const URLRow& row); |
269 | 237 |
270 // Given a single word in |uni_word|, adds a reference for the containing | 238 // Given a single word in |uni_word|, adds a reference for the containing |
271 // history item identified by |history_id| to the index. | 239 // history item identified by |history_id| to the index. |
272 void AddWordToIndex(const string16& uni_word, HistoryID history_id); | 240 void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
273 | 241 |
274 // Updates an existing entry in the word/history index by adding the | 242 // Updates an existing entry in the word/history index by adding the |
275 // |history_id| to the set for |word_id| in the word_id_history_map_. | 243 // |history_id| to the set for |word_id| in the word_id_history_map_. |
276 void UpdateWordHistory(WordID word_id, HistoryID history_id); | 244 void UpdateWordHistory(WordID word_id, HistoryID history_id); |
277 | 245 |
278 // Creates a new entry in the word/history map for |word_id| and adds | 246 // Creates a new entry in the word/history map for |word_id| and adds |
279 // |history_id| as the initial element of the word's set. | 247 // |history_id| as the initial element of the word's set. |
280 void AddWordHistory(const string16& uni_word, HistoryID history_id); | 248 void AddWordHistory(const string16& uni_word, HistoryID history_id); |
281 | 249 |
282 // Removes |row| and all associated words and characters from the index. | |
283 void RemoveRowFromIndex(const URLRow& row); | |
284 | |
285 // Removes all words and characters associated with |row| from the index. | |
286 void RemoveRowWordsFromIndex(const URLRow& row); | |
287 | |
288 // Adds |word_id| to |history_id|'s entry in the history/word map, | |
289 // creating a new entry if one does not already exist. | |
290 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | |
291 | |
292 // Clears |used_| for each item in the search term cache. | 250 // Clears |used_| for each item in the search term cache. |
293 void ResetSearchTermCache(); | 251 void ResetSearchTermCache(); |
294 | 252 |
295 // Composes a set of history item IDs by intersecting the set for each word | 253 // Composes a set of history item IDs by intersecting the set for each word |
296 // in |unsorted_words|. | 254 // in |unsorted_words|. |
297 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); | 255 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |
298 | 256 |
299 // Helper function to HistoryIDSetFromWords which composes a set of history | 257 // Helper function to HistoryIDSetFromWords which composes a set of history |
300 // ids for the term given in |term|. | 258 // ids for the term given in |term|. |
301 HistoryIDSet HistoryIDsForTerm(const string16& term); | 259 HistoryIDSet HistoryIDsForTerm(const string16& term); |
302 | 260 |
303 // Given a set of Char16s, finds words containing those characters. | 261 // Encode a data structure into the protobuf |cache|. |
304 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 262 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
| 263 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
| 264 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 265 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 266 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 267 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 268 void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const; |
305 | 269 |
306 // Restores our contents from the cache database |cache_db|. | 270 // Decode a data structure from the protobuf |cache|. Return false if there |
307 bool RestoreFromCache(InMemoryURLCacheDatabase* cache_db); | 271 // is any kind of failure. |languages| will be used to break URLs and page |
308 | 272 // titles into words. |
309 // Deletes any old style protobuf-based cache file. | 273 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache, |
310 void DeleteOldVersionCacheFile() const; | 274 const std::string& languages); |
311 | 275 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
312 // Constructs a file path for the cache database within the same directory | 276 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
313 // where the history database is kept and saves that path to |file_path|. | 277 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
314 // Returns true if |file_path| can be successfully constructed. | 278 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
315 bool GetCacheDBPath(FilePath* file_path); | 279 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
316 | 280 bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache, |
317 // Sets the cache database for testing. Takes ownership of |test_db|. | 281 const std::string& languages); |
318 void SetCacheDatabaseForTesting(InMemoryURLCacheDatabase* test_db); | |
319 | 282 |
320 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 283 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
321 static bool URLSchemeIsWhitelisted(const GURL& gurl, | 284 static bool URLSchemeIsWhitelisted(const GURL& gurl, |
322 const std::set<std::string>& whitelist); | 285 const std::set<std::string>& whitelist); |
323 | 286 |
324 // Directory where cache database (and older protobuf-based cache file) | |
325 // resides. Except when unit testing, this is the same directory in which | |
326 // the profile's history database is found. | |
327 FilePath history_dir_; | |
328 | |
329 // Languages used during the word-breaking process during indexing. | |
330 std::string languages_; | |
331 | |
332 // Cache of search terms. | 287 // Cache of search terms. |
333 SearchTermCacheMap search_term_cache_; | 288 SearchTermCacheMap search_term_cache_; |
334 | 289 |
335 // The cache database. | 290 // Start of data members that are cached ------------------------------------- |
336 scoped_refptr<InMemoryURLCacheDatabase> cache_db_; | |
337 | 291 |
338 // true if the cache is enabled. | 292 // The version of the cache file most recently used to restore this instance |
339 bool cache_enabled_; | 293 // of the private data. If the private data was rebuilt from the history |
340 | 294 // database this will be 0. |
341 // true once the shutdown process has begun. | 295 int restored_cache_version_; |
342 bool shutdown_; | |
343 | |
344 // Guard that prevents simultaneous, cross-thread access to |shutdown_|. | |
345 mutable base::Lock lock_; | |
346 | |
347 // Start of data members that are cached ------------------------------------- | |
348 | 296 |
349 // A list of all indexed words. The index of a word in this list is the | 297 // A list of all indexed words. The index of a word in this list is the |
350 // ID of the word in the word_map_. It reduces the memory overhead by | 298 // ID of the word in the word_map_. It reduces the memory overhead by |
351 // replacing a potentially long and repeated string with a simple index. | 299 // replacing a potentially long and repeated string with a simple index. |
352 String16Vector word_list_; | 300 String16Vector word_list_; |
353 | 301 |
354 // A list of available word slots in |word_list_|. An available word slot | 302 // A list of available word slots in |word_list_|. An available word slot |
355 // is the index of an unused word in the word_list_ vector, also referred to as | 303 // is the index of an unused word in the word_list_ vector, also referred to as |
356 // a WordID. As URL visits are added or modified new words may be added to | 304 // a WordID. As URL visits are added or modified new words may be added to |
357 // the index, in which case any available words are used, if any, and then | 305 // the index, in which case any available words are used, if any, and then |
(...skipping 23 matching lines...) Expand all Loading... |
381 // A one-to-one mapping from HistoryID to the history item data governing | 329 // A one-to-one mapping from HistoryID to the history item data governing |
382 // index inclusion and relevance scoring. | 330 // index inclusion and relevance scoring. |
383 HistoryInfoMap history_info_map_; | 331 HistoryInfoMap history_info_map_; |
384 | 332 |
385 // A one-to-one mapping from HistoryID to the word starts detected in each | 333 // A one-to-one mapping from HistoryID to the word starts detected in each |
386 // item's URL and page title. | 334 // item's URL and page title. |
387 WordStartsMap word_starts_map_; | 335 WordStartsMap word_starts_map_; |
388 | 336 |
389 // End of data members that are cached --------------------------------------- | 337 // End of data members that are cached --------------------------------------- |
390 | 338 |
391 // Only URLs with a whitelisted scheme are indexed. | 339 // For unit testing only. Specifies the version of the cache file to be saved. |
392 std::set<std::string> scheme_whitelist_; | 340 // Used only for testing upgrading of an older version of the cache upon |
| 341 // restore. |
| 342 int saved_cache_version_; |
393 | 343 |
394 // Used for unit testing only. Records the number of candidate history items | 344 // Used for unit testing only. Records the number of candidate history items |
395 // at three stages in the index searching process. | 345 // at three stages in the index searching process. |
396 size_t pre_filter_item_count_; // After word index is queried. | 346 size_t pre_filter_item_count_; // After word index is queried. |
397 size_t post_filter_item_count_; // After trimming large result set. | 347 size_t post_filter_item_count_; // After trimming large result set. |
398 size_t post_scoring_item_count_; // After performing final filter/scoring. | 348 size_t post_scoring_item_count_; // After performing final filter/scoring. |
399 }; | 349 }; |
400 | 350 |
401 } // namespace history | 351 } // namespace history |
402 | 352 |
403 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 353 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |