OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
7 #pragma once | 7 #pragma once |
8 | 8 |
9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
10 #include "base/gtest_prod_util.h" | 10 #include "base/gtest_prod_util.h" |
11 #include "chrome/browser/history/in_memory_url_index_types.h" | 11 #include "chrome/browser/history/in_memory_url_index_types.h" |
12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
13 | 13 |
14 class HistoryQuickProviderTest; | 14 class HistoryQuickProviderTest; |
15 | 15 |
16 namespace in_memory_url_index { | 16 namespace in_memory_url_index { |
17 class InMemoryURLIndexCacheItem; | 17 class InMemoryURLIndexCacheItem; |
18 } | 18 } |
19 | 19 |
20 namespace history { | 20 namespace history { |
21 | 21 |
22 namespace imui = in_memory_url_index; | 22 namespace imui = in_memory_url_index; |
23 | 23 |
24 class HistoryDatabase; | 24 class HistoryDatabase; |
25 | 25 |
| 26 // Current version of the cache file. |
| 27 static const int kCurrentCacheFileVersion = 1; |
| 28 |
26 // A structure describing the InMemoryURLIndex's internal data and providing for | 29 // A structure describing the InMemoryURLIndex's internal data and providing for |
27 // restoring, rebuilding and updating that internal data. | 30 // restoring, rebuilding and updating that internal data. |
28 class URLIndexPrivateData { | 31 class URLIndexPrivateData { |
29 public: | 32 public: |
30 URLIndexPrivateData(); | 33 URLIndexPrivateData(); |
31 ~URLIndexPrivateData(); | 34 ~URLIndexPrivateData(); |
32 | 35 |
33 private: | 36 private: |
34 friend class AddHistoryMatch; | 37 friend class AddHistoryMatch; |
35 friend class ::HistoryQuickProviderTest; | 38 friend class ::HistoryQuickProviderTest; |
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
170 // criteria, otherwise the row will be removed from the index. Returns true | 173 // criteria, otherwise the row will be removed from the index. Returns true |
171 // if the index was actually updated. | 174 // if the index was actually updated. |
172 bool UpdateURL(const URLRow& row); | 175 bool UpdateURL(const URLRow& row); |
173 | 176 |
174 // Deletes indexing data for the history item with the URL given in |url|. | 177 // Deletes indexing data for the history item with the URL given in |url|. |
175 // The item may not have actually been indexed, which is the case if it did | 178 // The item may not have actually been indexed, which is the case if it did |
176 // not previously meet minimum 'quick' criteria. Returns true if the index | 179 // not previously meet minimum 'quick' criteria. Returns true if the index |
177 // was actually updated. | 180 // was actually updated. |
178 bool DeleteURL(const GURL& url); | 181 bool DeleteURL(const GURL& url); |
179 | 182 |
180 // Parses and indexes the words in the URL and page title of |row|. | 183 // Parses and indexes the words in the URL and page title of |row| and |
181 void AddRowWordsToIndex(const URLRow& row); | 184 // calculates the word starts in each, saving the starts in |word_starts|. |
| 185 void AddRowWordsToIndex(const URLRow& row, RowWordStarts* word_starts); |
182 | 186 |
183 // Removes |row| and all associated words and characters from the index. | 187 // Removes |row| and all associated words and characters from the index. |
184 void RemoveRowFromIndex(const URLRow& row); | 188 void RemoveRowFromIndex(const URLRow& row); |
185 | 189 |
186 // Removes all words and characters associated with |row| from the index. | 190 // Removes all words and characters associated with |row| from the index. |
187 void RemoveRowWordsFromIndex(const URLRow& row); | 191 void RemoveRowWordsFromIndex(const URLRow& row); |
188 | 192 |
189 // Given a single word in |uni_word|, adds a reference for the containing | 193 // Given a single word in |uni_word|, adds a reference for the containing |
190 // history item identified by |history_id| to the index. | 194 // history item identified by |history_id| to the index. |
191 void AddWordToIndex(const string16& uni_word, HistoryID history_id); | 195 void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
(...skipping 23 matching lines...) Expand all Loading... |
215 // in the user input, 2) completeness of each term's match, 3) ordering | 219 // in the user input, 2) completeness of each term's match, 3) ordering |
216 // of the occurrence of each term (i.e. they appear in order), 4) last | 220 // of the occurrence of each term (i.e. they appear in order), 4) last |
217 // visit time, and 5) number of visits. | 221 // visit time, and 5) number of visits. |
218 // This raw score allows the results to be ordered and can be used | 222 // This raw score allows the results to be ordered and can be used |
219 // to influence the final score calculated by the client of this | 223 // to influence the final score calculated by the client of this |
220 // index. Returns a ScoredHistoryMatch structure with the raw score and | 224 // index. Returns a ScoredHistoryMatch structure with the raw score and |
221 // substring matching metrics. | 225 // substring matching metrics. |
222 static ScoredHistoryMatch ScoredMatchForURL( | 226 static ScoredHistoryMatch ScoredMatchForURL( |
223 const URLRow& row, | 227 const URLRow& row, |
224 const string16& lower_string, | 228 const string16& lower_string, |
225 const String16Vector& terms_vector); | 229 const String16Vector& terms_vector, |
| 230 const RowWordStarts& word_starts); |
226 | 231 |
227 // Calculates a component score based on position, ordering and total | 232 // Calculates a component score based on position, ordering and total |
228 // substring match size using metrics recorded in |matches|. |max_length| | 233 // substring match size using metrics recorded in |matches|. |max_length| |
229 // is the length of the string against which the terms are being searched. | 234 // is the length of the string against which the terms are being searched. |
230 static int ScoreComponentForMatches(const TermMatches& matches, | 235 static int ScoreComponentForMatches(const TermMatches& matches, |
231 size_t max_length); | 236 size_t max_length); |
232 | 237 |
233 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 238 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
234 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | 239 bool URLSchemeIsWhitelisted(const GURL& gurl) const; |
235 | 240 |
| 241 // Sets the version of the cache file that will be saved when calling |
| 242 // SavePrivateData(). For unit testing only. |
| 243 void set_saved_cache_version(int version) { saved_cache_version_ = version; } |
| 244 |
236 // Encode a data structure into the protobuf |cache|. | 245 // Encode a data structure into the protobuf |cache|. |
237 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 246 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
238 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 247 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
239 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 248 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
240 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 249 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
241 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 250 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
242 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 251 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
| 252 void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const; |
243 | 253 |
244 // Decode a data structure from the protobuf |cache|. Return false if there | 254 // Decode a data structure from the protobuf |cache|. Return false if there |
245 // is any kind of failure. | 255 // is any kind of failure. |
246 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 256 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); |
247 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 257 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
248 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 258 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
249 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 259 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
250 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 260 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
251 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 261 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
| 262 bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache); |
252 | 263 |
253 // Cache of search terms. | 264 // Cache of search terms. |
254 SearchTermCacheMap search_term_cache_; | 265 SearchTermCacheMap search_term_cache_; |
255 | 266 |
256 // Languages used during the word-breaking process during indexing. | 267 // Languages used during the word-breaking process during indexing. |
257 std::string languages_; | 268 std::string languages_; |
258 | 269 |
259 // Only URLs with a whitelisted scheme are indexed. | 270 // Only URLs with a whitelisted scheme are indexed. |
260 std::set<std::string> scheme_whitelist_; | 271 std::set<std::string> scheme_whitelist_; |
261 | 272 |
262 // Start of data members that are cached ------------------------------------- | 273 // Start of data members that are cached ------------------------------------- |
263 | 274 |
| 275 // The version of the cache file most recently used to restore this instance |
| 276 // of the private data. If the private data was rebuilt from the history |
| 277 // database this will be 0. |
| 278 int restored_cache_version_; |
| 279 |
264 // A list of all indexed words. The index of a word in this list is the | 280 // A list of all indexed words. The index of a word in this list is the |
265 // ID of the word in the word_map_. It reduces the memory overhead by | 281 // ID of the word in the word_map_. It reduces the memory overhead by |
266 // replacing a potentially long and repeated string with a simple index. | 282 // replacing a potentially long and repeated string with a simple index. |
267 String16Vector word_list_; | 283 String16Vector word_list_; |
268 | 284 |
269 // A list of available word slots in |word_list_|. An available word slot | 285 // A list of available word slots in |word_list_|. An available word slot |
270 // is the index of an unused word in word_list_ vector, also referred to as | 286 // is the index of an unused word in word_list_ vector, also referred to as |
271 // a WordID. As URL visits are added or modified new words may be added to | 287 // a WordID. As URL visits are added or modified new words may be added to |
272 // the index, in which case any available words are used, if any, and then | 288 // the index, in which case any available words are used, if any, and then |
273 // words are added to the end of the word_list_. When URL visits are | 289 // words are added to the end of the word_list_. When URL visits are |
(...skipping 16 matching lines...) Expand all Loading... |
290 | 306 |
291 // A one-to-many mapping from a HistoryID to all WordIDs of words that occur | 307 // A one-to-many mapping from a HistoryID to all WordIDs of words that occur |
292 // in the URL and/or page title of the history item referenced by that | 308 // in the URL and/or page title of the history item referenced by that |
293 // HistoryID. | 309 // HistoryID. |
294 HistoryIDWordMap history_id_word_map_; | 310 HistoryIDWordMap history_id_word_map_; |
295 | 311 |
296 // A one-to-one mapping from HistoryID to the history item data governing | 312 // A one-to-one mapping from HistoryID to the history item data governing |
297 // index inclusion and relevance scoring. | 313 // index inclusion and relevance scoring. |
298 HistoryInfoMap history_info_map_; | 314 HistoryInfoMap history_info_map_; |
299 | 315 |
| 316 // A one-to-one mapping from HistoryID to the word starts detected in each |
| 317 // item's URL and page title. |
| 318 WordStartsMap word_starts_map_; |
| 319 |
300 // End of data members that are cached --------------------------------------- | 320 // End of data members that are cached --------------------------------------- |
301 | 321 |
| 322 // For unit testing only. Specifies the version of the cache file to be saved. |
| 323 // Used only for testing upgrading of an older version of the cache upon |
| 324 // restore. |
| 325 int saved_cache_version_; |
| 326 |
302 // Used for unit testing only. Records the number of candidate history items | 327 // Used for unit testing only. Records the number of candidate history items |
303 // at three stages in the index searching process. | 328 // at three stages in the index searching process. |
304 size_t pre_filter_item_count_; // After word index is queried. | 329 size_t pre_filter_item_count_; // After word index is queried. |
305 size_t post_filter_item_count_; // After trimming large result set. | 330 size_t post_filter_item_count_; // After trimming large result set. |
306 size_t post_scoring_item_count_; // After performing final filter/scoring. | 331 size_t post_scoring_item_count_; // After performing final filter/scoring. |
307 }; | 332 }; |
308 | 333 |
309 } // namespace history | 334 } // namespace history |
310 | 335 |
311 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 336 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |