Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(153)

Side by Side Diff: chrome/browser/history/text_database_manager.h

Issue 16951015: Remove TextDatabase from the history service. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@replace_fts
Patch Set: Sync and rebase. Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
6 #define CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
7
8 #include <cstddef>
9 #include <set>
10 #include <vector>
11
12 #include "base/basictypes.h"
13 #include "base/containers/mru_cache.h"
14 #include "base/files/file_path.h"
15 #include "base/gtest_prod_util.h"
16 #include "base/memory/weak_ptr.h"
17 #include "base/strings/string16.h"
18 #include "chrome/browser/history/history_types.h"
19 #include "chrome/browser/history/query_parser.h"
20 #include "chrome/browser/history/text_database.h"
21 #include "chrome/browser/history/url_database.h"
22
23 namespace history {
24
25 class HistoryPublisher;
26 class VisitDatabase;
27
28 // Manages a set of text databases representing different time periods. This
29 // will page them in and out as necessary, and will manage queries for times
30 // spanning multiple databases.
31 //
32 // It will also keep a list of partial changes, such as page adds and title and
33 // body sets, all of which come in at different times for a given page. When
34 // all data is received or enough time has elapsed since adding, the indexed
35 // data will be committed.
36 //
37 // This allows us to minimize inserts and modifications, which are slow for the
38 // full text database, since each page's information is added exactly once.
39 //
40 // Note: be careful to delete the relevant entries from this uncommitted list
41 // when clearing history or this information may get added to the database soon
42 // after the clear.
43 class TextDatabaseManager {
44 public:
45 // Tracks a set of changes (only deletes need to be supported now) to the
46 // databases. This is opaque to the caller, but allows it to pass back a list
47 // of all databases that it has caused a change to.
48 //
49 // This is necessary for the feature where we optimize full text databases
50 // which have changed as a result of the user deleting history via
51 // OptimizeChangedDatabases. We want to do each affected database only once at
52 // the end of the delete, but we don't want the caller to have to worry about
53 // our internals.
54 class ChangeSet {
55 public:
56 ChangeSet();
57 ~ChangeSet();
58
59 private:
60 friend class TextDatabaseManager;
61 // Set of database identifiers; a std::set, so each id is stored only once.
62 typedef std::set<TextDatabase::DBIdent> DBSet;
63 // Records |id| as changed. Inserting an already-present id is a no-op.
64 void Add(TextDatabase::DBIdent id) { changed_databases_.insert(id); }
65 // Identifiers of the databases recorded through Add().
66 DBSet changed_databases_;
67 };
68
69 // You must call Init() to complete initialization.
70 //
71 // |dir| is the directory that will hold the full text database files (there
72 // will be many files named by their date ranges).
73 //
74 // The visit database is a pointer owned by the caller for the main database
75 // (of recent visits). The visit database will be updated to refer to the
76 // added text database entries.
77 TextDatabaseManager(const base::FilePath& dir,
78 URLDatabase* url_database,
79 VisitDatabase* visit_database);
80 ~TextDatabaseManager();
81
82 // Must call before using other functions. If it returns false, no other
83 // functions should be called.
84 bool Init(const HistoryPublisher* history_publisher);
85
86 // Returns the directory that holds the full text database files.
87 const base::FilePath& GetDir() const { return dir_; }
88
89 // Allows scoping updates. This also allows things to go faster since every
90 // page add doesn't need to be committed to disk (slow). Note that files will
91 // still get created during a transaction.
92 void BeginTransaction();
93 void CommitTransaction();
94
95 // Sets specific information for the given page to be added to the database.
96 // In normal operation, URLs will be added as the user visits them, the titles
97 // and bodies will come in some time after that. These changes will be
98 // automatically coalesced and added to the database some time in the future
99 // using AddPageData().
100 //
101 // AddPageURL must be called for a given URL (+ its corresponding ID) before
102 // either the title or body set. The visit ID specifies the visit that will
103 // get updated to refer to the full text indexed information. The visit time
104 // should be the time corresponding to that visit in the database.
105 void AddPageURL(const GURL& url, URLID url_id, VisitID visit_id,
106 base::Time visit_time);
107 void AddPageTitle(const GURL& url, const string16& title);
108 void AddPageContents(const GURL& url, const string16& body);
109
110 // Adds the given data to the appropriate database file, returning true on
111 // success. The visit database row identified by |visit_id| will be updated
112 // to refer to the full text index entry. If the visit ID is 0, the visit
113 // database will not be updated.
114 bool AddPageData(const GURL& url,
115 URLID url_id,
116 VisitID visit_id,
117 base::Time visit_time,
118 const string16& title,
119 const string16& body);
120
121 // Deletes the instance of indexed data identified by the given time and URL.
122 // Any changes will be tracked in the optional change set for use when calling
123 // OptimizeChangedDatabases later. change_set can be NULL.
124 void DeletePageData(base::Time time, const GURL& url,
125 ChangeSet* change_set);
126
127 // The text database manager keeps a list of changes that are made via
128 // AddPageURL/Title/Contents that may not be committed to the database yet.
129 // This function removes entries from this list happening between the given
130 // time range. It is called when the user clears their history for a time
131 // range, and we don't want any of our data to "leak." If restrict_urls is
132 // not empty, only changes on those URLs are deleted.
133 //
134 // Either or both times may be is_null to be unbounded in that direction. When
135 // non-null, the range is [begin, end).
136 void DeleteFromUncommitted(const std::set<GURL>& restrict_urls,
137 base::Time begin, base::Time end);
138
139 // This function removes entries from the same list as
140 // DeleteFromUncommitted() with times belonging to the given list of
141 // times, which must be in reverse chronological order.
142 void DeleteFromUncommittedForTimes(const std::vector<base::Time>& times);
143
144 // Deletes all full text search data by removing the files from the disk.
145 // This must be called OUTSIDE of a transaction since it actually deletes the
146 // files rather than messing with the database.
147 void DeleteAll();
148
149 // Calls optimize on all the databases identified in a given change set (see
150 // the definition of ChangeSet above for more). Optimizing means that old data
151 // will be removed rather than marked unused.
152 void OptimizeChangedDatabases(const ChangeSet& change_set);
153
154 // Executes the given query. See QueryOptions for more info on input.
155 //
156 // The results are filled into |results|, and the first time considered for
157 // the output is in |first_time_searched| (see QueryResults for more).
158 //
159 // This function will return more than one match per URL if there is more than
160 // one entry for that URL in the database.
161 void GetTextMatches(const string16& query,
162 const QueryOptions& options,
163 std::vector<TextDatabase::Match>* results,
164 base::Time* first_time_searched);
165
166 size_t GetUncommittedEntryCountForTest() const;
167
168 private:
169 // These tests call FlushOldChangesForTime to force expiration.
170 FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, InsertPartial);
171 FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, PartialComplete);
172 FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteURLAndFavicon);
173 FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, FlushRecentURLsUnstarred);
174 FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, FlushURLsForTimes);
175 FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest,
176 FlushRecentURLsUnstarredRestricted);
177
178 // Stores "recent stuff" that has happened with the page, since the page
179 // visit, title, and body all come in at different times.
180 class PageInfo {
181 public:
182 PageInfo(URLID url_id, VisitID visit_id, base::Time visit_time);
183 ~PageInfo();
184
185 // Getters.
186 URLID url_id() const { return url_id_; }
187 VisitID visit_id() const { return visit_id_; }
188 base::Time visit_time() const { return visit_time_; }
189 const string16& title() const { return title_; }
190 const string16& body() const { return body_; }
191
192 // Setters, we can only update the title and body.
193 void set_title(const string16& ttl);
194 void set_body(const string16& bdy);
195
196 // Return true if the title or body, respectively, has been set. Since
197 // both the title and body setters will "fix" empty strings to be a space,
198 // these indicate if the setter was ever called.
199 bool has_title() const { return !title_.empty(); }
200 bool has_body() const { return !body_.empty(); }
201
202 // Returns true if this entry was added too long ago and we should give up
203 // waiting for more data. The current time is passed in as an argument so we
204 // can check many without re-querying the timer.
205 bool Expired(base::TimeTicks now) const;
206
207 private:
208 URLID url_id_;
209 VisitID visit_id_;
210
211 // Time of the visit of the URL. This will be the value stored in the URL
212 // and visit tables for the entry.
213 base::Time visit_time_;
214
215 // When this page entry was created. We have a cap on the maximum time that
216 // an entry will be in the queue before being flushed to the database.
217 base::TimeTicks added_time_;
218
219 // An empty value is stored as " " so that set and unset can be told apart.
220 string16 title_;
221 string16 body_;
222 };
223
224 // Converts the given time to a database identifier or vice-versa.
225 static TextDatabase::DBIdent TimeToID(base::Time time);
226 static base::Time IDToTime(TextDatabase::DBIdent id);
227
228 // Returns a text database for the given identifier or time. This file will
229 // be created if it doesn't exist and |for_writing| is set. On error,
230 // including the case where the file doesn't exist and |for_writing|
231 // is false, it will return NULL.
232 //
233 // When |for_writing| is set, a transaction on the database will be opened
234 // if there is a transaction open on this manager.
235 //
236 // The pointer will be tracked in the cache. The caller should not store it
237 // or delete it since it will get automatically deleted as necessary.
238 TextDatabase* GetDB(TextDatabase::DBIdent id, bool for_writing);
239 TextDatabase* GetDBForTime(base::Time time, bool for_writing);
240
241 // Populates the present_databases_ list based on which files are on disk.
242 // When the list is already initialized, this will do nothing, so you can
243 // call it whenever you want to ensure the present_databases_ set is filled.
244 void InitDBList();
245
246 // Schedules a call to FlushOldChanges in the future.
247 void ScheduleFlushOldChanges();
248
249 // Checks the recent_changes_ list and commits partial data that has been
250 // around too long.
251 void FlushOldChanges();
252
253 // Given "now," this will expire old things from the recent_changes_ list.
254 // This is used as the backend for FlushOldChanges and is called directly
255 // by the unit tests with fake times.
256 void FlushOldChangesForTime(base::TimeTicks now);
257
258 // Directory holding our index files.
259 const base::FilePath dir_;
260
261 // Non-owning pointers to the recent history databases for URLs and visits.
262 URLDatabase* url_database_;
263 VisitDatabase* visit_database_;
264
265 // Lists recent additions that we have not yet filled out with the title and
266 // body. Sorted by time, we will flush them when they are complete or have
267 // been in the queue too long without modification.
268 //
269 // We kind of abuse the MRUCache because we never move things around in it
270 // using Get. Instead, we keep them in the order they were inserted, since
271 // this is the metric we use to measure age. The MRUCache gives us an ordered
272 // list with fast lookup by URL.
273 typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
274 RecentChangeList recent_changes_;
275
276 // Nesting levels of transactions. Since sqlite only allows one open
277 // transaction, we simulate nested transactions by mapping the outermost one
278 // to a real transaction. Since this object never needs to do ROLLBACK, losing
279 // the ability for all transactions to rollback is inconsequential.
280 int transaction_nesting_;
281
282 // The cache owns the TextDatabase pointers, they will be automagically
283 // deleted when the cache entry is removed or expired.
284 typedef base::OwningMRUCache<TextDatabase::DBIdent, TextDatabase*> DBCache;
285 DBCache db_cache_;
286
287 // Tells us about the existence of database files on disk. All existing
288 // databases will be in here, and non-existent ones will not, so we don't
289 // have to check the disk every time.
290 //
291 // This set is populated LAZILY by InitDBList(), you should call that function
292 // before accessing the list.
293 //
294 // Note that iterators will work on the keys in-order. Normally, reverse
295 // iterators will be used to iterate the keys in reverse-order.
296 typedef std::set<TextDatabase::DBIdent> DBIdentSet;
297 DBIdentSet present_databases_;
298 bool present_databases_loaded_; // Set by InitDBList when populated.
299
300 // Lists all databases with open transactions. These will have to be closed
301 // when the transaction is committed.
302 DBIdentSet open_transactions_;
303
304 QueryParser query_parser_;
305
306 // Generates tasks for our periodic checking of expired "recent changes".
307 base::WeakPtrFactory<TextDatabaseManager> weak_factory_;
308
309 // This object is created and managed by the history backend. We maintain an
310 // opaque pointer to the object for our use.
311 // This can be NULL if there are no indexers registered to receive indexing
312 // data from us.
313 const HistoryPublisher* history_publisher_;
314
315 DISALLOW_COPY_AND_ASSIGN(TextDatabaseManager);
316 };
317
318 } // namespace history
319
320 #endif // CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
OLDNEW
« no previous file with comments | « chrome/browser/history/text_database.cc ('k') | chrome/browser/history/text_database_manager.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698