OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ | |
6 #define CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ | |
7 | |
8 #include <set> | |
9 #include <vector> | |
10 | |
11 #include "base/basictypes.h" | |
12 #include "base/files/file_path.h" | |
13 #include "base/strings/string16.h" | |
14 #include "chrome/browser/history/history_types.h" | |
15 #include "sql/connection.h" | |
16 #include "sql/meta_table.h" | |
17 #include "url/gurl.h" | |
18 | |
19 namespace history { | |
20 | |
21 // Encapsulation of a full-text indexed database file. | |
22 class TextDatabase { | |
23 public: | |
24 typedef int DBIdent; | |
25 | |
26 typedef std::set<GURL> URLSet; | |
27 | |
28 // A single result produced by GetTextMatches(). | |
29 struct Match { | |
30 Match(); | |
31 ~Match(); | |
32 | |
33 // URL of the match. | |
34 GURL url; | |
35 | |
36 // The title is returned because the title in the text database and the URL | |
37 // database may differ. This happens because we capture the title when the | |
38 // body is captured, and don't update it later. | |
39 string16 title; | |
40 | |
41 // Time at which the matching page was visited. | |
42 base::Time time; | |
43 | |
44 // Identifies any found matches in the title of the document. These are not | |
45 // included in the snippet. | |
46 Snippet::MatchPositions title_match_positions; | |
47 | |
48 // Snippet of the match we generated from the body. | |
49 Snippet snippet; | |
50 }; | |
51 | |
52 // Note: Init() must be called, and must succeed, before using this class. | |
53 // | |
54 // NOTE(review): the original text here also said this computes the matches | |
55 // for a query in decreasing order of visit time; that reads like | |
56 // GetTextMatches(), not a constructor -- likely a stale paste, confirm. | |
57 // The original text also says the new database is attached to a given | |
58 // database connection so one sqlite3 object can share many TextDatabases | |
59 // and one cache, limiting the total size text indexing databases can take | |
60 // up. No connection parameter exists here and |db_| is a member -- confirm. | |
61 // | |
62 // |path|: documented originally (as |file_name|) as the name of the file on | |
63 // disk. NOTE(review): the full name is computed in Init() -- confirm meaning. | |
64 // |id| is the identifier for the database. It should uniquely identify it | |
65 // among other databases on disk and in the sqlite connection. | |
66 // | |
67 // |allow_create| indicates if we want to allow creation of the file if it | |
68 // doesn't exist. For files associated with older time periods, we don't want | |
69 // to create them if they don't exist, so this flag would be false. | |
70 TextDatabase(const base::FilePath& path, | |
71 DBIdent id, | |
72 bool allow_create); | |
73 ~TextDatabase(); | |
74 | |
75 // Initializes the database connection and creates the file if the class | |
76 // was created with |allow_create|. Returns false if the file couldn't be | |
77 // opened or created; in that case no other functions should be called on | |
78 // this object. | |
79 bool Init(); | |
80 | |
81 // Allows updates to be batched. This gives higher performance when multiple | |
82 // updates are happening because every insert doesn't require a sync to disk. | |
83 // Transactions can be nested, only the outermost one will actually count. | |
84 void BeginTransaction(); | |
85 void CommitTransaction(); | |
86 | |
87 // Test-only accessor for the database file name (so tests can delete it). | |
88 // Claimed valid before Init(), yet |file_name_| is set in Init() -- confirm. | |
89 const base::FilePath& file_name() const { return file_name_; } | |
90 | |
91 // Returns a NULL-terminated string that is the base of history index files, | |
92 // which is the part before the database identifier. For example | |
93 // "History Index *". This is for finding existing database files. | |
94 static const base::FilePath::CharType* file_base(); | |
95 | |
96 // Converts a filename on disk (optionally including a path) to a database | |
97 // identifier. If the filename doesn't have the correct format, returns 0. | |
98 static DBIdent FileNameToID(const base::FilePath& file_path); | |
99 | |
100 // Changing operations ------------------------------------------------------- | |
101 | |
102 // Adds the given page data to the database. Returns true on success. The | |
103 // string data should already be converted to UTF-8. | |
104 bool AddPageData(base::Time time, | |
105 const std::string& url, | |
106 const std::string& title, | |
107 const std::string& contents); | |
108 | |
109 // Deletes the indexed data exactly matching the given URL/time pair. | |
110 void DeletePageData(base::Time time, const std::string& url); | |
111 | |
112 // Optimizes the tree inside the database. This will, in addition to making | |
113 // access faster, remove any deleted data from the database (normally deleted | |
114 // data is added again as "removed" and is cleaned up only when the database | |
115 // later optimizes itself naturally). It is bad for privacy if a user is | |
116 // deleting a page from history but it still exists in the full text database | |
117 // in some form. This function will clean that up. | |
118 void Optimize(); | |
119 | |
120 // Querying ------------------------------------------------------------------ | |
121 | |
122 // Executes the given query. See QueryOptions for more info on input. | |
123 // | |
124 // The results are appended to any existing ones in |*results|. | |
125 // | |
126 // Any URLs found will be added to |unique_urls|. If a URL is already in the | |
127 // set, additional results will not be added (giving the ability to uniquify | |
128 // URL results). | |
129 // | |
130 // Callers must run QueryParser on the user text and pass the results of the | |
131 // QueryParser to this method as the query string. | |
132 // | |
133 // Returns true if there are more results available, i.e. if the number of | |
134 // results was restricted by |options.max_count|. | |
135 bool GetTextMatches(const std::string& query, | |
136 const QueryOptions& options, | |
137 std::vector<Match>* results, | |
138 URLSet* unique_urls); | |
139 | |
140 // Converts the given database identifier to a filename. This does not include | |
141 // the path, just the file and extension. | |
142 static base::FilePath IDToFileName(DBIdent id); | |
143 | |
144 private: | |
145 // Ensures that the tables and indices are created. Returns true on success. | |
146 bool CreateTables(); | |
147 | |
148 // The sql database. Not valid until Init() is called. | |
149 sql::Connection db_; | |
150 | |
151 const base::FilePath path_; | |
152 const DBIdent ident_; | |
153 const bool allow_create_; | |
154 | |
155 // Full file name of the file on disk, computed in Init(). | |
156 base::FilePath file_name_; | |
157 | |
158 sql::MetaTable meta_table_; | |
159 | |
160 DISALLOW_COPY_AND_ASSIGN(TextDatabase); | |
161 }; | |
162 | |
163 } // namespace history | |
164 | |
165 #endif // CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ | |
OLD | NEW |