Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(736)

Side by Side Diff: chrome/browser/importer/bookmark_html_reader.cc

Issue 18501013: Move most importer code to chrome/utility/importer (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: another gyp attempt Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/importer/bookmark_html_reader.h"
6
7 #include "base/callback.h"
8 #include "base/file_util.h"
9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_split.h"
12 #include "base/strings/string_util.h"
13 #include "base/time/time.h"
14 #include "chrome/browser/importer/reencode_favicon.h"
15 #include "chrome/common/importer/imported_bookmark_entry.h"
16 #include "chrome/common/importer/imported_favicon_usage.h"
17 #include "content/public/common/url_constants.h"
18 #include "net/base/data_url.h"
19 #include "net/base/escape.h"
20 #include "url/gurl.h"
21
22 namespace {
23
// Fetches the value of |attribute| from |attribute_list|, a run of HTML
// attributes written as NAME="value".
//
// On success returns true and stores the text between the quotes in |value|
// (without the quotes; backslash-escaped quotes inside the value are kept
// verbatim). Returns false, leaving |value| untouched, when the attribute is
// not present in the NAME=" form or when its closing quote is missing.
//
// An empty value (NAME="") is reported as success with an empty |value|.
bool GetAttribute(const std::string& attribute_list,
                  const std::string& attribute,
                  std::string* value) {
  const char kQuote[] = "\"";

  size_t begin = attribute_list.find(attribute + "=" + kQuote);
  if (begin == std::string::npos)
    return false;  // Can't find the attribute.

  // Step over NAME, '=' and the opening quote so that |begin| is the first
  // character of the value.
  begin += attribute.size() + 2;

  // Scan for the closing quote starting at |begin| itself. Starting one
  // character later would step over the closing quote of an empty value and
  // run on into the next attribute. |end - 1| is always valid here because
  // |begin| is at least 2.
  size_t end = begin;
  while (end < attribute_list.size()) {
    if (attribute_list[end] == '"' && attribute_list[end - 1] != '\\')
      break;
    ++end;
  }

  if (end >= attribute_list.size())
    return false;  // The value is not quoted.

  *value = attribute_list.substr(begin, end - begin);
  return true;
}
52
53 // Given the URL of a page and a favicon data URL, adds an appropriate record
54 // to the given favicon usage vector.
55 void DataURLToFaviconUsage(
56 const GURL& link_url,
57 const GURL& favicon_data,
58 std::vector<ImportedFaviconUsage>* favicons) {
59 if (!link_url.is_valid() || !favicon_data.is_valid() ||
60 !favicon_data.SchemeIs(chrome::kDataScheme))
61 return;
62
63 // Parse the data URL.
64 std::string mime_type, char_set, data;
65 if (!net::DataURL::Parse(favicon_data, &mime_type, &char_set, &data) ||
66 data.empty())
67 return;
68
69 ImportedFaviconUsage usage;
70 if (!ReencodeFavicon(reinterpret_cast<const uint8*>(&data[0]),
71 data.size(), &usage.png_data))
72 return; // Unable to decode.
73
74 // We need to make up a URL for the favicon. We use a version of the page's
75 // URL so that we can be sure it will not collide.
76 usage.favicon_url = GURL(std::string("made-up-favicon:") + link_url.spec());
77
78 // We only have one URL per favicon for Firefox 2 bookmarks.
79 usage.urls.insert(link_url);
80
81 favicons->push_back(usage);
82 }
83
84 } // namespace
85
86 namespace bookmark_html_reader {
87
88 void ImportBookmarksFile(
89 const base::Callback<bool(void)>& cancellation_callback,
90 const base::Callback<bool(const GURL&)>& valid_url_callback,
91 const base::FilePath& file_path,
92 std::vector<ImportedBookmarkEntry>* bookmarks,
93 std::vector<ImportedFaviconUsage>* favicons) {
94 std::string content;
95 file_util::ReadFileToString(file_path, &content);
96 std::vector<std::string> lines;
97 base::SplitString(content, '\n', &lines);
98
99 base::string16 last_folder;
100 bool last_folder_on_toolbar = false;
101 bool last_folder_is_empty = true;
102 bool has_subfolder = false;
103 base::Time last_folder_add_date;
104 std::vector<base::string16> path;
105 size_t toolbar_folder_index = 0;
106 std::string charset;
107 for (size_t i = 0;
108 i < lines.size() &&
109 (cancellation_callback.is_null() || !cancellation_callback.Run());
110 ++i) {
111 std::string line;
112 TrimString(lines[i], " ", &line);
113
114 // Get the encoding of the bookmark file.
115 if (internal::ParseCharsetFromLine(line, &charset))
116 continue;
117
118 // Get the folder name.
119 if (internal::ParseFolderNameFromLine(line,
120 charset,
121 &last_folder,
122 &last_folder_on_toolbar,
123 &last_folder_add_date)) {
124 continue;
125 }
126
127 // Get the bookmark entry.
128 base::string16 title;
129 base::string16 shortcut;
130 GURL url, favicon;
131 base::Time add_date;
132 base::string16 post_data;
133 bool is_bookmark;
134 // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based
135 // keywords yet.
136 is_bookmark =
137 internal::ParseBookmarkFromLine(line, charset, &title,
138 &url, &favicon, &shortcut,
139 &add_date, &post_data) ||
140 internal::ParseMinimumBookmarkFromLine(line, charset, &title, &url);
141
142 if (is_bookmark)
143 last_folder_is_empty = false;
144
145 if (is_bookmark &&
146 post_data.empty() &&
147 (valid_url_callback.is_null() || valid_url_callback.Run(url))) {
148 if (toolbar_folder_index > path.size() && !path.empty()) {
149 NOTREACHED(); // error in parsing.
150 break;
151 }
152
153 ImportedBookmarkEntry entry;
154 entry.creation_time = add_date;
155 entry.url = url;
156 entry.title = title;
157
158 if (toolbar_folder_index) {
159 // The toolbar folder should be at the top level.
160 entry.in_toolbar = true;
161 entry.path.assign(path.begin() + toolbar_folder_index - 1, path.end());
162 } else {
163 // Add this bookmark to the list of |bookmarks|.
164 if (!has_subfolder && !last_folder.empty()) {
165 path.push_back(last_folder);
166 last_folder.clear();
167 }
168 entry.path.assign(path.begin(), path.end());
169 }
170 bookmarks->push_back(entry);
171
172 // Save the favicon. DataURLToFaviconUsage will handle the case where
173 // there is no favicon.
174 if (favicons)
175 DataURLToFaviconUsage(url, favicon, favicons);
176
177 continue;
178 }
179
180 // Bookmarks in sub-folder are encapsulated with <DL> tag.
181 if (StartsWithASCII(line, "<DL>", false)) {
182 has_subfolder = true;
183 if (!last_folder.empty()) {
184 path.push_back(last_folder);
185 last_folder.clear();
186 }
187 if (last_folder_on_toolbar && !toolbar_folder_index)
188 toolbar_folder_index = path.size();
189
190 // Mark next folder empty as initial state.
191 last_folder_is_empty = true;
192 } else if (StartsWithASCII(line, "</DL>", false)) {
193 if (path.empty())
194 break; // Mismatch <DL>.
195
196 base::string16 folder_title = path.back();
197 path.pop_back();
198
199 if (last_folder_is_empty) {
200 // Empty folder should be added explicitly.
201 ImportedBookmarkEntry entry;
202 entry.is_folder = true;
203 entry.creation_time = last_folder_add_date;
204 entry.title = folder_title;
205 if (toolbar_folder_index) {
206 // The toolbar folder should be at the top level.
207 // Make sure we don't add the toolbar folder itself if it is empty.
208 if (toolbar_folder_index <= path.size()) {
209 entry.in_toolbar = true;
210 entry.path.assign(path.begin() + toolbar_folder_index - 1,
211 path.end());
212 bookmarks->push_back(entry);
213 }
214 } else {
215 // Add this folder to the list of |bookmarks|.
216 entry.path.assign(path.begin(), path.end());
217 bookmarks->push_back(entry);
218 }
219
220 // Parent folder include current one, so it's not empty.
221 last_folder_is_empty = false;
222 }
223
224 if (toolbar_folder_index > path.size())
225 toolbar_folder_index = 0;
226 }
227 }
228 }
229
230 namespace internal {
231
232 bool ParseCharsetFromLine(const std::string& line, std::string* charset) {
233 const char kCharset[] = "charset=";
234 if (StartsWithASCII(line, "<META", false) &&
235 (line.find("CONTENT=\"") != std::string::npos ||
236 line.find("content=\"") != std::string::npos)) {
237 size_t begin = line.find(kCharset);
238 if (begin == std::string::npos)
239 return false;
240 begin += std::string(kCharset).size();
241 size_t end = line.find_first_of('\"', begin);
242 *charset = line.substr(begin, end - begin);
243 return true;
244 }
245 return false;
246 }
247
248 bool ParseFolderNameFromLine(const std::string& line,
249 const std::string& charset,
250 base::string16* folder_name,
251 bool* is_toolbar_folder,
252 base::Time* add_date) {
253 const char kFolderOpen[] = "<DT><H3";
254 const char kFolderClose[] = "</H3>";
255 const char kToolbarFolderAttribute[] = "PERSONAL_TOOLBAR_FOLDER";
256 const char kAddDateAttribute[] = "ADD_DATE";
257
258 if (!StartsWithASCII(line, kFolderOpen, true))
259 return false;
260
261 size_t end = line.find(kFolderClose);
262 size_t tag_end = line.rfind('>', end) + 1;
263 // If no end tag or start tag is broken, we skip to find the folder name.
264 if (end == std::string::npos || tag_end < arraysize(kFolderOpen))
265 return false;
266
267 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
268 base::OnStringConversionError::SKIP, folder_name);
269 *folder_name = net::UnescapeForHTML(*folder_name);
270
271 std::string attribute_list = line.substr(arraysize(kFolderOpen),
272 tag_end - arraysize(kFolderOpen) - 1);
273 std::string value;
274
275 // Add date
276 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) {
277 int64 time;
278 base::StringToInt64(value, &time);
279 // Upper bound it at 32 bits.
280 if (0 < time && time < (1LL << 32))
281 *add_date = base::Time::FromTimeT(time);
282 }
283
284 if (GetAttribute(attribute_list, kToolbarFolderAttribute, &value) &&
285 LowerCaseEqualsASCII(value, "true"))
286 *is_toolbar_folder = true;
287 else
288 *is_toolbar_folder = false;
289
290 return true;
291 }
292
293 bool ParseBookmarkFromLine(const std::string& line,
294 const std::string& charset,
295 base::string16* title,
296 GURL* url,
297 GURL* favicon,
298 base::string16* shortcut,
299 base::Time* add_date,
300 base::string16* post_data) {
301 const char kItemOpen[] = "<DT><A";
302 const char kItemClose[] = "</A>";
303 const char kFeedURLAttribute[] = "FEEDURL";
304 const char kHrefAttribute[] = "HREF";
305 const char kIconAttribute[] = "ICON";
306 const char kShortcutURLAttribute[] = "SHORTCUTURL";
307 const char kAddDateAttribute[] = "ADD_DATE";
308 const char kPostDataAttribute[] = "POST_DATA";
309
310 title->clear();
311 *url = GURL();
312 *favicon = GURL();
313 shortcut->clear();
314 post_data->clear();
315 *add_date = base::Time();
316
317 if (!StartsWithASCII(line, kItemOpen, true))
318 return false;
319
320 size_t end = line.find(kItemClose);
321 size_t tag_end = line.rfind('>', end) + 1;
322 if (end == std::string::npos || tag_end < arraysize(kItemOpen))
323 return false; // No end tag or start tag is broken.
324
325 std::string attribute_list = line.substr(arraysize(kItemOpen),
326 tag_end - arraysize(kItemOpen) - 1);
327
328 // We don't import Live Bookmark folders, which is Firefox's RSS reading
329 // feature, since the user never necessarily bookmarked them and we don't
330 // have this feature to update their contents.
331 std::string value;
332 if (GetAttribute(attribute_list, kFeedURLAttribute, &value))
333 return false;
334
335 // Title
336 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
337 base::OnStringConversionError::SKIP, title);
338 *title = net::UnescapeForHTML(*title);
339
340 // URL
341 if (GetAttribute(attribute_list, kHrefAttribute, &value)) {
342 base::string16 url16;
343 base::CodepageToUTF16(value, charset.c_str(),
344 base::OnStringConversionError::SKIP, &url16);
345 url16 = net::UnescapeForHTML(url16);
346
347 *url = GURL(url16);
348 }
349
350 // Favicon
351 if (GetAttribute(attribute_list, kIconAttribute, &value))
352 *favicon = GURL(value);
353
354 // Keyword
355 if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) {
356 base::CodepageToUTF16(value, charset.c_str(),
357 base::OnStringConversionError::SKIP, shortcut);
358 *shortcut = net::UnescapeForHTML(*shortcut);
359 }
360
361 // Add date
362 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) {
363 int64 time;
364 base::StringToInt64(value, &time);
365 // Upper bound it at 32 bits.
366 if (0 < time && time < (1LL << 32))
367 *add_date = base::Time::FromTimeT(time);
368 }
369
370 // Post data.
371 if (GetAttribute(attribute_list, kPostDataAttribute, &value)) {
372 base::CodepageToUTF16(value, charset.c_str(),
373 base::OnStringConversionError::SKIP, post_data);
374 *post_data = net::UnescapeForHTML(*post_data);
375 }
376
377 return true;
378 }
379
380 bool ParseMinimumBookmarkFromLine(const std::string& line,
381 const std::string& charset,
382 base::string16* title,
383 GURL* url) {
384 const char kItemOpen[] = "<DT><A";
385 const char kItemClose[] = "</";
386 const char kHrefAttributeUpper[] = "HREF";
387 const char kHrefAttributeLower[] = "href";
388
389 title->clear();
390 *url = GURL();
391
392 // Case-insensitive check of open tag.
393 if (!StartsWithASCII(line, kItemOpen, false))
394 return false;
395
396 // Find any close tag.
397 size_t end = line.find(kItemClose);
398 size_t tag_end = line.rfind('>', end) + 1;
399 if (end == std::string::npos || tag_end < arraysize(kItemOpen))
400 return false; // No end tag or start tag is broken.
401
402 std::string attribute_list = line.substr(arraysize(kItemOpen),
403 tag_end - arraysize(kItemOpen) - 1);
404
405 // Title
406 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
407 base::OnStringConversionError::SKIP, title);
408 *title = net::UnescapeForHTML(*title);
409
410 // URL
411 std::string value;
412 if (GetAttribute(attribute_list, kHrefAttributeUpper, &value) ||
413 GetAttribute(attribute_list, kHrefAttributeLower, &value)) {
414 if (charset.length() != 0) {
415 base::string16 url16;
416 base::CodepageToUTF16(value, charset.c_str(),
417 base::OnStringConversionError::SKIP, &url16);
418 url16 = net::UnescapeForHTML(url16);
419
420 *url = GURL(url16);
421 } else {
422 *url = GURL(value);
423 }
424 }
425
426 return true;
427 }
428
429 } // namespace internal
430
431 } // namespace bookmark_html_reader
OLDNEW
« no previous file with comments | « chrome/browser/importer/bookmark_html_reader.h ('k') | chrome/browser/importer/bookmark_html_reader_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698