Index: chrome/browser/importer/bookmark_html_reader.cc |
diff --git a/chrome/browser/importer/bookmark_html_reader.cc b/chrome/browser/importer/bookmark_html_reader.cc |
deleted file mode 100644 |
index 9f41ae71aec38e16cd034c3fcb319066333cf4d5..0000000000000000000000000000000000000000 |
--- a/chrome/browser/importer/bookmark_html_reader.cc |
+++ /dev/null |
@@ -1,431 +0,0 @@ |
-// Copyright 2013 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include "chrome/browser/importer/bookmark_html_reader.h" |
- |
-#include "base/callback.h" |
-#include "base/file_util.h" |
-#include "base/i18n/icu_string_conversions.h" |
-#include "base/strings/string_number_conversions.h" |
-#include "base/strings/string_split.h" |
-#include "base/strings/string_util.h" |
-#include "base/time/time.h" |
-#include "chrome/browser/importer/reencode_favicon.h" |
-#include "chrome/common/importer/imported_bookmark_entry.h" |
-#include "chrome/common/importer/imported_favicon_usage.h" |
-#include "content/public/common/url_constants.h" |
-#include "net/base/data_url.h" |
-#include "net/base/escape.h" |
-#include "url/gurl.h" |
- |
-namespace { |
- |
-// Fetches the given |attribute| value from the |attribute_list|. Returns true |
-// if successful, and |value| will contain the value. |
-bool GetAttribute(const std::string& attribute_list, |
- const std::string& attribute, |
- std::string* value) { |
- const char kQuote[] = "\""; |
- |
- size_t begin = attribute_list.find(attribute + "=" + kQuote); |
- if (begin == std::string::npos) |
- return false; // Can't find the attribute. |
- |
- begin += attribute.size() + 2; |
- size_t end = begin + 1; |
- |
- while (end < attribute_list.size()) { |
- if (attribute_list[end] == '"' && |
- attribute_list[end - 1] != '\\') { |
- break; |
- } |
- end++; |
- } |
- |
- if (end == attribute_list.size()) |
- return false; // The value is not quoted. |
- |
- *value = attribute_list.substr(begin, end - begin); |
- return true; |
-} |
- |
-// Given the URL of a page and a favicon data URL, adds an appropriate record |
-// to the given favicon usage vector. |
-void DataURLToFaviconUsage( |
- const GURL& link_url, |
- const GURL& favicon_data, |
- std::vector<ImportedFaviconUsage>* favicons) { |
- if (!link_url.is_valid() || !favicon_data.is_valid() || |
- !favicon_data.SchemeIs(chrome::kDataScheme)) |
- return; |
- |
- // Parse the data URL. |
- std::string mime_type, char_set, data; |
- if (!net::DataURL::Parse(favicon_data, &mime_type, &char_set, &data) || |
- data.empty()) |
- return; |
- |
- ImportedFaviconUsage usage; |
- if (!ReencodeFavicon(reinterpret_cast<const uint8*>(&data[0]), |
- data.size(), &usage.png_data)) |
- return; // Unable to decode. |
- |
- // We need to make up a URL for the favicon. We use a version of the page's |
- // URL so that we can be sure it will not collide. |
- usage.favicon_url = GURL(std::string("made-up-favicon:") + link_url.spec()); |
- |
- // We only have one URL per favicon for Firefox 2 bookmarks. |
- usage.urls.insert(link_url); |
- |
- favicons->push_back(usage); |
-} |
- |
-} // namespace |
- |
-namespace bookmark_html_reader { |
- |
-void ImportBookmarksFile( |
- const base::Callback<bool(void)>& cancellation_callback, |
- const base::Callback<bool(const GURL&)>& valid_url_callback, |
- const base::FilePath& file_path, |
- std::vector<ImportedBookmarkEntry>* bookmarks, |
- std::vector<ImportedFaviconUsage>* favicons) { |
- std::string content; |
- file_util::ReadFileToString(file_path, &content); |
- std::vector<std::string> lines; |
- base::SplitString(content, '\n', &lines); |
- |
- base::string16 last_folder; |
- bool last_folder_on_toolbar = false; |
- bool last_folder_is_empty = true; |
- bool has_subfolder = false; |
- base::Time last_folder_add_date; |
- std::vector<base::string16> path; |
- size_t toolbar_folder_index = 0; |
- std::string charset; |
- for (size_t i = 0; |
- i < lines.size() && |
- (cancellation_callback.is_null() || !cancellation_callback.Run()); |
- ++i) { |
- std::string line; |
- TrimString(lines[i], " ", &line); |
- |
- // Get the encoding of the bookmark file. |
- if (internal::ParseCharsetFromLine(line, &charset)) |
- continue; |
- |
- // Get the folder name. |
- if (internal::ParseFolderNameFromLine(line, |
- charset, |
- &last_folder, |
- &last_folder_on_toolbar, |
- &last_folder_add_date)) { |
- continue; |
- } |
- |
- // Get the bookmark entry. |
- base::string16 title; |
- base::string16 shortcut; |
- GURL url, favicon; |
- base::Time add_date; |
- base::string16 post_data; |
- bool is_bookmark; |
- // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based |
- // keywords yet. |
- is_bookmark = |
- internal::ParseBookmarkFromLine(line, charset, &title, |
- &url, &favicon, &shortcut, |
- &add_date, &post_data) || |
- internal::ParseMinimumBookmarkFromLine(line, charset, &title, &url); |
- |
- if (is_bookmark) |
- last_folder_is_empty = false; |
- |
- if (is_bookmark && |
- post_data.empty() && |
- (valid_url_callback.is_null() || valid_url_callback.Run(url))) { |
- if (toolbar_folder_index > path.size() && !path.empty()) { |
- NOTREACHED(); // error in parsing. |
- break; |
- } |
- |
- ImportedBookmarkEntry entry; |
- entry.creation_time = add_date; |
- entry.url = url; |
- entry.title = title; |
- |
- if (toolbar_folder_index) { |
- // The toolbar folder should be at the top level. |
- entry.in_toolbar = true; |
- entry.path.assign(path.begin() + toolbar_folder_index - 1, path.end()); |
- } else { |
- // Add this bookmark to the list of |bookmarks|. |
- if (!has_subfolder && !last_folder.empty()) { |
- path.push_back(last_folder); |
- last_folder.clear(); |
- } |
- entry.path.assign(path.begin(), path.end()); |
- } |
- bookmarks->push_back(entry); |
- |
- // Save the favicon. DataURLToFaviconUsage will handle the case where |
- // there is no favicon. |
- if (favicons) |
- DataURLToFaviconUsage(url, favicon, favicons); |
- |
- continue; |
- } |
- |
- // Bookmarks in sub-folder are encapsulated with <DL> tag. |
- if (StartsWithASCII(line, "<DL>", false)) { |
- has_subfolder = true; |
- if (!last_folder.empty()) { |
- path.push_back(last_folder); |
- last_folder.clear(); |
- } |
- if (last_folder_on_toolbar && !toolbar_folder_index) |
- toolbar_folder_index = path.size(); |
- |
- // Mark next folder empty as initial state. |
- last_folder_is_empty = true; |
- } else if (StartsWithASCII(line, "</DL>", false)) { |
- if (path.empty()) |
- break; // Mismatch <DL>. |
- |
- base::string16 folder_title = path.back(); |
- path.pop_back(); |
- |
- if (last_folder_is_empty) { |
- // Empty folder should be added explicitly. |
- ImportedBookmarkEntry entry; |
- entry.is_folder = true; |
- entry.creation_time = last_folder_add_date; |
- entry.title = folder_title; |
- if (toolbar_folder_index) { |
- // The toolbar folder should be at the top level. |
- // Make sure we don't add the toolbar folder itself if it is empty. |
- if (toolbar_folder_index <= path.size()) { |
- entry.in_toolbar = true; |
- entry.path.assign(path.begin() + toolbar_folder_index - 1, |
- path.end()); |
- bookmarks->push_back(entry); |
- } |
- } else { |
- // Add this folder to the list of |bookmarks|. |
- entry.path.assign(path.begin(), path.end()); |
- bookmarks->push_back(entry); |
- } |
- |
- // Parent folder include current one, so it's not empty. |
- last_folder_is_empty = false; |
- } |
- |
- if (toolbar_folder_index > path.size()) |
- toolbar_folder_index = 0; |
- } |
- } |
-} |
- |
-namespace internal { |
- |
-bool ParseCharsetFromLine(const std::string& line, std::string* charset) { |
- const char kCharset[] = "charset="; |
- if (StartsWithASCII(line, "<META", false) && |
- (line.find("CONTENT=\"") != std::string::npos || |
- line.find("content=\"") != std::string::npos)) { |
- size_t begin = line.find(kCharset); |
- if (begin == std::string::npos) |
- return false; |
- begin += std::string(kCharset).size(); |
- size_t end = line.find_first_of('\"', begin); |
- *charset = line.substr(begin, end - begin); |
- return true; |
- } |
- return false; |
-} |
- |
-bool ParseFolderNameFromLine(const std::string& line, |
- const std::string& charset, |
- base::string16* folder_name, |
- bool* is_toolbar_folder, |
- base::Time* add_date) { |
- const char kFolderOpen[] = "<DT><H3"; |
- const char kFolderClose[] = "</H3>"; |
- const char kToolbarFolderAttribute[] = "PERSONAL_TOOLBAR_FOLDER"; |
- const char kAddDateAttribute[] = "ADD_DATE"; |
- |
- if (!StartsWithASCII(line, kFolderOpen, true)) |
- return false; |
- |
- size_t end = line.find(kFolderClose); |
- size_t tag_end = line.rfind('>', end) + 1; |
- // If no end tag or start tag is broken, we skip to find the folder name. |
- if (end == std::string::npos || tag_end < arraysize(kFolderOpen)) |
- return false; |
- |
- base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), |
- base::OnStringConversionError::SKIP, folder_name); |
- *folder_name = net::UnescapeForHTML(*folder_name); |
- |
- std::string attribute_list = line.substr(arraysize(kFolderOpen), |
- tag_end - arraysize(kFolderOpen) - 1); |
- std::string value; |
- |
- // Add date |
- if (GetAttribute(attribute_list, kAddDateAttribute, &value)) { |
- int64 time; |
- base::StringToInt64(value, &time); |
- // Upper bound it at 32 bits. |
- if (0 < time && time < (1LL << 32)) |
- *add_date = base::Time::FromTimeT(time); |
- } |
- |
- if (GetAttribute(attribute_list, kToolbarFolderAttribute, &value) && |
- LowerCaseEqualsASCII(value, "true")) |
- *is_toolbar_folder = true; |
- else |
- *is_toolbar_folder = false; |
- |
- return true; |
-} |
- |
-bool ParseBookmarkFromLine(const std::string& line, |
- const std::string& charset, |
- base::string16* title, |
- GURL* url, |
- GURL* favicon, |
- base::string16* shortcut, |
- base::Time* add_date, |
- base::string16* post_data) { |
- const char kItemOpen[] = "<DT><A"; |
- const char kItemClose[] = "</A>"; |
- const char kFeedURLAttribute[] = "FEEDURL"; |
- const char kHrefAttribute[] = "HREF"; |
- const char kIconAttribute[] = "ICON"; |
- const char kShortcutURLAttribute[] = "SHORTCUTURL"; |
- const char kAddDateAttribute[] = "ADD_DATE"; |
- const char kPostDataAttribute[] = "POST_DATA"; |
- |
- title->clear(); |
- *url = GURL(); |
- *favicon = GURL(); |
- shortcut->clear(); |
- post_data->clear(); |
- *add_date = base::Time(); |
- |
- if (!StartsWithASCII(line, kItemOpen, true)) |
- return false; |
- |
- size_t end = line.find(kItemClose); |
- size_t tag_end = line.rfind('>', end) + 1; |
- if (end == std::string::npos || tag_end < arraysize(kItemOpen)) |
- return false; // No end tag or start tag is broken. |
- |
- std::string attribute_list = line.substr(arraysize(kItemOpen), |
- tag_end - arraysize(kItemOpen) - 1); |
- |
- // We don't import Live Bookmark folders, which is Firefox's RSS reading |
- // feature, since the user never necessarily bookmarked them and we don't |
- // have this feature to update their contents. |
- std::string value; |
- if (GetAttribute(attribute_list, kFeedURLAttribute, &value)) |
- return false; |
- |
- // Title |
- base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), |
- base::OnStringConversionError::SKIP, title); |
- *title = net::UnescapeForHTML(*title); |
- |
- // URL |
- if (GetAttribute(attribute_list, kHrefAttribute, &value)) { |
- base::string16 url16; |
- base::CodepageToUTF16(value, charset.c_str(), |
- base::OnStringConversionError::SKIP, &url16); |
- url16 = net::UnescapeForHTML(url16); |
- |
- *url = GURL(url16); |
- } |
- |
- // Favicon |
- if (GetAttribute(attribute_list, kIconAttribute, &value)) |
- *favicon = GURL(value); |
- |
- // Keyword |
- if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) { |
- base::CodepageToUTF16(value, charset.c_str(), |
- base::OnStringConversionError::SKIP, shortcut); |
- *shortcut = net::UnescapeForHTML(*shortcut); |
- } |
- |
- // Add date |
- if (GetAttribute(attribute_list, kAddDateAttribute, &value)) { |
- int64 time; |
- base::StringToInt64(value, &time); |
- // Upper bound it at 32 bits. |
- if (0 < time && time < (1LL << 32)) |
- *add_date = base::Time::FromTimeT(time); |
- } |
- |
- // Post data. |
- if (GetAttribute(attribute_list, kPostDataAttribute, &value)) { |
- base::CodepageToUTF16(value, charset.c_str(), |
- base::OnStringConversionError::SKIP, post_data); |
- *post_data = net::UnescapeForHTML(*post_data); |
- } |
- |
- return true; |
-} |
- |
-bool ParseMinimumBookmarkFromLine(const std::string& line, |
- const std::string& charset, |
- base::string16* title, |
- GURL* url) { |
- const char kItemOpen[] = "<DT><A"; |
- const char kItemClose[] = "</"; |
- const char kHrefAttributeUpper[] = "HREF"; |
- const char kHrefAttributeLower[] = "href"; |
- |
- title->clear(); |
- *url = GURL(); |
- |
- // Case-insensitive check of open tag. |
- if (!StartsWithASCII(line, kItemOpen, false)) |
- return false; |
- |
- // Find any close tag. |
- size_t end = line.find(kItemClose); |
- size_t tag_end = line.rfind('>', end) + 1; |
- if (end == std::string::npos || tag_end < arraysize(kItemOpen)) |
- return false; // No end tag or start tag is broken. |
- |
- std::string attribute_list = line.substr(arraysize(kItemOpen), |
- tag_end - arraysize(kItemOpen) - 1); |
- |
- // Title |
- base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), |
- base::OnStringConversionError::SKIP, title); |
- *title = net::UnescapeForHTML(*title); |
- |
- // URL |
- std::string value; |
- if (GetAttribute(attribute_list, kHrefAttributeUpper, &value) || |
- GetAttribute(attribute_list, kHrefAttributeLower, &value)) { |
- if (charset.length() != 0) { |
- base::string16 url16; |
- base::CodepageToUTF16(value, charset.c_str(), |
- base::OnStringConversionError::SKIP, &url16); |
- url16 = net::UnescapeForHTML(url16); |
- |
- *url = GURL(url16); |
- } else { |
- *url = GURL(value); |
- } |
- } |
- |
- return true; |
-} |
- |
-} // namespace internal |
- |
-} // namespace bookmark_html_reader |