OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/browser/android/history_report/delta_file_commons.h" |
| 6 |
| 7 #include <iomanip> |
| 8 |
| 9 #include "base/strings/string_number_conversions.h" |
| 10 #include "base/strings/utf_string_conversions.h" |
| 11 #include "crypto/sha2.h" |
| 12 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| 13 |
| 14 using bookmarks::BookmarkModel; |
| 15 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES; |
| 16 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES; |
| 17 using net::registry_controlled_domains::GetRegistryLength; |
| 18 |
| 19 namespace { |
| 20 |
// Factor applied to the score of an entry that is marked as a bookmark.
const int kBookmarkScoreBonusMultiplier = 3;

// Longest ID, in characters, that is accepted as-is.
const size_t kIdLengthLimit = 256;

// Number of bytes in the buffer that receives a SHA-256 digest.
const int kSHA256ByteSize = 32;

// Longest URL for which an ID is generated: 20M.
const size_t kUrlLengthLimit = 20 * 1024 * 1024;

// Field width of the zero-padded URL length stored at the front of an ID.
const size_t kUrlLengthWidth = 8;
| 26 |
| 27 void StripTopLevelDomain(std::string* host) { |
| 28 size_t registry_length = GetRegistryLength( |
| 29 *host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES); |
| 30 if (registry_length != 0 && registry_length != std::string::npos) |
| 31 host->erase(host->length() - (registry_length + 1)); |
| 32 } |
| 33 |
// Removes a leading "www." or "ww2." from |host|. At most one prefix is
// removed per call, and "www." is checked first.
void StripCommonSubDomains(std::string* host) {
  const std::string common_prefixes[] = {"www.", "ww2."};
  for (const std::string& prefix : common_prefixes) {
    if (host->compare(0, prefix.size(), prefix) != 0)
      continue;
    host->erase(0, prefix.size());
    return;  // Only the first matching prefix is stripped.
  }
}
| 43 |
| 44 } // namespace |
| 45 |
| 46 namespace history_report { |
| 47 |
| 48 DeltaFileEntryWithData::DeltaFileEntryWithData(DeltaFileEntry entry) |
| 49 : entry_(entry), |
| 50 data_set_(false), |
| 51 is_bookmark_(false) {} |
| 52 |
| 53 DeltaFileEntryWithData::~DeltaFileEntryWithData() {} |
| 54 |
| 55 int64 DeltaFileEntryWithData::SeqNo() const { |
| 56 return entry_.seq_no(); |
| 57 } |
| 58 |
| 59 std::string DeltaFileEntryWithData::Type() const { |
| 60 // If deletion entry has data then it's not a real deletion entry |
| 61 // but an update entry. Real deletion entry never has data. |
| 62 if (data_set_) return "add"; |
| 63 return entry_.type(); |
| 64 } |
| 65 |
| 66 // Generates a unique ID for a given URL. |
| 67 // It must be shorter than or equal to |kIdLengthLimit| characters. |
| 68 // If URL is shorter than or equal to |kIdLengthLimit| then ID is the URL |
| 69 // itself. Otherwise it has a form of 3 concatenated parts: |
| 70 // 1. Length of URL. Zero-padded integer to width |kUrlLengthWidth|, |
| 71 // because URLs are limited to 20M in Chrome. |
| 72 // 2. SHA-256 of URL which takes 64 characters. |
| 73 // 3. Prefix of URL of size |kIdLengthLimit| - 64 - |kUrlLengthWidth|. |
| 74 std::string DeltaFileEntryWithData::UrlToId(const std::string& url) { |
| 75 if (url.size() > kUrlLengthLimit) { |
| 76 return "error: url too long"; |
| 77 } |
| 78 |
| 79 if (IsValidId(url)) { |
| 80 return url; |
| 81 } |
| 82 |
| 83 std::stringstream id; |
| 84 |
| 85 // 1. Zero-padded URL length to width |kUrlLengthWidth|. |
| 86 id << std::setfill('0') << std::setw(kUrlLengthWidth) << url.size(); |
| 87 |
| 88 // 2. SHA-256 of URL. |
| 89 uint8 hash[kSHA256ByteSize]; |
| 90 crypto::SHA256HashString(url, hash, sizeof(hash)); |
| 91 id << base::HexEncode(hash, sizeof(hash)); |
| 92 |
| 93 // 3. Prefix of URL to fill rest of the space. |
| 94 id << url.substr(0, kIdLengthLimit - 2 * kSHA256ByteSize - kUrlLengthWidth); |
| 95 |
| 96 return id.str(); |
| 97 } |
| 98 |
| 99 // ID which identifies URL of this entry. |
| 100 std::string DeltaFileEntryWithData::Id() const { |
| 101 return UrlToId(entry_.url()); |
| 102 } |
| 103 |
| 104 std::string DeltaFileEntryWithData::Url() const { |
| 105 return entry_.url(); |
| 106 } |
| 107 |
| 108 base::string16 DeltaFileEntryWithData::Title() const { |
| 109 if (!Valid()) return base::UTF8ToUTF16(""); |
| 110 if (is_bookmark_ && !bookmark_title_.empty()) return bookmark_title_; |
| 111 if (data_.title().empty()) return base::UTF8ToUTF16(data_.url().host()); |
| 112 return data_.title(); |
| 113 } |
| 114 |
| 115 int32 DeltaFileEntryWithData::Score() const { |
| 116 if (!Valid()) return 0; |
| 117 int32 score = data_.visit_count() + data_.typed_count(); |
| 118 if (is_bookmark_) score = (score + 1) * kBookmarkScoreBonusMultiplier; |
| 119 return score; |
| 120 } |
| 121 |
| 122 std::string DeltaFileEntryWithData::IndexedUrl() const { |
| 123 if (!Valid()) return ""; |
| 124 std::string indexed_url = data_.url().host(); |
| 125 StripTopLevelDomain(&indexed_url); |
| 126 StripCommonSubDomains(&indexed_url); |
| 127 return indexed_url; |
| 128 } |
| 129 |
| 130 bool DeltaFileEntryWithData::Valid() const { |
| 131 return entry_.type() == "del" || is_bookmark_ || |
| 132 (data_set_ && !data_.hidden()); |
| 133 } |
| 134 |
| 135 void DeltaFileEntryWithData::SetData(const history::URLRow& data) { |
| 136 data_set_ = true; |
| 137 data_ = data; |
| 138 } |
| 139 |
| 140 void DeltaFileEntryWithData::MarkAsBookmark( |
| 141 const BookmarkModel::URLAndTitle& bookmark) { |
| 142 is_bookmark_ = true; |
| 143 bookmark_title_ = bookmark.title; |
| 144 } |
| 145 |
| 146 // static |
| 147 bool DeltaFileEntryWithData::IsValidId(const std::string& url) { |
| 148 return url.size() <= kIdLengthLimit; |
| 149 } |
| 150 |
| 151 } // namespace history_report |
OLD | NEW |