| Index: chrome/browser/android/history_report/delta_file_commons.cc | 
| diff --git a/chrome/browser/android/history_report/delta_file_commons.cc b/chrome/browser/android/history_report/delta_file_commons.cc | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..027f82d9c7c84d99b36360105cea81dfb6560b8e | 
| --- /dev/null | 
| +++ b/chrome/browser/android/history_report/delta_file_commons.cc | 
| @@ -0,0 +1,151 @@ | 
| +// Copyright 2015 The Chromium Authors. All rights reserved. | 
| +// Use of this source code is governed by a BSD-style license that can be | 
| +// found in the LICENSE file. | 
| + | 
| +#include "chrome/browser/android/history_report/delta_file_commons.h" | 
| + | 
| +#include <iomanip> | 
| + | 
| +#include "base/strings/string_number_conversions.h" | 
| +#include "base/strings/utf_string_conversions.h" | 
| +#include "crypto/sha2.h" | 
| +#include "net/base/registry_controlled_domains/registry_controlled_domain.h" | 
| + | 
| +using bookmarks::BookmarkModel; | 
| +using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES; | 
| +using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES; | 
| +using net::registry_controlled_domains::GetRegistryLength; | 
| + | 
| +namespace { | 
| + | 
| +// Factor applied by Score() to the score of an entry marked as a bookmark. | 
| +const int kBookmarkScoreBonusMultiplier = 3; | 
| +// Maximum length, in characters, of an ID accepted by IsValidId() and | 
| +// produced by UrlToId(). | 
| +const size_t kIdLengthLimit = 256; | 
| +// Size in bytes of the SHA-256 digest buffer used by UrlToId(); hex-encoded | 
| +// it occupies twice as many characters. | 
| +const int kSHA256ByteSize = 32; | 
| +// URLs longer than this are rejected by UrlToId(). | 
| +const size_t kUrlLengthLimit = 20 * 1024 * 1024; // 20M | 
| +// Width of the zero-padded URL length field that starts a hashed ID. | 
| +const size_t kUrlLengthWidth = 8; | 
| + | 
| +// Removes the registry suffix (e.g. the top-level domain), together with the | 
| +// single character in front of it, from |host| in place. |host| is left | 
| +// untouched when GetRegistryLength() reports 0 or std::string::npos. | 
| +void StripTopLevelDomain(std::string* host) { | 
| +  const size_t suffix_length = GetRegistryLength( | 
| +      *host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES); | 
| +  if (suffix_length == 0 || suffix_length == std::string::npos) | 
| +    return; | 
| +  // The extra character is presumably the '.' that precedes the registry. | 
| +  const size_t cut_position = host->length() - (suffix_length + 1); | 
| +  host->erase(cut_position); | 
| +} | 
| + | 
| +// Drops a leading "www." or "ww2." from |host| in place. The prefixes are | 
| +// tried in that order and at most one of them is removed. | 
| +void StripCommonSubDomains(std::string* host) { | 
| +  static const char* const kCommonPrefixes[] = {"www.", "ww2."}; | 
| +  const size_t prefix_count = | 
| +      sizeof(kCommonPrefixes) / sizeof(kCommonPrefixes[0]); | 
| +  for (size_t i = 0; i < prefix_count; ++i) { | 
| +    const std::string prefix(kCommonPrefixes[i]); | 
| +    if (host->compare(0, prefix.size(), prefix) != 0) | 
| +      continue; | 
| +    host->erase(0, prefix.size()); | 
| +    return; | 
| +  } | 
| +} | 
| + | 
| +}  // namespace | 
| + | 
| +namespace history_report { | 
| + | 
| +// Wraps a copy of |entry|. The new object starts with no URL data attached | 
| +// and is not marked as a bookmark. | 
| +DeltaFileEntryWithData::DeltaFileEntryWithData(DeltaFileEntry entry) | 
| +    : entry_(entry), | 
| +      data_set_(false), | 
| +      is_bookmark_(false) {} | 
| + | 
| +// Empty destructor body. | 
| +DeltaFileEntryWithData::~DeltaFileEntryWithData() {} | 
| + | 
| +// Returns the sequence number stored in the wrapped delta file entry. | 
| +int64 DeltaFileEntryWithData::SeqNo() const { | 
| +  return entry_.seq_no(); | 
| +} | 
| + | 
| +// Returns the type of this entry. An entry that has data attached is always | 
| +// reported as "add", even if it was recorded as a deletion: a real deletion | 
| +// never has data, so such an entry is in fact an update. | 
| +std::string DeltaFileEntryWithData::Type() const { | 
| +  return data_set_ ? std::string("add") : std::string(entry_.type()); | 
| +} | 
| + | 
| +// Builds the ID that identifies |url|. The result is never longer than | 
| +// |kIdLengthLimit| characters: | 
| +//  - A URL longer than |kUrlLengthLimit| yields "error: url too long". | 
| +//  - A URL that already fits in |kIdLengthLimit| characters is its own ID. | 
| +//  - Any other URL is encoded as three concatenated fields: | 
| +//     1. The URL length, zero-padded to |kUrlLengthWidth| digits, which is | 
| +//        wide enough because of the |kUrlLengthLimit| cap. | 
| +//     2. The hex-encoded SHA-256 of the URL, which takes 64 characters. | 
| +//     3. The leading characters of the URL that fill the remaining | 
| +//        |kIdLengthLimit| - 64 - |kUrlLengthWidth| characters. | 
| +std::string DeltaFileEntryWithData::UrlToId(const std::string& url) { | 
| +  const size_t url_length = url.size(); | 
| +  if (url_length > kUrlLengthLimit) | 
| +    return "error: url too long"; | 
| + | 
| +  if (IsValidId(url)) | 
| +    return url; | 
| + | 
| +  // Field 2: digest of the whole URL, rendered as hex. | 
| +  uint8 digest[kSHA256ByteSize]; | 
| +  crypto::SHA256HashString(url, digest, sizeof(digest)); | 
| +  const std::string hex_digest = base::HexEncode(digest, sizeof(digest)); | 
| + | 
| +  // Field 3 gets whatever room is left after the length and digest fields. | 
| +  const size_t prefix_length = | 
| +      kIdLengthLimit - 2 * kSHA256ByteSize - kUrlLengthWidth; | 
| + | 
| +  // std::setw() only affects the next insertion, i.e. the length field. | 
| +  std::stringstream builder; | 
| +  builder << std::setfill('0') << std::setw(kUrlLengthWidth) << url_length | 
| +          << hex_digest | 
| +          << url.substr(0, prefix_length); | 
| +  return builder.str(); | 
| +} | 
| + | 
| +// Returns the ID which identifies the URL of this entry, as computed by | 
| +// UrlToId(). | 
| +std::string DeltaFileEntryWithData::Id() const { | 
| +  return UrlToId(entry_.url()); | 
| +} | 
| + | 
| +// Returns the URL stored in the wrapped delta file entry. | 
| +std::string DeltaFileEntryWithData::Url() const { | 
| +  return entry_.url(); | 
| +} | 
| + | 
| +// Picks the title to report for this entry: an empty string when the entry | 
| +// is not Valid(), the bookmark title for a bookmark that has a non-empty one, | 
| +// otherwise the title stored in |data_|, falling back to the host of the | 
| +// data URL when that title is empty. | 
| +base::string16 DeltaFileEntryWithData::Title() const { | 
| +  if (!Valid()) | 
| +    return base::UTF8ToUTF16(""); | 
| + | 
| +  const bool use_bookmark_title = is_bookmark_ && !bookmark_title_.empty(); | 
| +  if (use_bookmark_title) | 
| +    return bookmark_title_; | 
| + | 
| +  if (!data_.title().empty()) | 
| +    return data_.title(); | 
| +  return base::UTF8ToUTF16(data_.url().host()); | 
| +} | 
| + | 
| +// Computes the score of this entry. An entry that is not Valid() scores 0. | 
| +// Otherwise the score is the sum of the visit count and the typed count; for | 
| +// a bookmark that sum is incremented by one and then multiplied by | 
| +// |kBookmarkScoreBonusMultiplier|. | 
| +int32 DeltaFileEntryWithData::Score() const { | 
| +  if (!Valid()) | 
| +    return 0; | 
| + | 
| +  const int32 base_score = data_.visit_count() + data_.typed_count(); | 
| +  if (!is_bookmark_) | 
| +    return base_score; | 
| +  return (base_score + 1) * kBookmarkScoreBonusMultiplier; | 
| +} | 
| + | 
| +// Returns the host of the data URL with its registry suffix and a leading | 
| +// "www." / "ww2." removed, or an empty string when the entry is not Valid(). | 
| +std::string DeltaFileEntryWithData::IndexedUrl() const { | 
| +  if (!Valid()) | 
| +    return ""; | 
| + | 
| +  // The registry suffix is stripped before the common sub-domain prefix. | 
| +  std::string host = data_.url().host(); | 
| +  StripTopLevelDomain(&host); | 
| +  StripCommonSubDomains(&host); | 
| +  return host; | 
| +} | 
| + | 
| +// An entry is valid when it is a deletion ("del"), when it is marked as a | 
| +// bookmark, or when it has data attached and that data is not hidden. | 
| +bool DeltaFileEntryWithData::Valid() const { | 
| +  if (entry_.type() == "del") | 
| +    return true; | 
| +  if (is_bookmark_) | 
| +    return true; | 
| +  return data_set_ && !data_.hidden(); | 
| +} | 
| + | 
| +// Attaches a copy of the history row |data| to this entry and records that | 
| +// data is present. From then on Type() reports "add". | 
| +void DeltaFileEntryWithData::SetData(const history::URLRow& data) { | 
| +  data_set_ = true; | 
| +  data_ = data; | 
| +} | 
| + | 
| +// Flags this entry as a bookmark and stores the title of |bookmark|, which | 
| +// Title() prefers whenever it is non-empty. | 
| +void DeltaFileEntryWithData::MarkAsBookmark( | 
| +    const BookmarkModel::URLAndTitle& bookmark) { | 
| +  is_bookmark_ = true; | 
| +  bookmark_title_ = bookmark.title; | 
| +} | 
| + | 
| +// static | 
| +// Returns true when |url| is at most |kIdLengthLimit| characters long, i.e. | 
| +// short enough for UrlToId() to use it as an ID unchanged. | 
| +bool DeltaFileEntryWithData::IsValidId(const std::string& url) { | 
| +  return url.size() <= kIdLengthLimit; | 
| +} | 
| + | 
| +}  // namespace history_report | 
|  |