OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" |
| 6 |
| 7 #include <set> |
| 8 |
| 9 #include "base/lazy_instance.h" |
| 10 #include "base/logging.h" |
| 11 #include "base/metrics/histogram.h" |
| 12 #include "content/browser/renderer_host/resource_request_info_impl.h" |
| 13 #include "net/base/io_buffer.h" |
| 14 #include "net/url_request/url_request.h" |
| 15 |
| 16 namespace content { |
| 17 namespace { |
| 18 |
| 19 class GlobalDuplicateRecords { |
| 20 public: |
| 21 static GlobalDuplicateRecords* GetInstance() { |
| 22 static base::LazyInstance<GlobalDuplicateRecords>::Leaky records; |
| 23 return records.Pointer(); |
| 24 } |
| 25 |
| 26 std::set<MH_UINT32>* content_matches() { |
| 27 return &content_matches_; |
| 28 } |
| 29 |
| 30 std::set<MH_UINT32>* content_and_url_matches() { |
| 31 return &content_and_url_matches_; |
| 32 } |
| 33 |
| 34 GlobalDuplicateRecords() {} |
| 35 ~GlobalDuplicateRecords() {} |
| 36 |
| 37 private: |
| 38 std::set<MH_UINT32> content_matches_; |
| 39 std::set<MH_UINT32> content_and_url_matches_; |
| 40 }; |
| 41 |
| 42 } // namespace |
| 43 |
| 44 DuplicateContentResourceHandler::DuplicateContentResourceHandler( |
| 45 scoped_ptr<ResourceHandler> next_handler, |
| 46 ResourceType::Type resource_type, |
| 47 net::URLRequest* request) |
| 48 : LayeredResourceHandler(next_handler.Pass()), |
| 49 resource_type_(resource_type), |
| 50 bytes_read_(0), |
| 51 request_(request), |
| 52 pmurhash_ph1_(0), |
| 53 pmurhash_pcarry_(0) { |
| 54 } |
| 55 |
| 56 DuplicateContentResourceHandler::~DuplicateContentResourceHandler() { |
| 57 } |
| 58 |
| 59 bool DuplicateContentResourceHandler::OnWillRead(int request_id, |
| 60 net::IOBuffer** buf, |
| 61 int* buf_size, |
| 62 int min_size) { |
| 63 DCHECK_EQ(-1, min_size); |
| 64 |
| 65 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) |
| 66 return false; |
| 67 read_buffer_ = *buf; |
| 68 return true; |
| 69 } |
| 70 |
| 71 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, |
| 72 int bytes_read, |
| 73 bool* defer) { |
| 74 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
| 75 read_buffer_->data(), bytes_read); |
| 76 bytes_read_ += bytes_read; |
| 77 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); |
| 78 } |
| 79 |
| 80 bool DuplicateContentResourceHandler::OnResponseCompleted( |
| 81 int request_id, |
| 82 const net::URLRequestStatus& status, |
| 83 const std::string& security_info) { |
| 84 |
| 85 if (status.is_success()) |
| 86 RecordContentMetrics(); |
| 87 |
| 88 return next_handler_->OnResponseCompleted(request_id, status, security_info); |
| 89 } |
| 90 |
| 91 void DuplicateContentResourceHandler::RecordContentMetrics() { |
| 92 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, |
| 93 pmurhash_pcarry_, bytes_read_); |
| 94 |
| 95 // Combine the contents_hash with the url, so we can test if future content |
| 96 // identical resources have the same original url or not. |
| 97 MH_UINT32 hashed_with_url; |
| 98 const std::string& url_spec = request_->url().spec(); |
| 99 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
| 100 url_spec.data(), url_spec.length()); |
| 101 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, |
| 102 url_spec.length() + bytes_read_); |
| 103 |
| 104 DVLOG(4) << "url: " << url_spec; |
| 105 DVLOG(4) << "contents hash: " << contents_hash; |
| 106 DVLOG(4) << "hash with url: " << hashed_with_url; |
| 107 |
| 108 std::set<MH_UINT32>* content_matches = |
| 109 GlobalDuplicateRecords::GetInstance()->content_matches(); |
| 110 std::set<MH_UINT32>* content_and_url_matches = |
| 111 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); |
| 112 |
| 113 const bool did_match_contents = content_matches->count(contents_hash) > 0; |
| 114 const bool did_match_contents_and_url = |
| 115 content_and_url_matches->count(hashed_with_url) > 0; |
| 116 |
| 117 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents); |
| 118 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", |
| 119 did_match_contents && did_match_contents_and_url); |
| 120 if (did_match_contents && !did_match_contents_and_url) { |
| 121 content_and_url_matches->insert(hashed_with_url); |
| 122 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_, |
| 123 1, 0x7FFFFFFF, 50); |
| 124 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss", |
| 125 resource_type_, ResourceType::LAST_TYPE); |
| 126 } |
| 127 content_matches->insert(contents_hash); |
| 128 content_and_url_matches->insert(hashed_with_url); |
| 129 |
| 130 bytes_read_ = 0; |
| 131 read_buffer_ = NULL; |
| 132 } |
| 133 |
| 134 } // namespace content |
OLD | NEW |