Index: content/browser/renderer_host/duplicate_resource_handler.cc |
=================================================================== |
--- content/browser/renderer_host/duplicate_resource_handler.cc (revision 0) |
+++ content/browser/renderer_host/duplicate_resource_handler.cc (revision 0) |
@@ -0,0 +1,146 @@ |
+// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "content/browser/renderer_host/duplicate_resource_handler.h" |
+ |
+#include <limits> |
+#include <set> |
+ |
+#include "base/logging.h" |
+#include "base/memory/singleton.h" |
+#include "base/metrics/histogram.h" |
+#include "content/browser/renderer_host/resource_request_info_impl.h" |
+#include "net/base/io_buffer.h" |
+#include "net/url_request/url_request.h" |
+#include "third_party/smhasher/src/PMurHash.h" |
+ |
+ |
+namespace content { |
+ |
+namespace { |
+ |
+// Singleton holding the duplicate-detection state shared by all handlers in |
+// this file: the set of content hashes seen so far, the set of content+URL |
+// hashes seen so far, and a running total of bytes read. Accessors return |
+// pointers to the singleton's own members so callers can update them in place. |
+class GlobalDuplicateRecords { |
gavinp
2012/07/20 01:37:59
I advise you to do some grepfights in our code for
frankwang
2012/07/20 04:50:00
I grepped for a bunch of Singletons. There is noth
gavinp
2012/07/20 11:38:46
SGTM.
|
+ public: |
+  // Returns the single shared instance. |
+  static GlobalDuplicateRecords* GetInstance() { |
+    return Singleton<GlobalDuplicateRecords>::get(); |
+  } |
+ |
+  // Hashes computed over response bodies only. |
+  std::set<uint32>* content_matches() { |
+    return &content_matches_; |
+  } |
+ |
+  // Hashes computed over response body followed by the request URL. |
+  std::set<uint32>* content_and_url_matches() { |
+    return &content_and_url_matches_; |
+  } |
+ |
+  // Running total of bytes read across all responses. |
+  int* bytes_seen() { |
gavinp
2012/07/20 01:37:59
The names should be the same. Either gain a total
frankwang
2012/07/20 04:50:00
Done.
|
+    return &total_bytes_seen_; |
+  } |
+ |
+ private: |
+  friend class Singleton<GlobalDuplicateRecords>; |
+  friend struct DefaultSingletonTraits<GlobalDuplicateRecords>; |
+ |
+  // The counter must be zeroed explicitly: with a user-provided constructor |
+  // an int member is otherwise left with an indeterminate value, and every |
+  // later increment and histogram sample would be built on garbage. |
+  GlobalDuplicateRecords() : total_bytes_seen_(0) {} |
+  ~GlobalDuplicateRecords() {} |
+ |
+  std::set<uint32> content_matches_; |
+  std::set<uint32> content_and_url_matches_; |
+  int total_bytes_seen_; |
+}; |
+ |
+} // namespace |
+ |
+// Builds a handler layered on top of next_handler. The running hash state |
+// (pmurhash_ph1_, pmurhash_pcarry_) and the per-response byte count all start |
+// at zero. resource_type and request are stored for use when the response |
+// completes (the request is queried for its URL there). |
+// NOTE(review): presumably request must outlive this handler - confirm |
+// against the caller. |
+DuplicateResourceHandler::DuplicateResourceHandler( |
+ scoped_ptr<ResourceHandler> next_handler, |
+ ResourceType::Type resource_type, |
+ net::URLRequest* request) |
+ : LayeredResourceHandler(next_handler.Pass()), |
+ resource_type_(resource_type), |
+ pmurhash_ph1_(0), |
+ pmurhash_pcarry_(0), |
+ bytes_read_(0), |
+ request_(request) { |
+} |
+ |
+// The destructor body is intentionally empty; it performs no explicit work. |
+DuplicateResourceHandler::~DuplicateResourceHandler() {} |
+ |
+// Passes the buffer request through to the next handler. When that handler |
+// succeeds, the buffer it handed back through buf is remembered in |
+// read_buffer_ so the bytes can be hashed once the read completes. Returns |
+// whatever the next handler returned. |
+bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf, |
+                                          int* buf_size, int min_size) { |
+  DCHECK_EQ(-1, min_size); |
+ |
+  const bool will_read = |
+      next_handler_->OnWillRead(request_id, buf, buf_size, min_size); |
+  if (will_read) |
+    read_buffer_ = *buf; |
+  return will_read; |
+} |
+ |
+namespace { |
+ |
+// Adds delta to *total, pinning the result at the largest int instead of |
+// overflowing (signed overflow is undefined behavior). Non-positive deltas |
+// are added as-is. |
+void AddSaturated(int* total, int delta) { |
+  const int kIntMax = std::numeric_limits<int>::max(); |
+  if (delta > 0 && *total > kIntMax - delta) |
+    *total = kIntMax; |
+  else |
+    *total += delta; |
+} |
+ |
+}  // namespace |
+ |
+// Feeds the bytes just read into the running content hash, adds them to the |
+// per-response and process-wide byte counters, then forwards the notification |
+// to the next handler and returns its result. |
+bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read, |
+                                               bool* defer) { |
+  PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
+                     read_buffer_->data(), bytes_read); |
+  // Both counters are plain ints that only ever grow; saturate so that more |
+  // than INT_MAX bytes cannot wrap them into negative values. |
+  AddSaturated(&bytes_read_, bytes_read); |
+  int* bytes_seen = GlobalDuplicateRecords::GetInstance()->bytes_seen(); |
+  AddSaturated(bytes_seen, bytes_read); |
gavinp
2012/07/20 01:37:59
I think you're introducing a confusing error condi
frankwang
2012/07/20 04:50:00
Done.
|
+  return next_handler_->OnReadCompleted(request_id, bytes_read, defer); |
+} |
+ |
+// Called when the response finishes. For successful responses, finalizes the |
+// content hash, derives a second hash that also covers the request URL, |
+// records duplicate-detection histograms against the process-wide records, |
+// and updates those records. Always forwards the completion to the next |
+// handler and returns its result. |
+bool DuplicateResourceHandler::OnResponseCompleted( |
+ int request_id, |
+ const net::URLRequestStatus& status, |
+ const std::string& security_info) { |
+ |
+ // Unsuccessful responses are forwarded untouched and never recorded. |
+ if (status.status() != net::URLRequestStatus::SUCCESS) |
+ return next_handler_->OnResponseCompleted(request_id, |
+ status, security_info); |
+ |
+ // Finalize the hash over the bytes accumulated so far (bytes_read_ long). |
+ uint32 resource_hash = PMurHash32_Result(pmurhash_ph1_, |
gavinp
2012/07/20 01:37:59
I think contents_hash is a better name.
frankwang
2012/07/20 04:50:00
Done.
|
+ pmurhash_pcarry_, bytes_read_); |
+ |
+ // Hash url into the resource to see whether it is from the same or |
gavinp
2012/07/20 01:37:59
// Combine the contents_hash with the url, so we c
frankwang
2012/07/20 04:50:00
Changed.
|
+ // different url. |
+ uint32 hashed_with_url; |
+ const std::string url_spec = request_->url().spec(); |
+ PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
+ url_spec.data(), url_spec.length()); |
+ hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, |
+ url_spec.length() + bytes_read_); |
+ |
+ DVLOG(4) << "url: " << url_spec; |
+ DVLOG(4) << "resource hash: " << resource_hash; |
+ DVLOG(4) << "hash with url: " << hashed_with_url; |
+ |
+ // Look both hashes up in the process-wide records before updating them. |
+ std::set<uint32>* content_hashes = |
gavinp
2012/07/20 01:37:59
These automatics should have names identical to th
frankwang
2012/07/20 04:50:00
Done.
|
+ GlobalDuplicateRecords::GetInstance()->content_matches(); |
+ std::set<uint32>* content_and_url_hashes = |
+ GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); |
+ |
+ const bool did_match_contents = content_hashes->count(resource_hash); |
+ const bool did_match_contents_and_url = |
+ content_and_url_hashes->count(hashed_with_url); |
+ |
+ // "Duplicate.Hits" records whether the content hash was already known; |
+ // "Duplicate.HitsSameUrl" is true only when the content+URL hash was too. |
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents); |
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents && |
+ did_match_contents_and_url); |
+ // Known content arriving under a URL not seen with it before: record its |
+ // size, resource type and the total bytes seen so far, then remember the |
+ // new content+URL combination. |
+ if (did_match_contents && !did_match_contents_and_url) { |
+ int* bytes_seen = GlobalDuplicateRecords::GetInstance()->bytes_seen(); |
+ UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_, |
+ 1, 0x7FFFFFFF, 50); |
+ UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss", |
+ resource_type_, ResourceType::LAST_TYPE); |
+ UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.TotalBytesSeen", *bytes_seen, |
gavinp
2012/07/20 01:37:59
This histogram doesn't seem very useful to me. Can
frankwang
2012/07/20 04:50:00
I am using this histogram in a similar way to leng
gavinp
2012/07/20 11:38:46
Aha. So a fine way to get length of browser sessio
frankwang
2012/07/20 17:51:38
I think browser session length should be sufficien
|
+ 1, 0x7FFFFFFF, 50); |
+ content_and_url_hashes->insert(hashed_with_url); |
gavinp
2012/07/20 01:37:59
Move this line above all this histograms, so the c
frankwang
2012/07/20 04:50:00
Done.
|
+ } else { |
+ // Either new content or an exact content+URL repeat: record both hashes |
+ // (inserting a value the set already holds leaves it unchanged). |
+ content_hashes->insert(resource_hash); |
+ content_and_url_hashes->insert(hashed_with_url); |
+ } |
+ |
+ // Clear the per-response byte count and buffer before handing off. |
+ bytes_read_ = 0; |
+ read_buffer_ = NULL; |
+ return next_handler_->OnResponseCompleted(request_id, status, security_info); |
+} |
+ |
+} // namespace content |
+ |