Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(258)

Side by Side Diff: content/browser/renderer_host/duplicate_content_resource_handler.cc

Issue 10701151: DuplicateContentResourceHandler to monitor resources and track how many times th… (Closed) Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h"
6
7 #include <set>
8
9 #include "base/logging.h"
10 #include "base/metrics/histogram.h"
11 #include "content/browser/renderer_host/resource_request_info_impl.h"
12 #include "net/base/io_buffer.h"
13 #include "net/url_request/url_request.h"
14
15 namespace content {
16 namespace {
17
18 class GlobalDuplicateRecords {
19 public:
20 static GlobalDuplicateRecords* GetInstance() {
21 static GlobalDuplicateRecords records;
22 return &records;
23 }
24
25 std::set<MH_UINT32>* content_matches() {
26 return &content_matches_;
27 }
28
29 std::set<MH_UINT32>* content_and_url_matches() {
30 return &content_and_url_matches_;
31 }
32
33 private:
34 GlobalDuplicateRecords() {}
35 ~GlobalDuplicateRecords() {}
36
37 std::set<MH_UINT32> content_matches_;
38 std::set<MH_UINT32> content_and_url_matches_;
39 };
40
41 } // namespace
42
43 DuplicateContentResourceHandler::DuplicateContentResourceHandler(
44 scoped_ptr<ResourceHandler> next_handler,
45 ResourceType::Type resource_type,
46 net::URLRequest* request)
47 : LayeredResourceHandler(next_handler.Pass()),
48 resource_type_(resource_type),
49 bytes_read_(0),
50 request_(request),
51 pmurhash_ph1_(0),
52 pmurhash_pcarry_(0) {
53 }
54
55 DuplicateContentResourceHandler::~DuplicateContentResourceHandler() {
56 }
57
58 bool DuplicateContentResourceHandler::
59 OnWillRead(int request_id, net::IOBuffer** buf,
60 int* buf_size, int min_size) {
darin (slow to review) 2012/07/31 18:21:03 nit: indentation nit: formatting, should look lik
61 DCHECK_EQ(-1, min_size);
62
63 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size))
64 return false;
65 read_buffer_ = *buf;
66 return true;
67 }
68
69 bool DuplicateContentResourceHandler::
70 OnReadCompleted(int request_id, int bytes_read, bool* defer) {
darin (slow to review) 2012/07/31 18:21:03 ditto... you formatted correctly for OnResponseCom
71 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
72 read_buffer_->data(), bytes_read);
73 bytes_read_ += bytes_read;
74 return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
75 }
76
77 bool DuplicateContentResourceHandler::OnResponseCompleted(
78 int request_id,
79 const net::URLRequestStatus& status,
80 const std::string& security_info) {
81
82 if (status.is_success())
83 RecordContentMetrics();
84
85 return next_handler_->OnResponseCompleted(request_id, status, security_info);
86 }
87
88 void DuplicateContentResourceHandler::RecordContentMetrics() {
89 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_,
90 pmurhash_pcarry_, bytes_read_);
91
92 // Combine the contents_hash with the url, so we can test if future content
93 // identical resources have the same original url or not.
94 MH_UINT32 hashed_with_url;
95 const std::string& url_spec = request_->url().spec();
96 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
97 url_spec.data(), url_spec.length());
98 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_,
99 url_spec.length() + bytes_read_);
100
101 /*DVLOG(4) << "url: " << url_spec;
darin (slow to review) 2012/07/31 18:21:03 nit: either uncomment this code or delete the code
102 DVLOG(4) << "contents hash: " << contents_hash;
103 DVLOG(4) << "hash with url: " << hashed_with_url;*/
104
105 std::set<MH_UINT32>* content_matches =
106 GlobalDuplicateRecords::GetInstance()->content_matches();
107 std::set<MH_UINT32>* content_and_url_matches =
108 GlobalDuplicateRecords::GetInstance()->content_and_url_matches();
109
110 const bool did_match_contents = content_matches->count(contents_hash) > 0;
111 const bool did_match_contents_and_url =
112 content_and_url_matches->count(hashed_with_url) > 0;
113
114 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents);
115 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents &&
116 did_match_contents_and_url);
117 if (did_match_contents && !did_match_contents_and_url) {
118 content_and_url_matches->insert(hashed_with_url);
119 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_,
120 1, 0x7FFFFFFF, 50);
121 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss",
122 resource_type_, ResourceType::LAST_TYPE);
123 }
124 content_matches->insert(contents_hash);
125 content_and_url_matches->insert(hashed_with_url);
126
127 bytes_read_ = 0;
128 read_buffer_ = NULL;
129 }
130
131 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698