Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(141)

Side by Side Diff: content/browser/renderer_host/duplicate_resource_handler.cc

Issue 10701151: DuplicateContentResourceHandler to monitor resources and track how many times th… (Closed) Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/renderer_host/duplicate_resource_handler.h"
6
7 #include <set>
8
9 #include "base/logging.h"
10 #include "base/memory/singleton.h"
11 #include "base/metrics/histogram.h"
12 #include "content/browser/renderer_host/resource_request_info_impl.h"
13 #include "net/base/io_buffer.h"
14 #include "net/url_request/url_request.h"
15 #include "third_party/smhasher/src/PMurHash.h"
16
17
18 namespace content {
19
20 namespace {
21
// Shared bookkeeping for DuplicateResourceHandler, reached through
// Singleton<GlobalDuplicateRecords>::get(). It holds two sets of 32-bit hashes
// and a running byte counter. Each accessor returns a pointer to the
// underlying member, so callers read and mutate the records in place.
// NOTE(review): no locking is visible here; presumably all users run on a
// single thread -- confirm.
22 class GlobalDuplicateRecords {
gavinp 2012/07/20 01:37:59 I advise you to do some grepfights in our code for
frankwang 2012/07/20 04:50:00 I grepped for a bunch of Singletons. There is noth
gavinp 2012/07/20 11:38:46 SGTM.
23 public:
24 static GlobalDuplicateRecords* GetInstance(){
25 return Singleton<GlobalDuplicateRecords>::get();
26 }
27
// Hashes of response contents alone.
28 std::set<uint32>* content_matches() {
29 return &content_matches_;
30 }
31
// Hashes covering response contents followed by the request URL.
32 std::set<uint32>* content_and_url_matches() {
33 return &content_and_url_matches_;
34 }
35
// Running count of bytes fed through OnReadCompleted().
36 int* bytes_seen() {
gavinp 2012/07/20 01:37:59 The names should be the same. Either gain a total
frankwang 2012/07/20 04:50:00 Done.
37 return &total_bytes_seen_;
38 }
39
40 private:
41 friend class Singleton<GlobalDuplicateRecords>;
42 friend struct DefaultSingletonTraits<GlobalDuplicateRecords>;
43
// The counter is explicitly zeroed: with a user-provided constructor an int
// member that is not mentioned in the initializer list is left with an
// indeterminate value, and callers increment and report it.
44 GlobalDuplicateRecords() : total_bytes_seen_(0) {}
45 ~GlobalDuplicateRecords() {}
46
47 std::set<uint32> content_matches_;
48 std::set<uint32> content_and_url_matches_;
49 int total_bytes_seen_;
50 };
51
52 } // namespace
53
// Builds a handler layered on top of |next_handler| (ownership is passed on
// to LayeredResourceHandler). The incremental hash state (pmurhash_ph1_,
// pmurhash_pcarry_) and the per-response byte counter start at zero.
// |resource_type| and |request| are stored for use when the response
// completes.
// NOTE(review): |request| is kept as given; presumably it must outlive this
// handler -- confirm against the caller.
54 DuplicateResourceHandler::DuplicateResourceHandler(
55 scoped_ptr<ResourceHandler> next_handler,
56 ResourceType::Type resource_type,
57 net::URLRequest* request)
58 : LayeredResourceHandler(next_handler.Pass()),
59 resource_type_(resource_type),
60 pmurhash_ph1_(0),
61 pmurhash_pcarry_(0),
62 bytes_read_(0),
63 request_(request) {
64 }
65
// Empty body: no explicit cleanup is performed here.
66 DuplicateResourceHandler::~DuplicateResourceHandler() {
67 }
68
// Forwards the read request to the next handler. If that handler declines
// (returns false), false is returned and nothing is recorded. Otherwise the
// buffer it supplied through |buf| is remembered in read_buffer_, which is
// the buffer OnReadCompleted() hashes from. |min_size| is expected to be -1
// (DCHECK'd).
69 bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf,
70 int* buf_size, int min_size) {
71 DCHECK_EQ(-1, min_size);
72
73 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size))
74 return false;
75 read_buffer_ = *buf;
76 return true;
77 }
78
// Feeds the first |bytes_read| bytes of read_buffer_ into the incremental
// PMurHash state, adds |bytes_read| to this handler's byte count and to the
// global bytes-seen counter, then forwards the notification to the next
// handler and returns its result.
// NOTE(review): |bytes_read| is used unchecked; confirm callers never pass a
// negative value, and that read_buffer_ was set by a preceding OnWillRead().
79 bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read,
80 bool* defer) {
81 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
82 read_buffer_->data(), bytes_read);
83 bytes_read_ += bytes_read;
84 int* bytes_seen = GlobalDuplicateRecords::GetInstance()->bytes_seen();
85 *bytes_seen += bytes_read;
gavinp 2012/07/20 01:37:59 I think you're introducing a confusing error condi
frankwang 2012/07/20 04:50:00 Done.
86 return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
87 }
88
// Finishes duplicate tracking for one response and then forwards the
// completion to the next handler, whose result is returned.
//
// If |status| is not SUCCESS the call is forwarded immediately with no
// bookkeeping. Otherwise:
//   1. The content hash is finalized from the state built in
//      OnReadCompleted().
//   2. The same hash state is continued with the URL spec, giving a second
//      hash over contents followed by URL.
//   3. Both hashes are looked up in the global records and the results are
//      reported to the "Duplicate.*" histograms.
//   4. When the contents were seen before but not with this URL, size,
//      resource type and total-bytes histograms are reported and only the
//      contents+URL hash is inserted. In every other case (new contents, or
//      an exact contents+URL repeat) both hashes are inserted.
//   5. bytes_read_ and read_buffer_ are reset.
// NOTE(review): pmurhash_ph1_ / pmurhash_pcarry_ are not reset here, which is
// fine only if a handler instance serves a single response -- confirm.
89 bool DuplicateResourceHandler::OnResponseCompleted(
90 int request_id,
91 const net::URLRequestStatus& status,
92 const std::string& security_info) {
93
94 if (status.status() != net::URLRequestStatus::SUCCESS)
95 return next_handler_->OnResponseCompleted(request_id,
96 status, security_info);
97
98 uint32 resource_hash = PMurHash32_Result(pmurhash_ph1_,
gavinp 2012/07/20 01:37:59 I think contents_hash is a better name.
frankwang 2012/07/20 04:50:00 Done.
99 pmurhash_pcarry_, bytes_read_);
100
101 // Hash url into the resource to see whether it is from the same or
gavinp 2012/07/20 01:37:59 // Combine the contents_hash with the url, so we c
frankwang 2012/07/20 04:50:00 Changed.
102 // different url.
103 uint32 hashed_with_url;
104 const std::string url_spec = request_->url().spec();
// Continue the existing hash state rather than starting a new one, so the
// total length passed to PMurHash32_Result below is URL length plus content
// length.
105 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
106 url_spec.data(), url_spec.length());
107 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_,
108 url_spec.length() + bytes_read_);
109
110 DVLOG(4) << "url: " << url_spec;
111 DVLOG(4) << "resource hash: " << resource_hash;
112 DVLOG(4) << "hash with url: " << hashed_with_url;
113
114 std::set<uint32>* content_hashes =
gavinp 2012/07/20 01:37:59 These automatics should have names identical to th
frankwang 2012/07/20 04:50:00 Done.
115 GlobalDuplicateRecords::GetInstance()->content_matches();
116 std::set<uint32>* content_and_url_hashes =
117 GlobalDuplicateRecords::GetInstance()->content_and_url_matches();
118
119 const bool did_match_contents = content_hashes->count(resource_hash);
120 const bool did_match_contents_and_url =
121 content_and_url_hashes->count(hashed_with_url);
122
123 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents);
124 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents &&
125 did_match_contents_and_url);
// Same contents seen before, but not together with this URL.
126 if (did_match_contents && !did_match_contents_and_url) {
127 int* bytes_seen = GlobalDuplicateRecords::GetInstance()->bytes_seen();
128 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_,
129 1, 0x7FFFFFFF, 50);
130 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss",
131 resource_type_, ResourceType::LAST_TYPE);
132 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.TotalBytesSeen", *bytes_seen,
gavinp 2012/07/20 01:37:59 This histogram doesn't seem very useful to me. Can
frankwang 2012/07/20 04:50:00 I am using this histogram in a similar way to leng
gavinp 2012/07/20 11:38:46 Aha. So a fine way to get length of browser sessio
frankwang 2012/07/20 17:51:38 I think browser session length should be sufficien
133 1, 0x7FFFFFFF, 50);
134 content_and_url_hashes->insert(hashed_with_url);
gavinp 2012/07/20 01:37:59 Move this line above all this histograms, so the c
frankwang 2012/07/20 04:50:00 Done.
135 } else {
136 content_hashes->insert(resource_hash);
137 content_and_url_hashes->insert(hashed_with_url);
138 }
139
140 bytes_read_ = 0;
141 read_buffer_ = NULL;
142 return next_handler_->OnResponseCompleted(request_id, status, security_info);
143 }
144
145 } // namespace content
146
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698