Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(409)

Side by Side Diff: content/browser/renderer_host/duplicate_content_resource_handler.cc

Issue 10701151: DuplicateContentResourceHandler to monitor resources and track how many times th… (Closed) Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h"
6
7 #include <set>
8
9 #include "base/logging.h"
10 #include "base/memory/singleton.h"
11 #include "base/metrics/histogram.h"
12 #include "content/browser/renderer_host/resource_request_info_impl.h"
13 #include "net/base/io_buffer.h"
14 #include "net/url_request/url_request.h"
15
16 namespace content {
17
darin (slow to review) 2012/07/25 22:02:06 nit: no new line here
frankwang 2012/07/26 21:46:19 Done.
18 namespace {
19
// Holds the two hash sets that DuplicateContentResourceHandler consults and
// updates when a response completes. A single shared instance is obtained
// through Singleton<>; the constructor and destructor are private and only
// the Singleton machinery (declared as friends below) can reach them.
// NOTE(review): no locking is visible in this class -- presumably it is only
// touched from one thread; confirm against the callers.
20 class GlobalDuplicateRecords {
21 public:
darin (slow to review) 2012/07/25 22:02:06 nit: indentation
frankwang 2012/07/26 21:46:19 Done.
// Returns the shared instance managed by Singleton<>.
22 static GlobalDuplicateRecords* GetInstance() {
23 return Singleton<GlobalDuplicateRecords>::get();
darin (slow to review) 2012/07/25 22:02:06 we should only use Singleton<> for static objects
frankwang 2012/07/26 21:46:19 Done.
24 }
25
// Returns a mutable pointer to the internal set of content hashes; callers
// query and insert into it directly.
26 std::set<MH_UINT32>* content_matches() {
27 return &content_matches_;
28 }
29
// Returns a mutable pointer to the internal set of content+URL hashes;
// callers query and insert into it directly.
30 std::set<MH_UINT32>* content_and_url_matches() {
31 return &content_and_url_matches_;
32 }
33
34 private:
// Friend declarations so that Singleton<> can use the private
// constructor and destructor.
35 friend class Singleton<GlobalDuplicateRecords>;
36 friend struct DefaultSingletonTraits<GlobalDuplicateRecords>;
37
38 GlobalDuplicateRecords() {}
39 ~GlobalDuplicateRecords() {}
40
// Hashes of response bodies recorded so far (the URL is not part of the hash).
41 std::set<MH_UINT32> content_matches_;
darin (slow to review) 2012/07/25 22:02:06 how big can these sets grow? do we need to worry
frankwang 2012/07/26 21:46:19 We crawled the Alexa top 25,000 list and found tha
// Hashes of response body followed by the request URL spec recorded so far.
// Nothing in this class ever removes entries from either set.
42 std::set<MH_UINT32> content_and_url_matches_;
43 };
44 } // namespace
darin (slow to review) 2012/07/25 22:02:06 nit: add a new line above the close of the namespa
frankwang 2012/07/26 21:46:19 Done.
45
46 DuplicateContentResourceHandler::DuplicateContentResourceHandler(
47 scoped_ptr<ResourceHandler> next_handler,
48 ResourceType::Type resource_type,
49 net::URLRequest* request)
50 : LayeredResourceHandler(next_handler.Pass()),
51 resource_type_(resource_type),
52 bytes_read_(0),
53 request_(request),
54 pmurhash_ph1_(0),
55 pmurhash_pcarry_(0) {
56 }
57
58 DuplicateContentResourceHandler::~DuplicateContentResourceHandler() {
59 }
60
61 bool DuplicateContentResourceHandler::
62 OnWillRead(int request_id, net::IOBuffer** buf,
63 int* buf_size, int min_size) {
64 DCHECK_EQ(-1, min_size);
65
66 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size))
67 return false;
68 read_buffer_ = *buf;
69 return true;
70 }
71
// Folds the |bytes_read| bytes sitting in read_buffer_ (the buffer captured
// in OnWillRead) into the incremental PMurHash state, adds them to the
// running byte total, and then forwards the notification to the next handler,
// returning its result.
72 bool DuplicateContentResourceHandler::
73 OnReadCompleted(int request_id, int bytes_read, bool* defer) {
// Hashing is incremental: pmurhash_ph1_ / pmurhash_pcarry_ carry the state
// from one read to the next.
74 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
darin (slow to review) 2012/07/25 22:02:06 how do we know that this function isn't going to s
frankwang 2012/07/26 21:46:19 PMurHash is relatively fast. The MurMur hash famil
75 read_buffer_->data(), bytes_read);
// The total length is needed later when the hash is finalized.
76 bytes_read_ += bytes_read;
77 return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
78 }
79
// Called when the response finishes. Duplicate-content metrics are recorded
// only when |status| reports success; the completion is then forwarded
// unchanged to the next handler and its result is returned.
80 bool DuplicateContentResourceHandler::OnResponseCompleted(
81 int request_id,
82 const net::URLRequestStatus& status,
83 const std::string& security_info) {
84
// Unsuccessful responses are skipped, so their hashes never enter the global
// sets.
85 if (status.is_success())
86 DuplicateContentResourceHandler::RecordContentMetrics(status);
darin (slow to review) 2012/07/25 22:02:06 no need for the "DuplicateContentResourceHandler::
frankwang 2012/07/26 21:46:19 Done.
87
88 return next_handler_->OnResponseCompleted(request_id, status, security_info);
89 }
90
// Finalizes the hash of the response body, derives a second hash that also
// covers the request URL, checks both against the sets held by
// GlobalDuplicateRecords, reports the outcome through UMA histograms, and
// finally records both hashes in those sets. |status| is not read in the
// body. Statement order matters: the content-only hash must be finalized
// before the URL bytes are fed into the same hash state.
91 void DuplicateContentResourceHandler::
92 RecordContentMetrics(const net::URLRequestStatus& status) {
darin (slow to review) 2012/07/25 22:02:06 you don't seem to need this parameter.
frankwang 2012/07/26 21:46:19 Deleted.
// Hash of the body alone, taken before the URL is mixed into the state.
93 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_,
darin (slow to review) 2012/07/25 22:02:06 what about the runtime of this function? is it co
frankwang 2012/07/26 21:46:19 Comment left above about PMurHash.
94 pmurhash_pcarry_, bytes_read_);
darin (slow to review) 2012/07/25 22:02:06 nit: indentation of the parameters
frankwang 2012/07/26 21:46:19 Done.
95
96 // Combine the contents_hash with the url, so we can test if future content
97 // identical resources have the same original url or not.
98 MH_UINT32 hashed_with_url;
99 const std::string url_spec = request_->url().spec();
darin (slow to review) 2012/07/25 22:02:06 nit: you should use a const ref here: "const std:
frankwang 2012/07/26 21:46:19 Done.
// Continues the same incremental hash with the URL bytes, so the combined
// hash covers body followed by URL; the total length passed to the result
// call is therefore the sum of both lengths.
100 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
101 url_spec.data(), url_spec.length());
102 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_,
103 url_spec.length() + bytes_read_);
104
105 DVLOG(4) << "url: " << url_spec;
106 DVLOG(4) << "contents hash: " << contents_hash;
107 DVLOG(4) << "hash with url: " << hashed_with_url;
108
109 std::set<MH_UINT32>* content_matches =
110 GlobalDuplicateRecords::GetInstance()->content_matches();
111 std::set<MH_UINT32>* content_and_url_matches =
112 GlobalDuplicateRecords::GetInstance()->content_and_url_matches();
113
// Both lookups happen before any insertion, so they reflect only hashes
// recorded by earlier calls.
114 const bool did_match_contents = content_matches->count(contents_hash) > 0;
115 const bool did_match_contents_and_url =
116 content_and_url_matches->count(hashed_with_url) > 0;
117
118 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents);
119 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents &&
120 did_match_contents_and_url);
// Same body seen before, but not together with this URL: record the size and
// resource type of such responses.
121 if (did_match_contents && !did_match_contents_and_url) {
// NOTE(review): this insert is repeated unconditionally after the if block,
// so it looks redundant here.
122 content_and_url_matches->insert(hashed_with_url);
darin (slow to review) 2012/07/25 22:02:06 nit: indentation
frankwang 2012/07/26 21:46:19 Done.
123 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_,
124 1, 0x7FFFFFFF, 50);
125 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss",
126 resource_type_, ResourceType::LAST_TYPE);
127 }
// Remember both hashes so later responses can be matched against them.
128 content_matches->insert(contents_hash);
129 content_and_url_matches->insert(hashed_with_url);
130
// NOTE(review): only the byte counter and buffer pointer are reset here; the
// hash state (pmurhash_ph1_ / pmurhash_pcarry_) still includes the URL bytes.
// Presumably the handler is not reused after completion -- confirm.
131 bytes_read_ = 0;
132 read_buffer_ = NULL;
133 }
134 } // namespace content
darin (slow to review) 2012/07/25 22:02:06 nit: add a new line above the close of the namespa
frankwang 2012/07/26 21:46:19 Done.
135
darin (slow to review) 2012/07/25 22:02:06 nit: extraneous blank line?
frankwang 2012/07/26 21:46:19 Done.
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698