OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" | |
6 | |
7 #include <set> | |
8 | |
9 #include "base/logging.h" | |
10 #include "base/metrics/histogram.h" | |
11 #include "content/browser/renderer_host/resource_request_info_impl.h" | |
12 #include "net/base/io_buffer.h" | |
13 #include "net/url_request/url_request.h" | |
14 | |
15 namespace content { | |
16 namespace { | |
gavinp
2012/07/31 19:26:18
Why not move the anon namespace out of content? It
frankwang
2012/07/31 20:26:18
Done.
darin (slow to review)
2012/07/31 20:35:22
Sorry to contradict, but I think it is generally b
| |
17 | |
18 class GlobalDuplicateRecords { | |
19 public: | |
20 static GlobalDuplicateRecords* GetInstance() { | |
21 static GlobalDuplicateRecords records; | |
22 return &records; | |
23 } | |
24 | |
25 std::set<MH_UINT32>* content_matches() { | |
26 return &content_matches_; | |
27 } | |
28 | |
29 std::set<MH_UINT32>* content_and_url_matches() { | |
30 return &content_and_url_matches_; | |
31 } | |
32 | |
33 private: | |
34 GlobalDuplicateRecords() {} | |
35 ~GlobalDuplicateRecords() {} | |
36 | |
37 std::set<MH_UINT32> content_matches_; | |
38 std::set<MH_UINT32> content_and_url_matches_; | |
39 }; | |
40 | |
41 } // namespace | |
42 | |
43 DuplicateContentResourceHandler::DuplicateContentResourceHandler( | |
44 scoped_ptr<ResourceHandler> next_handler, | |
45 ResourceType::Type resource_type, | |
46 net::URLRequest* request) | |
47 : LayeredResourceHandler(next_handler.Pass()), | |
48 resource_type_(resource_type), | |
49 bytes_read_(0), | |
50 request_(request), | |
51 pmurhash_ph1_(0), | |
52 pmurhash_pcarry_(0) { | |
53 } | |
54 | |
55 DuplicateContentResourceHandler::~DuplicateContentResourceHandler() { | |
56 } | |
57 | |
58 bool DuplicateContentResourceHandler::OnWillRead( | |
59 int request_id, | |
60 net::IOBuffer** buf, | |
61 int* buf_size, | |
62 int min_size) { | |
63 DCHECK_EQ(-1, min_size); | |
64 | |
65 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | |
66 return false; | |
67 read_buffer_ = *buf; | |
68 return true; | |
69 } | |
70 | |
71 bool DuplicateContentResourceHandler::OnReadCompleted( | |
gavinp
2012/07/31 19:26:18
You can move the first parameter up here, and inde
frankwang
2012/07/31 20:26:18
Is that the format? It seems like the parameters n
gavinp
2012/07/31 20:29:16
Each parameter must be on its own line, but it is
darin (slow to review)
2012/07/31 20:35:22
gavin is correct. sorry, i assumed since you had
| |
72 int request_id, | |
73 int bytes_read, | |
74 bool* defer) { | |
75 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | |
76 read_buffer_->data(), bytes_read); | |
77 bytes_read_ += bytes_read; | |
78 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | |
79 } | |
80 | |
81 bool DuplicateContentResourceHandler::OnResponseCompleted( | |
82 int request_id, | |
83 const net::URLRequestStatus& status, | |
84 const std::string& security_info) { | |
85 | |
86 if (status.is_success()) | |
87 RecordContentMetrics(); | |
88 | |
89 return next_handler_->OnResponseCompleted(request_id, status, security_info); | |
90 } | |
91 | |
92 void DuplicateContentResourceHandler::RecordContentMetrics() { | |
93 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, | |
94 pmurhash_pcarry_, bytes_read_); | |
95 | |
96 // Combine the contents_hash with the url, so we can test if future content | |
97 // identical resources have the same original url or not. | |
98 MH_UINT32 hashed_with_url; | |
99 const std::string& url_spec = request_->url().spec(); | |
100 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | |
101 url_spec.data(), url_spec.length()); | |
102 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, | |
103 url_spec.length() + bytes_read_); | |
104 | |
105 DVLOG(4) << "url: " << url_spec; | |
106 DVLOG(4) << "contents hash: " << contents_hash; | |
107 DVLOG(4) << "hash with url: " << hashed_with_url; | |
108 | |
109 std::set<MH_UINT32>* content_matches = | |
110 GlobalDuplicateRecords::GetInstance()->content_matches(); | |
111 std::set<MH_UINT32>* content_and_url_matches = | |
112 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); | |
113 | |
114 const bool did_match_contents = content_matches->count(contents_hash) > 0; | |
115 const bool did_match_contents_and_url = | |
116 content_and_url_matches->count(hashed_with_url) > 0; | |
117 | |
118 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents); | |
119 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents && | |
gavinp
2012/07/31 19:26:18
Could we break this line after the ", " for better
frankwang
2012/07/31 20:26:18
Yup, it does.
| |
120 did_match_contents_and_url); | |
121 if (did_match_contents && !did_match_contents_and_url) { | |
122 content_and_url_matches->insert(hashed_with_url); | |
123 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_, | |
124 1, 0x7FFFFFFF, 50); | |
125 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss", | |
126 resource_type_, ResourceType::LAST_TYPE); | |
127 } | |
128 content_matches->insert(contents_hash); | |
129 content_and_url_matches->insert(hashed_with_url); | |
130 | |
131 bytes_read_ = 0; | |
132 read_buffer_ = NULL; | |
133 } | |
134 | |
135 } // namespace content | |
OLD | NEW |