OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "content/browser/renderer_host/duplicate_resource_handler.h" | |
6 | |
7 #include <set> | |
8 | |
9 #include "base/logging.h" | |
10 #include "base/memory/singleton.h" | |
11 #include "base/metrics/histogram.h" | |
12 #include "content/browser/renderer_host/resource_request_info_impl.h" | |
13 #include "net/base/io_buffer.h" | |
14 #include "net/url_request/url_request.h" | |
15 #include "third_party/smhasher/src/PMurHash.h" | |
16 | |
17 | |
18 namespace content { | |
19 | |
20 namespace { | |
21 | |
22 class GlobalDuplicateRecords { | |
gavinp
2012/07/20 01:37:59
I advise you to do some grepfights in our code for
frankwang
2012/07/20 04:50:00
I grepped for a bunch of Singletons. There is noth
gavinp
2012/07/20 11:38:46
SGTM.
| |
23 public: | |
24 static GlobalDuplicateRecords* GetInstance(){ | |
25 return Singleton<GlobalDuplicateRecords>::get(); | |
26 } | |
27 | |
28 std::set<uint32>* content_matches() { | |
29 return &content_matches_; | |
30 } | |
31 | |
32 std::set<uint32>* content_and_url_matches() { | |
33 return &content_and_url_matches_; | |
34 } | |
35 | |
36 int* bytes_seen() { | |
gavinp
2012/07/20 01:37:59
The names should be the same. Either gain a total
frankwang
2012/07/20 04:50:00
Done.
| |
37 return &total_bytes_seen_; | |
38 } | |
39 | |
40 private: | |
41 friend class Singleton<GlobalDuplicateRecords>; | |
42 friend struct DefaultSingletonTraits<GlobalDuplicateRecords>; | |
43 | |
44 GlobalDuplicateRecords() {} | |
45 ~GlobalDuplicateRecords() {} | |
46 | |
47 std::set<uint32> content_matches_; | |
48 std::set<uint32> content_and_url_matches_; | |
49 int total_bytes_seen_; | |
50 }; | |
51 | |
52 } // namespace | |
53 | |
54 DuplicateResourceHandler::DuplicateResourceHandler( | |
55 scoped_ptr<ResourceHandler> next_handler, | |
56 ResourceType::Type resource_type, | |
57 net::URLRequest* request) | |
58 : LayeredResourceHandler(next_handler.Pass()), | |
59 resource_type_(resource_type), | |
60 pmurhash_ph1_(0), | |
61 pmurhash_pcarry_(0), | |
62 bytes_read_(0), | |
63 request_(request) { | |
64 } | |
65 | |
66 DuplicateResourceHandler::~DuplicateResourceHandler() { | |
67 } | |
68 | |
69 bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf, | |
70 int* buf_size, int min_size) { | |
71 DCHECK_EQ(-1, min_size); | |
72 | |
73 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | |
74 return false; | |
75 read_buffer_ = *buf; | |
76 return true; | |
77 } | |
78 | |
79 bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read, | |
80 bool* defer) { | |
81 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | |
82 read_buffer_->data(), bytes_read); | |
83 bytes_read_ += bytes_read; | |
84 int* bytes_seen = GlobalDuplicateRecords::GetInstance()->bytes_seen(); | |
85 *bytes_seen += bytes_read; | |
gavinp
2012/07/20 01:37:59
I think you're introducing a confusing error condi
frankwang
2012/07/20 04:50:00
Done.
| |
86 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | |
87 } | |
88 | |
89 bool DuplicateResourceHandler::OnResponseCompleted( | |
90 int request_id, | |
91 const net::URLRequestStatus& status, | |
92 const std::string& security_info) { | |
93 | |
94 if (status.status() != net::URLRequestStatus::SUCCESS) | |
95 return next_handler_->OnResponseCompleted(request_id, | |
96 status, security_info); | |
97 | |
98 uint32 resource_hash = PMurHash32_Result(pmurhash_ph1_, | |
gavinp
2012/07/20 01:37:59
I think contents_hash is a better name.
frankwang
2012/07/20 04:50:00
Done.
| |
99 pmurhash_pcarry_, bytes_read_); | |
100 | |
101 // Hash url into the resource to see whether it is from the same or | |
gavinp
2012/07/20 01:37:59
// Combine the contents_hash with the url, so we c
frankwang
2012/07/20 04:50:00
Changed.
| |
102 // different url. | |
103 uint32 hashed_with_url; | |
104 const std::string url_spec = request_->url().spec(); | |
105 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | |
106 url_spec.data(), url_spec.length()); | |
107 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, | |
108 url_spec.length() + bytes_read_); | |
109 | |
110 DVLOG(4) << "url: " << url_spec; | |
111 DVLOG(4) << "resource hash: " << resource_hash; | |
112 DVLOG(4) << "hash with url: " << hashed_with_url; | |
113 | |
114 std::set<uint32>* content_hashes = | |
gavinp
2012/07/20 01:37:59
These automatics should have names identical to th
frankwang
2012/07/20 04:50:00
Done.
| |
115 GlobalDuplicateRecords::GetInstance()->content_matches(); | |
116 std::set<uint32>* content_and_url_hashes = | |
117 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); | |
118 | |
119 const bool did_match_contents = content_hashes->count(resource_hash); | |
120 const bool did_match_contents_and_url = | |
121 content_and_url_hashes->count(hashed_with_url); | |
122 | |
123 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents); | |
124 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents && | |
125 did_match_contents_and_url); | |
126 if (did_match_contents && !did_match_contents_and_url) { | |
127 int* bytes_seen = GlobalDuplicateRecords::GetInstance()->bytes_seen(); | |
128 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_, | |
129 1, 0x7FFFFFFF, 50); | |
130 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss", | |
131 resource_type_, ResourceType::LAST_TYPE); | |
132 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.TotalBytesSeen", *bytes_seen, | |
gavinp
2012/07/20 01:37:59
This histogram doesn't seem very useful to me. Can
frankwang
2012/07/20 04:50:00
I am using this histogram in a similar way to leng
gavinp
2012/07/20 11:38:46
Aha. So a fine way to get length of browser sessio
frankwang
2012/07/20 17:51:38
I think browser session length should be sufficien
| |
133 1, 0x7FFFFFFF, 50); | |
134 content_and_url_hashes->insert(hashed_with_url); | |
gavinp
2012/07/20 01:37:59
Move this line above all this histograms, so the c
frankwang
2012/07/20 04:50:00
Done.
| |
135 } else { | |
136 content_hashes->insert(resource_hash); | |
137 content_and_url_hashes->insert(hashed_with_url); | |
138 } | |
139 | |
140 bytes_read_ = 0; | |
141 read_buffer_ = NULL; | |
142 return next_handler_->OnResponseCompleted(request_id, status, security_info); | |
143 } | |
144 | |
145 } // namespace content | |
146 | |
OLD | NEW |