OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" | 5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" |
6 | 6 |
7 #include <set> | 7 #include <set> |
8 | 8 |
9 #include "base/lazy_instance.h" | 9 #include "base/lazy_instance.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
44 DuplicateContentResourceHandler::DuplicateContentResourceHandler( | 44 DuplicateContentResourceHandler::DuplicateContentResourceHandler( |
45 scoped_ptr<ResourceHandler> next_handler, | 45 scoped_ptr<ResourceHandler> next_handler, |
46 ResourceType::Type resource_type, | 46 ResourceType::Type resource_type, |
47 net::URLRequest* request) | 47 net::URLRequest* request) |
48 : LayeredResourceHandler(next_handler.Pass()), | 48 : LayeredResourceHandler(next_handler.Pass()), |
49 resource_type_(resource_type), | 49 resource_type_(resource_type), |
50 bytes_read_(0), | 50 bytes_read_(0), |
51 request_(request), | 51 request_(request), |
52 pmurhash_ph1_(0), | 52 pmurhash_ph1_(0), |
53 pmurhash_pcarry_(0) { | 53 pmurhash_pcarry_(0) { |
| 54 // Ignore everything that's not http/https. Specifically, exclude data and |
| 55 // blob URLs which can be generated by content and cause the maintained sets |
| 56 // to grow without bounds. |
| 57 const GURL& url = request_->url(); |
| 58 track_request_ = url.SchemeIs("http") || url.SchemeIs("https"); |
54 } | 59 } |
55 | 60 |
56 DuplicateContentResourceHandler::~DuplicateContentResourceHandler() { | 61 DuplicateContentResourceHandler::~DuplicateContentResourceHandler() { |
57 } | 62 } |
58 | 63 |
59 bool DuplicateContentResourceHandler::OnWillRead(int request_id, | 64 bool DuplicateContentResourceHandler::OnWillRead(int request_id, |
60 net::IOBuffer** buf, | 65 net::IOBuffer** buf, |
61 int* buf_size, | 66 int* buf_size, |
62 int min_size) { | 67 int min_size) { |
63 DCHECK_EQ(-1, min_size); | 68 DCHECK_EQ(-1, min_size); |
64 | 69 |
65 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | 70 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) |
66 return false; | 71 return false; |
67 read_buffer_ = *buf; | 72 read_buffer_ = *buf; |
68 return true; | 73 return true; |
69 } | 74 } |
70 | 75 |
71 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, | 76 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, |
72 int bytes_read, | 77 int bytes_read, |
73 bool* defer) { | 78 bool* defer) { |
74 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | 79 if (track_request_) { |
75 read_buffer_->data(), bytes_read); | 80 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
76 bytes_read_ += bytes_read; | 81 read_buffer_->data(), bytes_read); |
| 82 bytes_read_ += bytes_read; |
| 83 } |
77 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | 84 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); |
78 } | 85 } |
79 | 86 |
80 bool DuplicateContentResourceHandler::OnResponseCompleted( | 87 bool DuplicateContentResourceHandler::OnResponseCompleted( |
81 int request_id, | 88 int request_id, |
82 const net::URLRequestStatus& status, | 89 const net::URLRequestStatus& status, |
83 const std::string& security_info) { | 90 const std::string& security_info) { |
84 | 91 if (track_request_) { |
85 if (status.is_success()) | 92 if (status.is_success()) |
86 RecordContentMetrics(); | 93 RecordContentMetrics(); |
| 94 } |
87 | 95 |
88 return next_handler_->OnResponseCompleted(request_id, status, security_info); | 96 return next_handler_->OnResponseCompleted(request_id, status, security_info); |
89 } | 97 } |
90 | 98 |
91 void DuplicateContentResourceHandler::RecordContentMetrics() { | 99 void DuplicateContentResourceHandler::RecordContentMetrics() { |
92 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, | 100 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, |
93 pmurhash_pcarry_, bytes_read_); | 101 pmurhash_pcarry_, bytes_read_); |
94 | 102 |
95 bool is_http_or_https = request_->url().SchemeIs("http") || | 103 bool is_http_or_https = request_->url().SchemeIs("http") || |
96 request_->url().SchemeIs("https"); | 104 request_->url().SchemeIs("https"); |
97 UMA_HISTOGRAM_BOOLEAN("Duplicate.IsHttpOrHttps", is_http_or_https); | 105 UMA_HISTOGRAM_BOOLEAN("Duplicate.IsHttpOrHttps", is_http_or_https); |
98 | 106 |
99 // Combine the contents_hash with the url, so we can test if future content | 107 // Combine the contents_hash with the url, so we can test if future content |
100 // identical resources have the same original url or not. | 108 // identical resources have the same original url or not. |
101 MH_UINT32 hashed_with_url; | |
102 const std::string& url_spec = request_->url().spec(); | 109 const std::string& url_spec = request_->url().spec(); |
103 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | 110 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
104 url_spec.data(), url_spec.length()); | 111 url_spec.data(), url_spec.length()); |
105 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, | 112 MH_UINT32 hashed_with_url = PMurHash32_Result( |
106 url_spec.length() + bytes_read_); | 113 pmurhash_ph1_, pmurhash_pcarry_, url_spec.length() + bytes_read_); |
107 | 114 |
108 DVLOG(4) << "url: " << url_spec; | 115 DVLOG(4) << "url: " << url_spec; |
109 DVLOG(4) << "contents hash: " << contents_hash; | 116 DVLOG(4) << "contents hash: " << contents_hash; |
110 DVLOG(4) << "hash with url: " << hashed_with_url; | 117 DVLOG(4) << "hash with url: " << hashed_with_url; |
111 | 118 |
112 std::set<MH_UINT32>* content_matches = | 119 std::set<MH_UINT32>* content_matches = |
113 GlobalDuplicateRecords::GetInstance()->content_matches(); | 120 GlobalDuplicateRecords::GetInstance()->content_matches(); |
114 std::set<MH_UINT32>* content_and_url_matches = | 121 std::set<MH_UINT32>* content_and_url_matches = |
115 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); | 122 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); |
116 | 123 |
(...skipping 14 matching lines...) Expand all Loading... |
131 resource_type_, ResourceType::LAST_TYPE); | 138 resource_type_, ResourceType::LAST_TYPE); |
132 } | 139 } |
133 content_matches->insert(contents_hash); | 140 content_matches->insert(contents_hash); |
134 content_and_url_matches->insert(hashed_with_url); | 141 content_and_url_matches->insert(hashed_with_url); |
135 | 142 |
136 bytes_read_ = 0; | 143 bytes_read_ = 0; |
137 read_buffer_ = NULL; | 144 read_buffer_ = NULL; |
138 } | 145 } |
139 | 146 |
140 } // namespace content | 147 } // namespace content |
OLD | NEW |