Index: content/browser/renderer_host/duplicate_content_resource_handler.cc |
diff --git a/content/browser/renderer_host/duplicate_content_resource_handler.cc b/content/browser/renderer_host/duplicate_content_resource_handler.cc |
index 5760db45bf90a2af5eeabcf2290a1d6c22ef1b8a..c94f511594bf945f977b8eba0b7c0b9042d72074 100644 |
--- a/content/browser/renderer_host/duplicate_content_resource_handler.cc |
+++ b/content/browser/renderer_host/duplicate_content_resource_handler.cc |
@@ -51,6 +51,11 @@ DuplicateContentResourceHandler::DuplicateContentResourceHandler( |
request_(request), |
pmurhash_ph1_(0), |
pmurhash_pcarry_(0) { |
+ // Only track http/https requests. In particular, exclude data and blob |
+ // URLs, which content can generate freely and which would otherwise cause |
+ // the maintained sets to grow without bound. |
+ const GURL& url = request_->url(); |
+ track_request_ = url.SchemeIs("http") || url.SchemeIs("https"); |
} |
DuplicateContentResourceHandler::~DuplicateContentResourceHandler() { |
@@ -71,9 +76,11 @@ bool DuplicateContentResourceHandler::OnWillRead(int request_id, |
bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, |
int bytes_read, |
bool* defer) { |
- PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
- read_buffer_->data(), bytes_read); |
- bytes_read_ += bytes_read; |
+ if (track_request_) { |
+ PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
+ read_buffer_->data(), bytes_read); |
+ bytes_read_ += bytes_read; |
+ } |
return next_handler_->OnReadCompleted(request_id, bytes_read, defer); |
} |
@@ -81,9 +88,10 @@ bool DuplicateContentResourceHandler::OnResponseCompleted( |
int request_id, |
const net::URLRequestStatus& status, |
const std::string& security_info) { |
- |
- if (status.is_success()) |
- RecordContentMetrics(); |
+ if (track_request_) { |
+ if (status.is_success()) |
+ RecordContentMetrics(); |
+ } |
return next_handler_->OnResponseCompleted(request_id, status, security_info); |
} |
@@ -98,12 +106,11 @@ void DuplicateContentResourceHandler::RecordContentMetrics() { |
// Combine the contents_hash with the url, so we can test if future content |
// identical resources have the same original url or not. |
- MH_UINT32 hashed_with_url; |
const std::string& url_spec = request_->url().spec(); |
PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
url_spec.data(), url_spec.length()); |
- hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, |
- url_spec.length() + bytes_read_); |
+ MH_UINT32 hashed_with_url = PMurHash32_Result( |
+ pmurhash_ph1_, pmurhash_pcarry_, url_spec.length() + bytes_read_); |
DVLOG(4) << "url: " << url_spec; |
DVLOG(4) << "contents hash: " << contents_hash; |