Chromium Code Reviews| Index: content/browser/renderer_host/duplicate_resource_handler.cc |
| =================================================================== |
| --- content/browser/renderer_host/duplicate_resource_handler.cc (revision 0) |
| +++ content/browser/renderer_host/duplicate_resource_handler.cc (revision 0) |
| @@ -0,0 +1,141 @@ |
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "content/browser/renderer_host/duplicate_resource_handler.h" |
| + |
| +#include <cmath> |
| +#include <cstring> |
| +#include <set> |
| + |
| +#include "base/logging.h" |
| +#include "base/metrics/histogram.h" |
| +#include "content/browser/renderer_host/resource_request_info_impl.h" |
| +#include "net/base/io_buffer.h" |
| +#include "net/url_request/url_request.h" |
| +#include "third_party/smhasher/src/PMurHash.h" |
| + |
| + |
| +namespace content { |
| + |
| +namespace{ |
| + |
| +// This set keeps track of a hash of resources |
| +// that we have seen |
| +std::set<uint32>* GetSetOfHashes() { |
|
gavinp
2012/07/18 12:13:32
Naming by data type isn't ideal. Better by use: Co
frankwang
2012/07/19 16:10:26
Done.
|
| + static std::set<uint32> seen_resources; |
|
gavinp
2012/07/18 12:13:32
Probably we should bite the bullet, and use base/m
frankwang
2012/07/19 16:10:26
Done.
|
| + return &seen_resources; |
| +} |
| + |
| +// This set keeps track of hash of resources based on origin |
| +// that we have seen previously |
| +std::set<uint32>* GetSetOfHashesWithURL(){ |
| + static std::set<uint32> seen_resources_with_url; |
| + return &seen_resources_with_url; |
| +} |
| + |
| +} // namespace |
| + |
| +DuplicateResourceHandler::DuplicateResourceHandler( |
| + scoped_ptr<ResourceHandler> next_handler, |
| + ResourceType::Type resource_type, |
| + net::URLRequest* request) |
| + : LayeredResourceHandler(next_handler.Pass()), |
| + resource_type_(resource_type), |
| + ph1_(0), |
| + pcarry_(0), |
| + buffer_size_(0), |
| + bytes_read_(0), |
| + request_(request) { |
| +} |
| + |
| +DuplicateResourceHandler::~DuplicateResourceHandler() { |
| +} |
| + |
| +bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf, |
| + int* buf_size, int min_size) { |
| + DCHECK_EQ(-1, min_size); |
| + |
| + if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) |
| + return false; |
| + |
|
gavinp
2012/07/18 12:13:32
Lose this blank line.
frankwang
2012/07/19 16:10:26
Done.
|
| + read_buffer_ = *buf; |
| + buffer_size_ = *buf_size; |
| + return true; |
| +} |
| + |
| +bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read, |
| + bool* defer) { |
| + |
| + PMurHash32_Process(&ph1_,&pcarry_,read_buffer_->data(), bytes_read); |
|
gavinp
2012/07/18 12:13:32
spaces after commas.
frankwang
2012/07/19 16:10:26
Done.
|
| + bytes_read_ += bytes_read; |
| + |
| + return next_handler_->OnReadCompleted(request_id, bytes_read, defer); |
| +} |
| + |
| +bool DuplicateResourceHandler::OnResponseCompleted( |
| + int request_id, |
| + const net::URLRequestStatus& status, |
| + const std::string& security_info) { |
|
gavinp
2012/07/18 12:13:32
What should you do for status != net::URLRequestSt
frankwang
2012/07/19 16:10:26
I put in a check so that I pass through when it is
|
| + |
| + uint32 resource_hash = PMurHash32_Result(ph1_, pcarry_, bytes_read_); |
| + |
| + // Hash url into the resource to see whether it is |
| + // from the same or different origin |
|
gavinp
2012/07/18 12:13:32
Comments should be sentences (have a period) and w
frankwang
2012/07/19 16:10:26
Done.
|
| + uint32 hashed_with_url; |
| + const char* url = request_->url().spec().c_str(); |
| + int url_length = strlen(url); |
|
gavinp
2012/07/18 12:13:32
This scares me a bit. Is it safe to trust c_str()
frankwang
2012/07/19 16:10:26
Done.
|
| + PMurHash32_Process(&ph1_, &pcarry_, url, url_length); |
| + hashed_with_url = PMurHash32_Result(ph1_, pcarry_, url_length + bytes_read_); |
| + |
| + DVLOG(4) << "url: " << url; |
| + DVLOG(4) << "resource hash: " << resource_hash; |
| + DVLOG(4) << "hash with url: " << hashed_with_url; |
| + |
| + // This boolean answers whether we found resource |
| + // based just on hash |
| + const bool did_we_find_resource = |
|
gavinp
2012/07/18 12:13:32
did_match_contents_ maybe?
frankwang
2012/07/19 16:10:26
Done.
|
| + GetSetOfHashes()->find(resource_hash) != |
|
gavinp
2012/07/18 12:13:32
Use count(resource_hash) instead. You don't need t
|
| + GetSetOfHashes()->end(); |
| + |
| + // This boolean checks whether we found a resource from the original url |
| + // as one previously seen |
| + const bool did_we_find_resource_original_url = |
|
gavinp
2012/07/18 12:13:32
I've thought about it more, and now I don't like t
frankwang
2012/07/19 16:10:26
Done.
|
| + GetSetOfHashesWithURL()->find(hashed_with_url) != |
| + GetSetOfHashesWithURL()->end(); |
| + |
| + // If we found the resource, classify whether it is |
| + // from the same url or different |
|
gavinp
2012/07/18 12:13:32
Best practice is to have a single instance of each
frankwang
2012/07/19 16:10:26
Done.
|
| + if (did_we_find_resource) { |
| + // If it is from the original url, it will hit on both caches |
| + if (did_we_find_resource_original_url) { |
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true); |
|
gavinp
2012/07/18 12:13:32
I've thought about this a bit more: I think you ju
frankwang
2012/07/19 16:10:26
Done.
|
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", true); |
| + } else { |
| + // If it is a different url (interesting case), it hits on the |
| + // proposed cache not the current cache |
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true); |
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false); |
| + // Record bytes missed because we are caching |
| + // based on origin instead of resource |
| + UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.HashMiss.Size", bytes_read_, |
|
gavinp
2012/07/18 12:13:32
I think you want: "Duplicate.Size.HashHitUrlMiss"
frankwang
2012/07/19 16:10:26
Done.
|
| + 1, 0x7FFFFFFF, 50); |
| + // Record resource type for missed resource |
| + UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType", resource_type_, |
|
gavinp
2012/07/18 12:13:32
The name should reflect that it's for missed resou
frankwang
2012/07/19 16:10:26
Done.
|
| + ResourceType::LAST_TYPE); |
| + GetSetOfHashesWithURL()->insert(hashed_with_url); |
| + } |
| + } else { |
| + // We did not see the resource so it is a miss on both caches |
|
gavinp
2012/07/18 12:13:32
I don't like comments like this. The logic should
frankwang
2012/07/19 16:10:26
Done.
|
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", false); |
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false); |
| + GetSetOfHashes()->insert(resource_hash); |
| + GetSetOfHashesWithURL()->insert(hashed_with_url); |
| + } |
| + |
| + bytes_read_ = 0; |
| + read_buffer_ = NULL; |
| + return next_handler_->OnResponseCompleted(request_id, status, security_info); |
| +} |
| + |
| +} //namespace content |