Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "content/browser/renderer_host/duplicate_resource_handler.h" | |
| 6 | |
| 7 #include <cmath> | |
| 8 #include <cstring> | |
| 9 #include <set> | |
| 10 | |
| 11 #include "base/logging.h" | |
| 12 #include "base/metrics/histogram.h" | |
| 13 #include "content/browser/renderer_host/resource_request_info_impl.h" | |
| 14 #include "net/base/io_buffer.h" | |
| 15 #include "net/url_request/url_request.h" | |
| 16 #include "third_party/smhasher/src/PMurHash.h" | |
| 17 | |
| 18 | |
| 19 namespace content { | |
| 20 | |
| 21 namespace{ | |
| 22 | |
| 23 // This set keeps track of a hash of resources | |
| 24 // that we have seen | |
| 25 std::set<uint32>* GetSetOfHashes() { | |
|
gavinp
2012/07/18 12:13:32
Naming by data type isn't ideal. Better by use: Co
frankwang
2012/07/19 16:10:26
Done.
| |
| 26 static std::set<uint32> seen_resources; | |
|
gavinp
2012/07/18 12:13:32
Probably we should bite the bullet, and use base/m
frankwang
2012/07/19 16:10:26
Done.
| |
| 27 return &seen_resources; | |
| 28 } | |
| 29 | |
| 30 // This set keeps track of hash of resources based on origin | |
| 31 // that we have seen previously | |
| 32 std::set<uint32>* GetSetOfHashesWithURL(){ | |
| 33 static std::set<uint32> seen_resources_with_url; | |
| 34 return &seen_resources_with_url; | |
| 35 } | |
| 36 | |
| 37 } // namespace | |
| 38 | |
| 39 DuplicateResourceHandler::DuplicateResourceHandler( | |
| 40 scoped_ptr<ResourceHandler> next_handler, | |
| 41 ResourceType::Type resource_type, | |
| 42 net::URLRequest* request) | |
| 43 : LayeredResourceHandler(next_handler.Pass()), | |
| 44 resource_type_(resource_type), | |
| 45 ph1_(0), | |
| 46 pcarry_(0), | |
| 47 buffer_size_(0), | |
| 48 bytes_read_(0), | |
| 49 request_(request) { | |
| 50 } | |
| 51 | |
| 52 DuplicateResourceHandler::~DuplicateResourceHandler() { | |
| 53 } | |
| 54 | |
| 55 bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf, | |
| 56 int* buf_size, int min_size) { | |
| 57 DCHECK_EQ(-1, min_size); | |
| 58 | |
| 59 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | |
| 60 return false; | |
| 61 | |
|
gavinp
2012/07/18 12:13:32
Lose this blank line.
frankwang
2012/07/19 16:10:26
Done.
| |
| 62 read_buffer_ = *buf; | |
| 63 buffer_size_ = *buf_size; | |
| 64 return true; | |
| 65 } | |
| 66 | |
| 67 bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read, | |
| 68 bool* defer) { | |
| 69 | |
| 70 PMurHash32_Process(&ph1_,&pcarry_,read_buffer_->data(), bytes_read); | |
|
gavinp
2012/07/18 12:13:32
spaces after commas.
frankwang
2012/07/19 16:10:26
Done.
| |
| 71 bytes_read_ += bytes_read; | |
| 72 | |
| 73 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | |
| 74 } | |
| 75 | |
| 76 bool DuplicateResourceHandler::OnResponseCompleted( | |
| 77 int request_id, | |
| 78 const net::URLRequestStatus& status, | |
| 79 const std::string& security_info) { | |
|
gavinp
2012/07/18 12:13:32
What should you do for status != net::URLRequestSt
frankwang
2012/07/19 16:10:26
I put in a check so that I pass through when it is
| |
| 80 | |
| 81 uint32 resource_hash = PMurHash32_Result(ph1_, pcarry_, bytes_read_); | |
| 82 | |
| 83 // Hash url into the resource to see whether it is | |
| 84 // from the same or different origin | |
|
gavinp
2012/07/18 12:13:32
Comments should be sentences (have a period) and w
frankwang
2012/07/19 16:10:26
Done.
| |
| 85 uint32 hashed_with_url; | |
| 86 const char* url = request_->url().spec().c_str(); | |
| 87 int url_length = strlen(url); | |
|
gavinp
2012/07/18 12:13:32
This scares me a bit. Is it safe to trust c_str()
frankwang
2012/07/19 16:10:26
Done.
| |
| 88 PMurHash32_Process(&ph1_, &pcarry_, url, url_length); | |
| 89 hashed_with_url = PMurHash32_Result(ph1_, pcarry_, url_length + bytes_read_); | |
| 90 | |
| 91 DVLOG(4) << "url: " << url; | |
| 92 DVLOG(4) << "resource hash: " << resource_hash; | |
| 93 DVLOG(4) << "hash with url: " << hashed_with_url; | |
| 94 | |
| 95 // This boolean answers whether we found resource | |
| 96 // based just on hash | |
| 97 const bool did_we_find_resource = | |
|
gavinp
2012/07/18 12:13:32
did_match_contents_ maybe?
frankwang
2012/07/19 16:10:26
Done.
| |
| 98 GetSetOfHashes()->find(resource_hash) != | |
|
gavinp
2012/07/18 12:13:32
Use count(resource_hash) instead. You don't need t
| |
| 99 GetSetOfHashes()->end(); | |
| 100 | |
| 101 // This boolean checks whether we found a resource from the original url | |
| 102 // as one previously seen | |
| 103 const bool did_we_find_resource_original_url = | |
|
gavinp
2012/07/18 12:13:32
I've thought about it more, and now I don't like t
frankwang
2012/07/19 16:10:26
Done.
| |
| 104 GetSetOfHashesWithURL()->find(hashed_with_url) != | |
| 105 GetSetOfHashesWithURL()->end(); | |
| 106 | |
| 107 // If we found the resource, classify whether it is | |
| 108 // from the same url or different | |
|
gavinp
2012/07/18 12:13:32
Best practice is to have a single instance of each
frankwang
2012/07/19 16:10:26
Done.
| |
| 109 if (did_we_find_resource) { | |
| 110 // If it is from the original url, it will hit on both caches | |
| 111 if (did_we_find_resource_original_url) { | |
| 112 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true); | |
|
gavinp
2012/07/18 12:13:32
I've thought about this a bit more: I think you ju
frankwang
2012/07/19 16:10:26
Done.
| |
| 113 UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", true); | |
| 114 } else { | |
| 115 // If it is a different url (interesting case), it hits on the | |
| 116 // proposed cache not the current cache | |
| 117 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true); | |
| 118 UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false); | |
| 119 // Record bytes missed because we are caching | |
| 120 // based on origin instead of resource | |
| 121 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.HashMiss.Size", bytes_read_, | |
|
gavinp
2012/07/18 12:13:32
I think you want: "Duplicate.Size.HashHitUrlMiss"
frankwang
2012/07/19 16:10:26
Done.
| |
| 122 1, 0x7FFFFFFF, 50); | |
| 123 // Record resource type for missed resource | |
| 124 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType", resource_type_, | |
|
gavinp
2012/07/18 12:13:32
The name should reflect that it's for missed resou
frankwang
2012/07/19 16:10:26
Done.
| |
| 125 ResourceType::LAST_TYPE); | |
| 126 GetSetOfHashesWithURL()->insert(hashed_with_url); | |
| 127 } | |
| 128 } else { | |
| 129 // We did not see the resource so it is a miss on both caches | |
|
gavinp
2012/07/18 12:13:32
I don't like comments like this. The logic should
frankwang
2012/07/19 16:10:26
Done.
| |
| 130 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", false); | |
| 131 UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false); | |
| 132 GetSetOfHashes()->insert(resource_hash); | |
| 133 GetSetOfHashesWithURL()->insert(hashed_with_url); | |
| 134 } | |
| 135 | |
| 136 bytes_read_ = 0; | |
| 137 read_buffer_ = NULL; | |
| 138 return next_handler_->OnResponseCompleted(request_id, status, security_info); | |
| 139 } | |
| 140 | |
| 141 } //namespace content | |
| OLD | NEW |