Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(113)

Side by Side Diff: chrome/browser/safe_browsing/prefix_set.cc

Issue 10892016: Strip out safe-browsing prefix-set checksumming code. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Oops, remove unit test for deleted code. Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/prefix_set.h" 5 #include "chrome/browser/safe_browsing/prefix_set.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <math.h> 8 #include <math.h>
9 9
10 #include "base/file_util.h" 10 #include "base/file_util.h"
(...skipping 21 matching lines...) Expand all
32 // For |std::upper_bound()| to find a prefix w/in a vector of pairs. 32 // For |std::upper_bound()| to find a prefix w/in a vector of pairs.
33 bool PrefixLess(const std::pair<SBPrefix,size_t>& a, 33 bool PrefixLess(const std::pair<SBPrefix,size_t>& a,
34 const std::pair<SBPrefix,size_t>& b) { 34 const std::pair<SBPrefix,size_t>& b) {
35 return a.first < b.first; 35 return a.first < b.first;
36 } 36 }
37 37
38 } // namespace 38 } // namespace
39 39
40 namespace safe_browsing { 40 namespace safe_browsing {
41 41
42 PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) 42 PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) {
43 : checksum_(0) {
44 if (sorted_prefixes.size()) { 43 if (sorted_prefixes.size()) {
45 // Estimate the resulting vector sizes. There will be strictly 44 // Estimate the resulting vector sizes. There will be strictly
46 // more than |min_runs| entries in |index_|, but there generally 45 // more than |min_runs| entries in |index_|, but there generally
47 // aren't many forced breaks. 46 // aren't many forced breaks.
48 const size_t min_runs = sorted_prefixes.size() / kMaxRun; 47 const size_t min_runs = sorted_prefixes.size() / kMaxRun;
49 index_.reserve(min_runs); 48 index_.reserve(min_runs);
50 deltas_.reserve(sorted_prefixes.size() - min_runs); 49 deltas_.reserve(sorted_prefixes.size() - min_runs);
51 50
52 // Lead with the first prefix. 51 // Lead with the first prefix.
53 SBPrefix prev_prefix = sorted_prefixes[0]; 52 SBPrefix prev_prefix = sorted_prefixes[0];
54 size_t run_length = 0; 53 size_t run_length = 0;
55 index_.push_back(std::make_pair(prev_prefix, deltas_.size())); 54 index_.push_back(std::make_pair(prev_prefix, deltas_.size()));
56 55
57 // Used to build a checksum from the data used to construct the
58 // structures. Since the data is a bunch of uniform hashes, it
59 // seems reasonable to just xor most of it in, rather than trying
60 // to use a more complicated algorithm.
61 uint32 checksum = static_cast<uint32>(sorted_prefixes[0]);
62 checksum ^= static_cast<uint32>(deltas_.size());
63
64 for (size_t i = 1; i < sorted_prefixes.size(); ++i) { 56 for (size_t i = 1; i < sorted_prefixes.size(); ++i) {
65 // Skip duplicates. 57 // Skip duplicates.
66 if (sorted_prefixes[i] == prev_prefix) 58 if (sorted_prefixes[i] == prev_prefix)
67 continue; 59 continue;
68 60
69 // Calculate the delta. |unsigned| is mandatory, because the 61 // Calculate the delta. |unsigned| is mandatory, because the
70 // sorted_prefixes could be more than INT_MAX apart. 62 // sorted_prefixes could be more than INT_MAX apart.
71 DCHECK_GT(sorted_prefixes[i], prev_prefix); 63 DCHECK_GT(sorted_prefixes[i], prev_prefix);
72 const unsigned delta = sorted_prefixes[i] - prev_prefix; 64 const unsigned delta = sorted_prefixes[i] - prev_prefix;
73 const uint16 delta16 = static_cast<uint16>(delta); 65 const uint16 delta16 = static_cast<uint16>(delta);
74 66
75 // New index ref if the delta doesn't fit, or if too many 67 // New index ref if the delta doesn't fit, or if too many
76 // consecutive deltas have been encoded. 68 // consecutive deltas have been encoded.
77 if (delta != static_cast<unsigned>(delta16) || run_length >= kMaxRun) { 69 if (delta != static_cast<unsigned>(delta16) || run_length >= kMaxRun) {
78 checksum ^= static_cast<uint32>(sorted_prefixes[i]);
79 checksum ^= static_cast<uint32>(deltas_.size());
80 index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size())); 70 index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size()));
81 run_length = 0; 71 run_length = 0;
82 } else { 72 } else {
83 checksum ^= static_cast<uint32>(delta16);
84 // Continue the run of deltas. 73 // Continue the run of deltas.
85 deltas_.push_back(delta16); 74 deltas_.push_back(delta16);
86 DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta); 75 DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta);
87 ++run_length; 76 ++run_length;
88 } 77 }
89 78
90 prev_prefix = sorted_prefixes[i]; 79 prev_prefix = sorted_prefixes[i];
91 } 80 }
92 checksum_ = checksum;
93 DCHECK(CheckChecksum());
94 81
95 // Send up some memory-usage stats. Bits because fractional bytes 82 // Send up some memory-usage stats. Bits because fractional bytes
96 // are weird. 83 // are weird.
97 const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT + 84 const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT +
98 deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT; 85 deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT;
99 const size_t unique_prefixes = index_.size() + deltas_.size(); 86 const size_t unique_prefixes = index_.size() + deltas_.size();
100 static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT; 87 static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT;
101 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix", 88 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix",
102 bits_used / unique_prefixes, 89 bits_used / unique_prefixes,
103 kMaxBitsPerPrefix); 90 kMaxBitsPerPrefix);
104 } 91 }
105 } 92 }
106 93
107 PrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index, 94 PrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index,
108 std::vector<uint16> *deltas) 95 std::vector<uint16> *deltas) {
109 : checksum_(0) {
110 DCHECK(index && deltas); 96 DCHECK(index && deltas);
111 index_.swap(*index); 97 index_.swap(*index);
112 deltas_.swap(*deltas); 98 deltas_.swap(*deltas);
113 } 99 }
114 100
115 PrefixSet::~PrefixSet() {} 101 PrefixSet::~PrefixSet() {}
116 102
117 bool PrefixSet::Exists(SBPrefix prefix) const { 103 bool PrefixSet::Exists(SBPrefix prefix) const {
118 if (index_.empty()) 104 if (index_.empty())
119 return false; 105 return false;
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
158 144
159 SBPrefix current = index_[ii].first; 145 SBPrefix current = index_[ii].first;
160 prefixes->push_back(current); 146 prefixes->push_back(current);
161 for (size_t di = index_[ii].second; di < deltas_end; ++di) { 147 for (size_t di = index_[ii].second; di < deltas_end; ++di) {
162 current += deltas_[di]; 148 current += deltas_[di];
163 prefixes->push_back(current); 149 prefixes->push_back(current);
164 } 150 }
165 } 151 }
166 } 152 }
167 153
168 // NOTE(shess): For debugging potential memory corruption. I wanted
169 // to test that the output of GetPrefixes() matched the checksum over
170 // the unique elements in the input to the constructor, but the buffer
171 // for GetPrefixes() to write to could itself be corrupted. That
172 // would make it look like GetPrefixes() itself was broken. At that
173 // point my head exploded, so I wrote this.
174 SBPrefix PrefixSet::GetPrefixesChecksum() const {
175 SBPrefix checksum = 0;
176
177 for (size_t ii = 0; ii < index_.size(); ++ii) {
178 // The deltas for this |index_| entry run to the next index entry,
179 // or the end of the deltas.
180 const size_t deltas_end =
181 (ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size();
182
183 SBPrefix current = index_[ii].first;
184 checksum ^= current;
185 for (size_t di = index_[ii].second; di < deltas_end; ++di) {
186 current += deltas_[di];
187 checksum ^= current;
188 }
189 }
190
191 return checksum;
192 }
193
194 // static 154 // static
195 PrefixSet* PrefixSet::LoadFile(const FilePath& filter_name) { 155 PrefixSet* PrefixSet::LoadFile(const FilePath& filter_name) {
196 int64 size_64; 156 int64 size_64;
197 if (!file_util::GetFileSize(filter_name, &size_64)) 157 if (!file_util::GetFileSize(filter_name, &size_64))
198 return NULL; 158 return NULL;
199 using base::MD5Digest; 159 using base::MD5Digest;
200 if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest))) 160 if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest)))
201 return NULL; 161 return NULL;
202 162
203 file_util::ScopedFILE file(file_util::OpenFile(filter_name, "rb")); 163 file_util::ScopedFILE file(file_util::OpenFile(filter_name, "rb"));
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
319 written = fwrite(&digest, sizeof(digest), 1, file.get()); 279 written = fwrite(&digest, sizeof(digest), 1, file.get());
320 if (written != 1) 280 if (written != 1)
321 return false; 281 return false;
322 282
323 // TODO(shess): Can this code check that the close was successful? 283 // TODO(shess): Can this code check that the close was successful?
324 file.reset(); 284 file.reset();
325 285
326 return true; 286 return true;
327 } 287 }
328 288
329 size_t PrefixSet::IndexBinFor(size_t target_index) const {
330 // The items in |index_| have the logical index of each previous
331 // item in |index_| plus the count of deltas between the items.
332 // Since the indices into |deltas_| are absolute, the logical index
333 // is then the sum of the two indices.
334 size_t lo = 0;
335 size_t hi = index_.size();
336
337 // Binary search because linear search was too slow (really, the
338 // unit test sucked). Inline because the elements can't be compared
339 // in isolation (their position matters).
340 while (hi - lo > 1) {
341 const size_t i = (lo + hi) / 2;
342
343 if (target_index < i + index_[i].second) {
344 DCHECK_LT(i, hi); // Always making progress.
345 hi = i;
346 } else {
347 DCHECK_GT(i, lo); // Always making progress.
348 lo = i;
349 }
350 }
351 return lo;
352 }
353
354 size_t PrefixSet::GetSize() const {
355 return index_.size() + deltas_.size();
356 }
357
358 bool PrefixSet::IsDeltaAt(size_t target_index) const {
359 CHECK_LT(target_index, GetSize());
360
361 const size_t i = IndexBinFor(target_index);
362 return target_index > i + index_[i].second;
363 }
364
365 uint16 PrefixSet::DeltaAt(size_t target_index) const {
366 CHECK_LT(target_index, GetSize());
367
368 // Find the |index_| entry which contains |target_index|.
369 const size_t i = IndexBinFor(target_index);
370
371 // Exactly on the |index_| entry means no delta.
372 CHECK_GT(target_index, i + index_[i].second);
373
374 // -i backs out the |index_| entries, -1 gets the delta that lead to
375 // the value at |target_index|.
376 CHECK_LT(target_index - i - 1, deltas_.size());
377 return deltas_[target_index - i - 1];
378 }
379
380 bool PrefixSet::CheckChecksum() const {
381 uint32 checksum = 0;
382
383 for (size_t ii = 0; ii < index_.size(); ++ii) {
384 checksum ^= static_cast<uint32>(index_[ii].first);
385 checksum ^= static_cast<uint32>(index_[ii].second);
386 }
387
388 for (size_t di = 0; di < deltas_.size(); ++di) {
389 checksum ^= static_cast<uint32>(deltas_[di]);
390 }
391
392 return checksum == checksum_;
393 }
394
395 } // namespace safe_browsing 289 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « chrome/browser/safe_browsing/prefix_set.h ('k') | chrome/browser/safe_browsing/prefix_set_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698