chrome/browser/safe_browsing/prefix_set.cc - Issue 10892016: Strip out safe-browsing prefix-set checksumming code.

Side by Side Diff: chrome/browser/safe_browsing/prefix_set.cc

Issue 10892016: Strip out safe-browsing prefix-set checksumming code. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Oops, remove unit test for deleted code. Created 8 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/safe_browsing/prefix_set.h"	5 #include "chrome/browser/safe_browsing/prefix_set.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <math.h>	8 #include <math.h>

9	9

10 #include "base/file_util.h"	10 #include "base/file_util.h"

(...skipping 21 matching lines...) Expand all Loading...
32 // For \|std::upper_bound()\| to find a prefix w/in a vector of pairs.	32 // For \|std::upper_bound()\| to find a prefix w/in a vector of pairs.

33 bool PrefixLess(const std::pair<SBPrefix,size_t>& a,	33 bool PrefixLess(const std::pair<SBPrefix,size_t>& a,

34 const std::pair<SBPrefix,size_t>& b) {	34 const std::pair<SBPrefix,size_t>& b) {

35 return a.first < b.first;	35 return a.first < b.first;

36 }	36 }

37	37

38 } // namespace	38 } // namespace

39	39

40 namespace safe_browsing {	40 namespace safe_browsing {

41	41

42 PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes)	42 PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) {

43 : checksum_(0) {

44 if (sorted_prefixes.size()) {	43 if (sorted_prefixes.size()) {

45 // Estimate the resulting vector sizes. There will be strictly	44 // Estimate the resulting vector sizes. There will be strictly

46 // more than \|min_runs\| entries in \|index_\|, but there generally	45 // more than \|min_runs\| entries in \|index_\|, but there generally

47 // aren't many forced breaks.	46 // aren't many forced breaks.

48 const size_t min_runs = sorted_prefixes.size() / kMaxRun;	47 const size_t min_runs = sorted_prefixes.size() / kMaxRun;

49 index_.reserve(min_runs);	48 index_.reserve(min_runs);

50 deltas_.reserve(sorted_prefixes.size() - min_runs);	49 deltas_.reserve(sorted_prefixes.size() - min_runs);

51	50

52 // Lead with the first prefix.	51 // Lead with the first prefix.

53 SBPrefix prev_prefix = sorted_prefixes[0];	52 SBPrefix prev_prefix = sorted_prefixes[0];

54 size_t run_length = 0;	53 size_t run_length = 0;

55 index_.push_back(std::make_pair(prev_prefix, deltas_.size()));	54 index_.push_back(std::make_pair(prev_prefix, deltas_.size()));

56	55

57 // Used to build a checksum from the data used to construct the

58 // structures. Since the data is a bunch of uniform hashes, it

59 // seems reasonable to just xor most of it in, rather than trying

60 // to use a more complicated algorithm.

61 uint32 checksum = static_cast<uint32>(sorted_prefixes[0]);

62 checksum ^= static_cast<uint32>(deltas_.size());

63

64 for (size_t i = 1; i < sorted_prefixes.size(); ++i) {	56 for (size_t i = 1; i < sorted_prefixes.size(); ++i) {

65 // Skip duplicates.	57 // Skip duplicates.

66 if (sorted_prefixes[i] == prev_prefix)	58 if (sorted_prefixes[i] == prev_prefix)

67 continue;	59 continue;

68	60

69 // Calculate the delta. \|unsigned\| is mandatory, because the	61 // Calculate the delta. \|unsigned\| is mandatory, because the

70 // sorted_prefixes could be more than INT_MAX apart.	62 // sorted_prefixes could be more than INT_MAX apart.

71 DCHECK_GT(sorted_prefixes[i], prev_prefix);	63 DCHECK_GT(sorted_prefixes[i], prev_prefix);

72 const unsigned delta = sorted_prefixes[i] - prev_prefix;	64 const unsigned delta = sorted_prefixes[i] - prev_prefix;

73 const uint16 delta16 = static_cast<uint16>(delta);	65 const uint16 delta16 = static_cast<uint16>(delta);

74	66

75 // New index ref if the delta doesn't fit, or if too many	67 // New index ref if the delta doesn't fit, or if too many

76 // consecutive deltas have been encoded.	68 // consecutive deltas have been encoded.

77 if (delta != static_cast<unsigned>(delta16) \|\| run_length >= kMaxRun) {	69 if (delta != static_cast<unsigned>(delta16) \|\| run_length >= kMaxRun) {

78 checksum ^= static_cast<uint32>(sorted_prefixes[i]);

79 checksum ^= static_cast<uint32>(deltas_.size());

80 index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size()));	70 index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size()));

81 run_length = 0;	71 run_length = 0;

82 } else {	72 } else {

83 checksum ^= static_cast<uint32>(delta16);

84 // Continue the run of deltas.	73 // Continue the run of deltas.

85 deltas_.push_back(delta16);	74 deltas_.push_back(delta16);

86 DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta);	75 DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta);

87 ++run_length;	76 ++run_length;

88 }	77 }

89	78

90 prev_prefix = sorted_prefixes[i];	79 prev_prefix = sorted_prefixes[i];

91 }	80 }

92 checksum_ = checksum;

93 DCHECK(CheckChecksum());

94	81

95 // Send up some memory-usage stats. Bits because fractional bytes	82 // Send up some memory-usage stats. Bits because fractional bytes

96 // are weird.	83 // are weird.

97 const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT +	84 const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT +

98 deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT;	85 deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT;

99 const size_t unique_prefixes = index_.size() + deltas_.size();	86 const size_t unique_prefixes = index_.size() + deltas_.size();

100 static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT;	87 static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT;

101 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix",	88 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix",

102 bits_used / unique_prefixes,	89 bits_used / unique_prefixes,

103 kMaxBitsPerPrefix);	90 kMaxBitsPerPrefix);

104 }	91 }

105 }	92 }

106	93

107 PrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index,	94 PrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index,

108 std::vector<uint16> *deltas)	95 std::vector<uint16> *deltas) {

109 : checksum_(0) {

110 DCHECK(index && deltas);	96 DCHECK(index && deltas);

111 index_.swap(*index);	97 index_.swap(*index);

112 deltas_.swap(*deltas);	98 deltas_.swap(*deltas);

113 }	99 }

114	100

115 PrefixSet::~PrefixSet() {}	101 PrefixSet::~PrefixSet() {}

116	102

117 bool PrefixSet::Exists(SBPrefix prefix) const {	103 bool PrefixSet::Exists(SBPrefix prefix) const {

118 if (index_.empty())	104 if (index_.empty())

119 return false;	105 return false;

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
158	144

159 SBPrefix current = index_[ii].first;	145 SBPrefix current = index_[ii].first;

160 prefixes->push_back(current);	146 prefixes->push_back(current);

161 for (size_t di = index_[ii].second; di < deltas_end; ++di) {	147 for (size_t di = index_[ii].second; di < deltas_end; ++di) {

162 current += deltas_[di];	148 current += deltas_[di];

163 prefixes->push_back(current);	149 prefixes->push_back(current);

164 }	150 }

165 }	151 }

166 }	152 }

167	153

168 // NOTE(shess): For debugging potential memory corruption. I wanted

169 // to test that the output of GetPrefixes() matched the checksum over

170 // the unique elements in the input to the constructor, but the buffer

171 // for GetPrefixes() to write to could itself be corrupted. That

172 // would make it look like GetPrefixes() itself was broken. At that

173 // point my head exploded, so I wrote this.

174 SBPrefix PrefixSet::GetPrefixesChecksum() const {

175 SBPrefix checksum = 0;

176

177 for (size_t ii = 0; ii < index_.size(); ++ii) {

178 // The deltas for this \|index_\| entry run to the next index entry,

179 // or the end of the deltas.

180 const size_t deltas_end =

181 (ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size();

182

183 SBPrefix current = index_[ii].first;

184 checksum ^= current;

185 for (size_t di = index_[ii].second; di < deltas_end; ++di) {

186 current += deltas_[di];

187 checksum ^= current;

188 }

189 }

190

191 return checksum;

192 }

193

194 // static	154 // static

195 PrefixSet* PrefixSet::LoadFile(const FilePath& filter_name) {	155 PrefixSet* PrefixSet::LoadFile(const FilePath& filter_name) {

196 int64 size_64;	156 int64 size_64;

197 if (!file_util::GetFileSize(filter_name, &size_64))	157 if (!file_util::GetFileSize(filter_name, &size_64))

198 return NULL;	158 return NULL;

199 using base::MD5Digest;	159 using base::MD5Digest;

200 if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest)))	160 if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest)))

201 return NULL;	161 return NULL;

202	162

203 file_util::ScopedFILE file(file_util::OpenFile(filter_name, "rb"));	163 file_util::ScopedFILE file(file_util::OpenFile(filter_name, "rb"));

(...skipping 115 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
319 written = fwrite(&digest, sizeof(digest), 1, file.get());	279 written = fwrite(&digest, sizeof(digest), 1, file.get());

320 if (written != 1)	280 if (written != 1)

321 return false;	281 return false;

322	282

323 // TODO(shess): Can this code check that the close was successful?	283 // TODO(shess): Can this code check that the close was successful?

324 file.reset();	284 file.reset();

325	285

326 return true;	286 return true;

327 }	287 }

328	288

329 size_t PrefixSet::IndexBinFor(size_t target_index) const {

330 // The items in \|index_\| have the logical index of each previous

331 // item in \|index_\| plus the count of deltas between the items.

332 // Since the indices into \|deltas_\| are absolute, the logical index

333 // is then the sum of the two indices.

334 size_t lo = 0;

335 size_t hi = index_.size();

336

337 // Binary search because linear search was too slow (really, the

338 // unit test sucked). Inline because the elements can't be compared

339 // in isolation (their position matters).

340 while (hi - lo > 1) {

341 const size_t i = (lo + hi) / 2;

342

343 if (target_index < i + index_[i].second) {

344 DCHECK_LT(i, hi); // Always making progress.

345 hi = i;

346 } else {

347 DCHECK_GT(i, lo); // Always making progress.

348 lo = i;

349 }

350 }

351 return lo;

352 }

353

354 size_t PrefixSet::GetSize() const {

355 return index_.size() + deltas_.size();

356 }

357

358 bool PrefixSet::IsDeltaAt(size_t target_index) const {

359 CHECK_LT(target_index, GetSize());

360

361 const size_t i = IndexBinFor(target_index);

362 return target_index > i + index_[i].second;

363 }

364

365 uint16 PrefixSet::DeltaAt(size_t target_index) const {

366 CHECK_LT(target_index, GetSize());

367

368 // Find the \|index_\| entry which contains \|target_index\|.

369 const size_t i = IndexBinFor(target_index);

370

371 // Exactly on the \|index_\| entry means no delta.

372 CHECK_GT(target_index, i + index_[i].second);

373

374 // -i backs out the \|index_\| entries, -1 gets the delta that lead to

375 // the value at \|target_index\|.

376 CHECK_LT(target_index - i - 1, deltas_.size());

377 return deltas_[target_index - i - 1];

378 }

379

380 bool PrefixSet::CheckChecksum() const {

381 uint32 checksum = 0;

382

383 for (size_t ii = 0; ii < index_.size(); ++ii) {

384 checksum ^= static_cast<uint32>(index_[ii].first);

385 checksum ^= static_cast<uint32>(index_[ii].second);

386 }

387

388 for (size_t di = 0; di < deltas_.size(); ++di) {

389 checksum ^= static_cast<uint32>(deltas_[di]);

390 }

391

392 return checksum == checksum_;

393 }

394

395 } // namespace safe_browsing	289 } // namespace safe_browsing

OLD	NEW

« no previous file with comments | « chrome/browser/safe_browsing/prefix_set.h ('k') | chrome/browser/safe_browsing/prefix_set_unittest.cc » ('j') | no next file with comments »