Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(239)

Side by Side Diff: chrome/browser/safe_browsing/bloom_filter.cc

Issue 11196036: Finish conversion from bloom filter to prefix set in safe browsing. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Delete the unnecessary unit test. Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/bloom_filter.h"
6
7 #include "base/metrics/histogram.h"
8 #include "base/rand_util.h"
9 #include "net/base/file_stream.h"
10 #include "net/base/net_errors.h"
11
12 namespace {
13
14 // The Jenkins 96 bit mix function:
15 // http://www.concentric.net/~Ttwang/tech/inthash.htm
16 uint32 HashMix(BloomFilter::HashKey hash_key, uint32 c) {
17 uint32 a = static_cast<uint32>(hash_key) & 0xFFFFFFFF;
18 uint32 b = static_cast<uint32>(hash_key >> 32) & 0xFFFFFFFF;
19
20 a -= (b + c); a ^= (c >> 13);
21 b -= (c + a); b ^= (a << 8);
22 c -= (a + b); c ^= (b >> 13);
23 a -= (b + c); a ^= (c >> 12);
24 b -= (c + a); b ^= (a << 16);
25 c -= (a + b); c ^= (b >> 5);
26 a -= (b + c); a ^= (c >> 3);
27 b -= (c + a); b ^= (a << 10);
28 c -= (a + b); c ^= (b >> 15);
29
30 return c;
31 }
32
33 uint32 CalculateChecksum(const char* data, size_t size,
34 const BloomFilter::HashKeys& keys) {
35 uint32 checksum = 0;
36 for (size_t i = 0; i < keys.size(); ++i) {
37 checksum ^= static_cast<uint32>(keys[i]);
38 checksum ^= static_cast<uint32>(keys[i]>>32);
39 }
40
41 // Watch out for sign-extension in the calculation.
42 // TODO(shess): Bit manipulation of signed chars is insane.
43 const unsigned char* udata = reinterpret_cast<const unsigned char*>(data);
44 for (size_t i = 0; i < size; ++i) {
45 checksum ^= udata[i] << ((i * 8) % 32);
46 }
47
48 return checksum;
49 }
50
51 } // namespace
52
53 // static
54 int BloomFilter::FilterSizeForKeyCount(int key_count) {
55 const int default_min = BloomFilter::kBloomFilterMinSize;
56 const int number_of_keys = std::max(key_count, default_min);
57 return std::min(number_of_keys * BloomFilter::kBloomFilterSizeRatio,
58 BloomFilter::kBloomFilterMaxSize * 8);
59 }
60
61 // static
62 void BloomFilter::RecordFailure(FailureType failure_type) {
63 UMA_HISTOGRAM_ENUMERATION("SB2.BloomFailure", failure_type,
64 FAILURE_FILTER_MAX);
65 }
66
67 BloomFilter::BloomFilter(int bit_size) {
68 for (int i = 0; i < kNumHashKeys; ++i)
69 hash_keys_.push_back(base::RandUint64());
70
71 // Round up to the next boundary which fits bit_size.
72 byte_size_ = (bit_size + 7) / 8;
73 bit_size_ = byte_size_ * 8;
74 DCHECK_LE(bit_size, bit_size_); // strictly more bits.
75 data_.reset(new char[byte_size_]);
76 memset(data_.get(), 0, byte_size_);
77 checksum_ = CalculateChecksum(data_.get(), 0, hash_keys_);
78 }
79
80 BloomFilter::BloomFilter(char* data, int size, const HashKeys& keys)
81 : hash_keys_(keys) {
82 byte_size_ = size;
83 bit_size_ = byte_size_ * 8;
84 data_.reset(data);
85 checksum_ = CalculateChecksum(data_.get(), byte_size_, hash_keys_);
86 }
87
88 BloomFilter::~BloomFilter() {
89 }
90
91 void BloomFilter::Insert(SBPrefix hash) {
92 uint32 hash_uint32 = static_cast<uint32>(hash);
93 for (size_t i = 0; i < hash_keys_.size(); ++i) {
94 uint32 index = HashMix(hash_keys_[i], hash_uint32) % bit_size_;
95 // Checksum needs to track the transition. Remove the if() when
96 // the checksum goes away.
97 if (!(data_[index / 8] & (1 << (index % 8)))) {
98 data_[index / 8] |= 1 << (index % 8);
99 checksum_ ^= 1u << (index % 32);
100 }
101 }
102 }
103
104 bool BloomFilter::Exists(SBPrefix hash) const {
105 uint32 hash_uint32 = static_cast<uint32>(hash);
106 for (size_t i = 0; i < hash_keys_.size(); ++i) {
107 uint32 index = HashMix(hash_keys_[i], hash_uint32) % bit_size_;
108 if (!(data_[index / 8] & (1 << (index % 8))))
109 return false;
110 }
111 return true;
112 }
113
114 // static.
115 BloomFilter* BloomFilter::LoadFile(const FilePath& filter_name) {
116 net::FileStream filter(NULL);
117
118 if (filter.OpenSync(filter_name,
119 base::PLATFORM_FILE_OPEN |
120 base::PLATFORM_FILE_READ) != net::OK) {
121 RecordFailure(FAILURE_FILTER_READ_OPEN);
122 return NULL;
123 }
124
125 // Make sure we have a file version that we can understand.
126 int file_version;
127 int bytes_read = filter.ReadSync(reinterpret_cast<char*>(&file_version),
128 sizeof(file_version));
129 if (bytes_read != sizeof(file_version) || file_version != kFileVersion) {
130 RecordFailure(FAILURE_FILTER_READ_VERSION);
131 return NULL;
132 }
133
134 // Get all the random hash keys.
135 int num_keys;
136 bytes_read = filter.ReadSync(reinterpret_cast<char*>(&num_keys),
137 sizeof(num_keys));
138 if (bytes_read != sizeof(num_keys) ||
139 num_keys < 1 || num_keys > kNumHashKeys) {
140 RecordFailure(FAILURE_FILTER_READ_NUM_KEYS);
141 return NULL;
142 }
143
144 HashKeys hash_keys;
145 for (int i = 0; i < num_keys; ++i) {
146 HashKey key;
147 bytes_read = filter.ReadSync(reinterpret_cast<char*>(&key), sizeof(key));
148 if (bytes_read != sizeof(key)) {
149 RecordFailure(FAILURE_FILTER_READ_KEY);
150 return NULL;
151 }
152 hash_keys.push_back(key);
153 }
154
155 // Read in the filter data, with sanity checks on min and max sizes.
156 int64 remaining64 = filter.Available();
157 if (remaining64 < kBloomFilterMinSize) {
158 RecordFailure(FAILURE_FILTER_READ_DATA_MINSIZE);
159 return NULL;
160 } else if (remaining64 > kBloomFilterMaxSize) {
161 RecordFailure(FAILURE_FILTER_READ_DATA_MAXSIZE);
162 return NULL;
163 }
164
165 int byte_size = static_cast<int>(remaining64);
166 scoped_array<char> data(new char[byte_size]);
167 bytes_read = filter.ReadSync(data.get(), byte_size);
168 if (bytes_read < byte_size) {
169 RecordFailure(FAILURE_FILTER_READ_DATA_SHORT);
170 return NULL;
171 } else if (bytes_read != byte_size) {
172 RecordFailure(FAILURE_FILTER_READ_DATA);
173 return NULL;
174 }
175
176 // We've read everything okay, commit the data.
177 return new BloomFilter(data.release(), byte_size, hash_keys);
178 }
179
180 bool BloomFilter::WriteFile(const FilePath& filter_name) const {
181 net::FileStream filter(NULL);
182
183 if (filter.OpenSync(filter_name,
184 base::PLATFORM_FILE_WRITE |
185 base::PLATFORM_FILE_CREATE_ALWAYS) != net::OK)
186 return false;
187
188 // Write the version information.
189 int version = kFileVersion;
190 int bytes_written = filter.WriteSync(reinterpret_cast<char*>(&version),
191 sizeof(version));
192 if (bytes_written != sizeof(version))
193 return false;
194
195 // Write the number of random hash keys.
196 int num_keys = static_cast<int>(hash_keys_.size());
197 bytes_written = filter.WriteSync(reinterpret_cast<char*>(&num_keys),
198 sizeof(num_keys));
199 if (bytes_written != sizeof(num_keys))
200 return false;
201
202 for (int i = 0; i < num_keys; ++i) {
203 bytes_written =
204 filter.WriteSync(reinterpret_cast<const char*>(&hash_keys_[i]),
205 sizeof(hash_keys_[i]));
206 if (bytes_written != sizeof(hash_keys_[i]))
207 return false;
208 }
209
210 // Write the filter data.
211 bytes_written =
212 filter.WriteSync(data_.get(), byte_size_);
213 if (bytes_written != byte_size_)
214 return false;
215
216 return true;
217 }
218
219 bool BloomFilter::CheckChecksum() const {
220 const uint32 checksum = CalculateChecksum(data_.get(), byte_size_,
221 hash_keys_);
222 return checksum_ == checksum;
223 }
OLDNEW
« no previous file with comments | « chrome/browser/safe_browsing/bloom_filter.h ('k') | chrome/browser/safe_browsing/bloom_filter_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698