OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/safe_browsing/bloom_filter.h" | |
6 | |
7 #include "base/metrics/histogram.h" | |
8 #include "base/rand_util.h" | |
9 #include "net/base/file_stream.h" | |
10 #include "net/base/net_errors.h" | |
11 | |
12 namespace { | |
13 | |
14 // The Jenkins 96 bit mix function: | |
15 // http://www.concentric.net/~Ttwang/tech/inthash.htm | |
16 uint32 HashMix(BloomFilter::HashKey hash_key, uint32 c) { | |
17 uint32 a = static_cast<uint32>(hash_key) & 0xFFFFFFFF; | |
18 uint32 b = static_cast<uint32>(hash_key >> 32) & 0xFFFFFFFF; | |
19 | |
20 a -= (b + c); a ^= (c >> 13); | |
21 b -= (c + a); b ^= (a << 8); | |
22 c -= (a + b); c ^= (b >> 13); | |
23 a -= (b + c); a ^= (c >> 12); | |
24 b -= (c + a); b ^= (a << 16); | |
25 c -= (a + b); c ^= (b >> 5); | |
26 a -= (b + c); a ^= (c >> 3); | |
27 b -= (c + a); b ^= (a << 10); | |
28 c -= (a + b); c ^= (b >> 15); | |
29 | |
30 return c; | |
31 } | |
32 | |
33 uint32 CalculateChecksum(const char* data, size_t size, | |
34 const BloomFilter::HashKeys& keys) { | |
35 uint32 checksum = 0; | |
36 for (size_t i = 0; i < keys.size(); ++i) { | |
37 checksum ^= static_cast<uint32>(keys[i]); | |
38 checksum ^= static_cast<uint32>(keys[i]>>32); | |
39 } | |
40 | |
41 // Watch out for sign-extension in the calculation. | |
42 // TODO(shess): Bit manipulation of signed chars is insane. | |
43 const unsigned char* udata = reinterpret_cast<const unsigned char*>(data); | |
44 for (size_t i = 0; i < size; ++i) { | |
45 checksum ^= udata[i] << ((i * 8) % 32); | |
46 } | |
47 | |
48 return checksum; | |
49 } | |
50 | |
51 } // namespace | |
52 | |
53 // static | |
54 int BloomFilter::FilterSizeForKeyCount(int key_count) { | |
55 const int default_min = BloomFilter::kBloomFilterMinSize; | |
56 const int number_of_keys = std::max(key_count, default_min); | |
57 return std::min(number_of_keys * BloomFilter::kBloomFilterSizeRatio, | |
58 BloomFilter::kBloomFilterMaxSize * 8); | |
59 } | |
60 | |
61 // static | |
62 void BloomFilter::RecordFailure(FailureType failure_type) { | |
63 UMA_HISTOGRAM_ENUMERATION("SB2.BloomFailure", failure_type, | |
64 FAILURE_FILTER_MAX); | |
65 } | |
66 | |
67 BloomFilter::BloomFilter(int bit_size) { | |
68 for (int i = 0; i < kNumHashKeys; ++i) | |
69 hash_keys_.push_back(base::RandUint64()); | |
70 | |
71 // Round up to the next boundary which fits bit_size. | |
72 byte_size_ = (bit_size + 7) / 8; | |
73 bit_size_ = byte_size_ * 8; | |
74 DCHECK_LE(bit_size, bit_size_); // strictly more bits. | |
75 data_.reset(new char[byte_size_]); | |
76 memset(data_.get(), 0, byte_size_); | |
77 checksum_ = CalculateChecksum(data_.get(), 0, hash_keys_); | |
78 } | |
79 | |
80 BloomFilter::BloomFilter(char* data, int size, const HashKeys& keys) | |
81 : hash_keys_(keys) { | |
82 byte_size_ = size; | |
83 bit_size_ = byte_size_ * 8; | |
84 data_.reset(data); | |
85 checksum_ = CalculateChecksum(data_.get(), byte_size_, hash_keys_); | |
86 } | |
87 | |
88 BloomFilter::~BloomFilter() { | |
89 } | |
90 | |
91 void BloomFilter::Insert(SBPrefix hash) { | |
92 uint32 hash_uint32 = static_cast<uint32>(hash); | |
93 for (size_t i = 0; i < hash_keys_.size(); ++i) { | |
94 uint32 index = HashMix(hash_keys_[i], hash_uint32) % bit_size_; | |
95 // Checksum needs to track the transition. Remove the if() when | |
96 // the checksum goes away. | |
97 if (!(data_[index / 8] & (1 << (index % 8)))) { | |
98 data_[index / 8] |= 1 << (index % 8); | |
99 checksum_ ^= 1u << (index % 32); | |
100 } | |
101 } | |
102 } | |
103 | |
104 bool BloomFilter::Exists(SBPrefix hash) const { | |
105 uint32 hash_uint32 = static_cast<uint32>(hash); | |
106 for (size_t i = 0; i < hash_keys_.size(); ++i) { | |
107 uint32 index = HashMix(hash_keys_[i], hash_uint32) % bit_size_; | |
108 if (!(data_[index / 8] & (1 << (index % 8)))) | |
109 return false; | |
110 } | |
111 return true; | |
112 } | |
113 | |
114 // static. | |
115 BloomFilter* BloomFilter::LoadFile(const FilePath& filter_name) { | |
116 net::FileStream filter(NULL); | |
117 | |
118 if (filter.OpenSync(filter_name, | |
119 base::PLATFORM_FILE_OPEN | | |
120 base::PLATFORM_FILE_READ) != net::OK) { | |
121 RecordFailure(FAILURE_FILTER_READ_OPEN); | |
122 return NULL; | |
123 } | |
124 | |
125 // Make sure we have a file version that we can understand. | |
126 int file_version; | |
127 int bytes_read = filter.ReadSync(reinterpret_cast<char*>(&file_version), | |
128 sizeof(file_version)); | |
129 if (bytes_read != sizeof(file_version) || file_version != kFileVersion) { | |
130 RecordFailure(FAILURE_FILTER_READ_VERSION); | |
131 return NULL; | |
132 } | |
133 | |
134 // Get all the random hash keys. | |
135 int num_keys; | |
136 bytes_read = filter.ReadSync(reinterpret_cast<char*>(&num_keys), | |
137 sizeof(num_keys)); | |
138 if (bytes_read != sizeof(num_keys) || | |
139 num_keys < 1 || num_keys > kNumHashKeys) { | |
140 RecordFailure(FAILURE_FILTER_READ_NUM_KEYS); | |
141 return NULL; | |
142 } | |
143 | |
144 HashKeys hash_keys; | |
145 for (int i = 0; i < num_keys; ++i) { | |
146 HashKey key; | |
147 bytes_read = filter.ReadSync(reinterpret_cast<char*>(&key), sizeof(key)); | |
148 if (bytes_read != sizeof(key)) { | |
149 RecordFailure(FAILURE_FILTER_READ_KEY); | |
150 return NULL; | |
151 } | |
152 hash_keys.push_back(key); | |
153 } | |
154 | |
155 // Read in the filter data, with sanity checks on min and max sizes. | |
156 int64 remaining64 = filter.Available(); | |
157 if (remaining64 < kBloomFilterMinSize) { | |
158 RecordFailure(FAILURE_FILTER_READ_DATA_MINSIZE); | |
159 return NULL; | |
160 } else if (remaining64 > kBloomFilterMaxSize) { | |
161 RecordFailure(FAILURE_FILTER_READ_DATA_MAXSIZE); | |
162 return NULL; | |
163 } | |
164 | |
165 int byte_size = static_cast<int>(remaining64); | |
166 scoped_array<char> data(new char[byte_size]); | |
167 bytes_read = filter.ReadSync(data.get(), byte_size); | |
168 if (bytes_read < byte_size) { | |
169 RecordFailure(FAILURE_FILTER_READ_DATA_SHORT); | |
170 return NULL; | |
171 } else if (bytes_read != byte_size) { | |
172 RecordFailure(FAILURE_FILTER_READ_DATA); | |
173 return NULL; | |
174 } | |
175 | |
176 // We've read everything okay, commit the data. | |
177 return new BloomFilter(data.release(), byte_size, hash_keys); | |
178 } | |
179 | |
180 bool BloomFilter::WriteFile(const FilePath& filter_name) const { | |
181 net::FileStream filter(NULL); | |
182 | |
183 if (filter.OpenSync(filter_name, | |
184 base::PLATFORM_FILE_WRITE | | |
185 base::PLATFORM_FILE_CREATE_ALWAYS) != net::OK) | |
186 return false; | |
187 | |
188 // Write the version information. | |
189 int version = kFileVersion; | |
190 int bytes_written = filter.WriteSync(reinterpret_cast<char*>(&version), | |
191 sizeof(version)); | |
192 if (bytes_written != sizeof(version)) | |
193 return false; | |
194 | |
195 // Write the number of random hash keys. | |
196 int num_keys = static_cast<int>(hash_keys_.size()); | |
197 bytes_written = filter.WriteSync(reinterpret_cast<char*>(&num_keys), | |
198 sizeof(num_keys)); | |
199 if (bytes_written != sizeof(num_keys)) | |
200 return false; | |
201 | |
202 for (int i = 0; i < num_keys; ++i) { | |
203 bytes_written = | |
204 filter.WriteSync(reinterpret_cast<const char*>(&hash_keys_[i]), | |
205 sizeof(hash_keys_[i])); | |
206 if (bytes_written != sizeof(hash_keys_[i])) | |
207 return false; | |
208 } | |
209 | |
210 // Write the filter data. | |
211 bytes_written = | |
212 filter.WriteSync(data_.get(), byte_size_); | |
213 if (bytes_written != byte_size_) | |
214 return false; | |
215 | |
216 return true; | |
217 } | |
218 | |
219 bool BloomFilter::CheckChecksum() const { | |
220 const uint32 checksum = CalculateChecksum(data_.get(), byte_size_, | |
221 hash_keys_); | |
222 return checksum_ == checksum; | |
223 } | |
OLD | NEW |