OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 // | |
5 // A simple bloom filter. It uses a large number (20) of hashes to reduce the | |
6 // possibility of false positives. The bloom filter's hashing uses random keys | |
7 // in order to minimize the chance that a false positive for one user is a false | |
8 // positive for all. | |
9 // | |
10 // The bloom filter manages it serialization to disk with the following file | |
11 // format: | |
12 // 4 byte version number | |
13 // 4 byte number of hash keys (n) | |
14 // n * 8 bytes of hash keys | |
15 // Remaining bytes are the filter data. | |
16 | |
17 #ifndef CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ | |
18 #define CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ | |
19 | |
20 #include <vector> | |
21 | |
22 #include "base/gtest_prod_util.h" | |
23 #include "base/memory/ref_counted.h" | |
24 #include "base/memory/scoped_ptr.h" | |
25 #include "chrome/browser/safe_browsing/safe_browsing_util.h" | |
26 | |
27 class FilePath; | |
28 | |
29 class BloomFilter : public base::RefCountedThreadSafe<BloomFilter> { | |
30 public: | |
31 typedef uint64 HashKey; | |
32 typedef std::vector<HashKey> HashKeys; | |
33 | |
34 // Constructs an empty filter with the given size. | |
35 explicit BloomFilter(int bit_size); | |
36 | |
37 // Constructs a filter from serialized data. This object owns the memory and | |
38 // will delete it on destruction. | |
39 BloomFilter(char* data, int size, const HashKeys& keys); | |
40 | |
41 void Insert(SBPrefix hash); | |
42 bool Exists(SBPrefix hash) const; | |
43 | |
44 const char* data() const { return data_.get(); } | |
45 int size() const { return byte_size_; } | |
46 | |
47 // Loading and storing the filter from / to disk. | |
48 static BloomFilter* LoadFile(const FilePath& filter_name); | |
49 bool WriteFile(const FilePath& filter_name) const; | |
50 | |
51 // How many bits to use per item. See the design doc for more information. | |
52 static const int kBloomFilterSizeRatio = 25; | |
53 | |
54 // Force a minimum size on the bloom filter to prevent a high false positive | |
55 // hash request rate (in bytes). | |
56 static const int kBloomFilterMinSize = 250000; | |
57 | |
58 // Force a maximum size on the bloom filter to avoid using too much memory | |
59 // (in bytes). | |
60 static const int kBloomFilterMaxSize = 3 * 1024 * 1024; | |
61 | |
62 // Use the above constants to calculate an appropriate size to pass | |
63 // to the BloomFilter constructor based on the intended |key_count|. | |
64 // TODO(shess): This is very clunky. It would be cleaner to have | |
65 // the constructor manage this, but at this time the unit and perf | |
66 // tests wish to make their own calculations. | |
67 static int FilterSizeForKeyCount(int key_count); | |
68 | |
69 // Check whether the contents of the bloom filter have changed since | |
70 // construction. Present while debugging PrefixSet. | |
71 bool CheckChecksum() const; | |
72 | |
73 private: | |
74 friend class base::RefCountedThreadSafe<BloomFilter>; | |
75 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterUse); | |
76 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingBloomFilter, BloomFilterFile); | |
77 | |
78 static const int kNumHashKeys = 20; | |
79 static const int kFileVersion = 1; | |
80 | |
81 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE | |
82 // ORDERING OF THESE VALUES. | |
83 enum FailureType { | |
84 FAILURE_FILTER_READ_OPEN, | |
85 FAILURE_FILTER_READ_VERSION, | |
86 FAILURE_FILTER_READ_NUM_KEYS, | |
87 FAILURE_FILTER_READ_KEY, | |
88 FAILURE_FILTER_READ_DATA_MINSIZE, | |
89 FAILURE_FILTER_READ_DATA_MAXSIZE, | |
90 FAILURE_FILTER_READ_DATA_SHORT, | |
91 FAILURE_FILTER_READ_DATA, | |
92 | |
93 // Memory space for histograms is determined by the max. ALWAYS | |
94 // ADD NEW VALUES BEFORE THIS ONE. | |
95 FAILURE_FILTER_MAX | |
96 }; | |
97 | |
98 static void RecordFailure(FailureType failure_type); | |
99 | |
100 ~BloomFilter(); | |
101 | |
102 int byte_size_; // size in bytes | |
103 int bit_size_; // size in bits | |
104 scoped_array<char> data_; | |
105 | |
106 // Random keys used for hashing. | |
107 HashKeys hash_keys_; | |
108 | |
109 // For debugging, used to verify that |hash_keys_| and |data_| have | |
110 // not changed other than via constructor or Insert(). | |
111 uint32 checksum_; | |
112 | |
113 DISALLOW_COPY_AND_ASSIGN(BloomFilter); | |
114 }; | |
115 | |
116 #endif // CHROME_BROWSER_SAFE_BROWSING_BLOOM_FILTER_H_ | |
OLD | NEW |