// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/guid.h"
#include "base/memory/scoped_ptr.h"
#include "base/rand_util.h"
#include "base/string_number_conversions.h"
#include "chrome/common/metrics/entropy_provider.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace {

// Computes the standard deviation of distribution |values|.
double ComputeStandardDeviation(const std::vector<int>& values) {
  const int sum = std::accumulate(values.begin(), values.end(), 0.0);
  const int mean = sum / values.size();
Ilya Sherman
2012/08/21 03:42:27
nit: Should this be a double rather than an int?
Alexei Svitkine (slow)
2012/08/21 14:25:49
Changed to use doubles throughout the function.
  const int sum_of_squares = std::inner_product(values.begin(), values.end(),
                                                values.begin(), 0.0);
  const double variance = static_cast<double>(sum_of_squares) /
      values.size() - (mean * mean);
  return std::sqrt(variance);
}

}  // namespace

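As context for the thread above: a minimal sketch of what the doubles-throughout version of ComputeStandardDeviation might look like. The updated patchset is not shown in this view, so the helper name and body below are illustrative rather than the actual follow-up change.

// Sketch only (not part of this patchset): the same statistic with the sum,
// mean and sum of squares kept as doubles, so integer truncation cannot skew
// the variance.
double ComputeStandardDeviationSketch(const std::vector<int>& values) {
  const double sum = std::accumulate(values.begin(), values.end(), 0.0);
  const double mean = sum / values.size();
  const double sum_of_squares = std::inner_product(values.begin(), values.end(),
                                                   values.begin(), 0.0);
  const double variance = sum_of_squares / values.size() - mean * mean;
  return std::sqrt(variance);
}
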
class EntropyProviderTest : public testing::Test {
 public:
  // Computes SHA1-based entropy for the given |trial_name| based on
  // |entropy_source|.
  double GenerateSHA1Entropy(const std::string& entropy_source,
                             const std::string& trial_name) {
    SHA1EntropyProvider sha1_provider(entropy_source);
    return sha1_provider.GetEntropyForTrial(trial_name);
  }

  // Generates permutation-based entropy for the given |trial_name| based on
  // |entropy_source|, which must be in the range [0, entropy_max).
  double GeneratePermutedEntropy(uint16 entropy_source,
                                 size_t entropy_max,
                                 const std::string& trial_name) {
    PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
    return permuted_provider.GetEntropyForTrial(trial_name);
  }
};

TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
  // Simply asserts that two trials using one-time randomization
  // that have different names normally generate different results.
  //
  // Note that depending on the one-time random initialization, they
  // _might_ actually give the same result, but we know that given
  // the particular client_id we use for unit tests they won't.
  base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
  scoped_refptr<base::FieldTrial> trials[] = {
      base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
          base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
      base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
          base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
  };

  for (size_t i = 0; i < arraysize(trials); ++i) {
    trials[i]->UseOneTimeRandomization();

    for (int j = 0; j < 100; ++j)
      trials[i]->AppendGroup("", 1);
  }

  // The trials are most likely to give different results since they have
  // different names.
  ASSERT_NE(trials[0]->group(), trials[1]->group());
  ASSERT_NE(trials[0]->group_name(), trials[1]->group_name());
}

TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
  // Simply asserts that two trials using one-time randomization
  // that have different names normally generate different results.
  //
  // Note that depending on the one-time random initialization, they
  // _might_ actually give the same result, but we know that given
  // the particular client_id we use for unit tests they won't.
  const size_t kMaxLowEntropySize = (1 << 13);
  base::FieldTrialList field_trial_list(
      new PermutedEntropyProvider(1234, kMaxLowEntropySize));
  scoped_refptr<base::FieldTrial> trials[] = {
      base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
          base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
      base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
          base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
  };

  for (size_t i = 0; i < arraysize(trials); ++i) {
    trials[i]->UseOneTimeRandomization();

    for (int j = 0; j < 100; ++j)
      trials[i]->AppendGroup("", 1);
  }

  // The trials are most likely to give different results since they have
  // different names.
  ASSERT_NE(trials[0]->group(), trials[1]->group());
  ASSERT_NE(trials[0]->group_name(), trials[1]->group_name());
}

TEST_F(EntropyProviderTest, SHA1Entropy) {
  const double results[] = {
      GenerateSHA1Entropy("hi", "1"),
      GenerateSHA1Entropy("there", "1"),
  };
  ASSERT_NE(results[0], results[1]);
  for (size_t i = 0; i < arraysize(results); ++i) {
    ASSERT_LE(0.0, results[i]);
    ASSERT_GT(1.0, results[i]);
  }

  ASSERT_EQ(GenerateSHA1Entropy("yo", "1"),
            GenerateSHA1Entropy("yo", "1"));
  ASSERT_NE(GenerateSHA1Entropy("yo", "something"),
            GenerateSHA1Entropy("yo", "else"));
}

TEST_F(EntropyProviderTest, PermutedEntropy) {
  const size_t kMaxLowEntropySize = (1 << 13);
  const double results[] = {
      GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
      GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1"),
  };
  ASSERT_NE(results[0], results[1]);
  for (size_t i = 0; i < arraysize(results); ++i) {
    ASSERT_LE(0.0, results[i]);
    ASSERT_GT(1.0, results[i]);
  }

  ASSERT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
            GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
  ASSERT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
            GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
}

TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {
  const size_t kMaxLowEntropySize = (1 << 13);
  const int kBucketCount = 20;
  const int kIterationCount = 100000;
Ilya Sherman
2012/08/21 03:42:27
nit: size_t throughout?
Alexei Svitkine (slow)
2012/08/21 14:25:49
Done.

  const std::string trial_names[] = {
      "TestTrial",
      "AnotherTestTrial",
      "NewTabButton",
  };

  for (size_t i = 0; i < arraysize(trial_names); ++i) {
    std::vector<int> distribution(kBucketCount);

    for (int j = 0; j < kIterationCount; ++j) {
      // Use a random GUID + 13 bits of entropy to match how the
      // SHA1EntropyProvider is used in metrics_service.cc.
      const int low_entropy_source =
          static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
      const std::string high_entropy_source =
          base::GenerateGUID() + base::IntToString(low_entropy_source);
      const double entropy_value =
          GenerateSHA1Entropy(high_entropy_source, trial_names[i]);
      const int bucket = static_cast<int>(kBucketCount * entropy_value);
      ASSERT_LT(bucket, kBucketCount);
      distribution[bucket] += 1;
    }

    int min_value = std::numeric_limits<int>::max();
    int max_value = std::numeric_limits<int>::min();
    for (int j = 0; j < kBucketCount; ++j) {
      min_value = std::min(min_value, distribution[j]);
      max_value = std::max(max_value, distribution[j]);
    }

    // TODO(asvitkine): Figure out pass / fail criteria.
    printf("min = %d, max = %d, stddev = %f\n", min_value, max_value,
           ComputeStandardDeviation(distribution));
  }
}

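Regarding the pass/fail TODO above: one possible criterion, offered only as a sketch and not something this patch proposes, is a chi-squared goodness-of-fit check against the uniform expectation of kIterationCount / kBucketCount hits per bucket. The helper name and threshold below are illustrative.

// Sketch only: chi-squared statistic for |distribution| against a uniform
// expectation. With 20 buckets (19 degrees of freedom), values above roughly
// 43.8 fall outside the 99.9% quantile for a genuinely uniform source, so a
// bound in that region could serve as an automated pass/fail threshold.
double ComputeUniformityChiSquare(const std::vector<int>& distribution,
                                  double expected_count_per_bucket) {
  double chi_square = 0.0;
  for (size_t i = 0; i < distribution.size(); ++i) {
    const double delta = distribution[i] - expected_count_per_bucket;
    chi_square += delta * delta / expected_count_per_bucket;
  }
  return chi_square;
}
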
TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {
  const size_t kMaxLowEntropySize = (1 << 13);
  const int kBucketCount = 20;
  const int kIterationCount = 100000;

  const std::string trial_names[] = {
      "TestTrial",
      "AnotherTestTrial",
      "NewTabButton",
  };

  for (size_t i = 0; i < arraysize(trial_names); ++i) {
    std::vector<int> distribution(kBucketCount);

    // Note: Given a trial name, the computed mapping will be the same.
    // As a performance optimization, pre-compute the mapping once per trial
    // name and index into it each iteration.
Ilya Sherman
2012/08/21 03:42:27
Is this performance optimization needed for the test?
Alexei Svitkine (slow)
2012/08/21 14:25:49
Unfortunately, yes it is needed. Without it, this test takes far too long to run.
    std::vector<uint16> mapping(kMaxLowEntropySize);
    metrics_internal::PermuteMappingUsingTrialName(trial_names[i], &mapping);

    for (int j = 0; j < kIterationCount; ++j) {
      const int low_entropy_source =
          static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
      const double entropy_value =
          mapping[low_entropy_source] / static_cast<double>(kMaxLowEntropySize);
      const int bucket = static_cast<int>(kBucketCount * entropy_value);
      ASSERT_LT(bucket, kBucketCount);
      distribution[bucket] += 1;
    }

    int min_value = std::numeric_limits<int>::max();
    int max_value = std::numeric_limits<int>::min();
    for (int j = 0; j < kBucketCount; ++j) {
      min_value = std::min(min_value, distribution[j]);
      max_value = std::max(max_value, distribution[j]);
    }

    // TODO(asvitkine): Figure out pass / fail criteria.
    printf("min = %d, max = %d, stddev = %f\n", min_value, max_value,
           ComputeStandardDeviation(distribution));
  }
}

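For context on the review thread about the pre-computed mapping: a sketch, with a hypothetical helper name and not part of this patch, of what each sample would have to do without that optimization. It constructs a fresh PermutedEntropyProvider per call, which re-permutes all entropy_max entries on every one of the 100000 iterations per trial name.

// Sketch only: the unoptimized form of one sampling step. Building the
// provider here permutes the full mapping each time, which is the cost the
// pre-computed |mapping| in the test above avoids.
double SampleEntropyWithoutPrecomputedMapping(size_t entropy_max,
                                              const std::string& trial_name) {
  const uint16 low_entropy_source = static_cast<uint16>(
      base::RandInt(0, static_cast<int>(entropy_max) - 1));
  PermutedEntropyProvider permuted_provider(low_entropy_source, entropy_max);
  return permuted_provider.GetEntropyForTrial(trial_name);
}
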
TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {
  // Verifies that SeededRandGenerator has a uniform distribution.
  //
  // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.

  const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
  const uint32 kExpectedAverage = kTopOfRange / 2ULL;
  const uint32 kAllowedVariance = kExpectedAverage / 50ULL;  // +/- 2%
  const int kMinAttempts = 1000;
  const int kMaxAttempts = 1000000;

  const std::string trial_names[] = {
      "TestTrial",
      "AnotherTestTrial",
      "NewTabButton",
  };

  for (size_t i = 0; i < arraysize(trial_names); ++i) {
    const uint32 seed = metrics_internal::HashName(trial_names[i]);
    metrics_internal::SeededRandGenerator rand_generator(seed);

    double cumulative_average = 0.0;
    int count = 0;
    while (count < kMaxAttempts) {
      uint32 value = rand_generator(kTopOfRange);
      cumulative_average = (count * cumulative_average + value) / (count + 1);

      // Don't quit too quickly for things to start converging, or we may have
      // a false positive.
      if (count > kMinAttempts &&
          kExpectedAverage - kAllowedVariance < cumulative_average &&
          cumulative_average < kExpectedAverage + kAllowedVariance) {
        break;
      }

      ++count;
    }

    ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
        kExpectedAverage << ", average ended at " << cumulative_average <<
        ", for trial " << trial_names[i];
  }
}