OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include <cmath> | |
6 #include <limits> | |
7 #include <numeric> | |
8 | |
9 #include "base/basictypes.h" | |
10 #include "base/guid.h" | |
11 #include "base/memory/scoped_ptr.h" | |
12 #include "base/rand_util.h" | |
13 #include "base/string_number_conversions.h" | |
14 #include "chrome/common/metrics/entropy_provider.h" | |
15 #include "testing/gtest/include/gtest/gtest.h" | |
16 | |
17 namespace metrics { | |
18 | |
19 namespace { | |
20 | |
21 // Computes the Chi-Square statistic for |values| assuming they follow a uniform | |
22 // distribution, where each entry has expected value |expected_value|. | |
23 // | |
24 // The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed | |
25 // value and E is the expected value. | |
26 double ComputeChiSquare(const std::vector<int>& values, | |
27 double expected_value) { | |
28 double sum = 0; | |
29 for (size_t i = 0; i < values.size(); ++i) { | |
30 const double delta = values[i] - expected_value; | |
31 sum += (delta * delta) / expected_value; | |
32 } | |
33 return sum; | |
34 } | |
35 | |
36 } // namespace | |
37 | |
38 | |
39 class EntropyProviderTest : public testing::Test { | |
40 public: | |
41 // Computes SHA1-based entropy for the given |trial_name| based on | |
42 // |entropy_source| | |
43 double GenerateSHA1Entropy(const std::string& entropy_source, | |
44 const std::string& trial_name) { | |
45 SHA1EntropyProvider sha1_provider(entropy_source); | |
46 return sha1_provider.GetEntropyForTrial(trial_name); | |
47 } | |
48 | |
49 // Generates permutation-based entropy for the given |trial_name| based on | |
50 // |entropy_source| which must be in the range [0, entropy_max). | |
51 double GeneratePermutedEntropy(uint16 entropy_source, | |
52 size_t entropy_max, | |
53 const std::string& trial_name) { | |
54 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max); | |
55 return permuted_provider.GetEntropyForTrial(trial_name); | |
56 } | |
57 }; | |
58 | |
59 | |
60 TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) { | |
61 // Simply asserts that two trials using one-time randomization | |
62 // that have different names, normally generate different results. | |
63 // | |
64 // Note that depending on the one-time random initialization, they | |
65 // _might_ actually give the same result, but we know that given | |
66 // the particular client_id we use for unit tests they won't. | |
67 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id")); | |
68 scoped_refptr<base::FieldTrial> trials[] = { | |
69 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default", | |
70 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), | |
71 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default", | |
72 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), | |
73 }; | |
74 | |
75 for (size_t i = 0; i < arraysize(trials); ++i) { | |
76 trials[i]->UseOneTimeRandomization(); | |
77 | |
78 for (int j = 0; j < 100; ++j) | |
79 trials[i]->AppendGroup("", 1); | |
80 } | |
81 | |
82 // The trials are most likely to give different results since they have | |
83 // different names. | |
84 ASSERT_NE(trials[0]->group(), trials[1]->group()); | |
85 ASSERT_NE(trials[0]->group_name(), trials[1]->group_name()); | |
86 } | |
87 | |
88 TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) { | |
89 // Simply asserts that two trials using one-time randomization | |
90 // that have different names, normally generate different results. | |
91 // | |
92 // Note that depending on the one-time random initialization, they | |
93 // _might_ actually give the same result, but we know that given | |
94 // the particular client_id we use for unit tests they won't. | |
95 const size_t kMaxLowEntropySize = (1 << 13); | |
96 base::FieldTrialList field_trial_list( | |
97 new PermutedEntropyProvider(1234, kMaxLowEntropySize)); | |
98 scoped_refptr<base::FieldTrial> trials[] = { | |
99 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default", | |
100 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), | |
101 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default", | |
102 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), | |
103 }; | |
104 | |
105 for (size_t i = 0; i < arraysize(trials); ++i) { | |
106 trials[i]->UseOneTimeRandomization(); | |
107 | |
108 for (int j = 0; j < 100; ++j) | |
109 trials[i]->AppendGroup("", 1); | |
110 } | |
111 | |
112 // The trials are most likely to give different results since they have | |
113 // different names. | |
114 ASSERT_NE(trials[0]->group(), trials[1]->group()); | |
115 ASSERT_NE(trials[0]->group_name(), trials[1]->group_name()); | |
116 } | |
117 | |
118 TEST_F(EntropyProviderTest, SHA1Entropy) { | |
119 const double results[] = { | |
120 GenerateSHA1Entropy("hi", "1"), | |
121 GenerateSHA1Entropy("there", "1"), | |
122 }; | |
123 ASSERT_NE(results[0], results[1]); | |
124 for (size_t i = 0; i < arraysize(results); ++i) { | |
125 ASSERT_LE(0.0, results[i]); | |
126 ASSERT_GT(1.0, results[i]); | |
127 } | |
128 | |
129 ASSERT_EQ(GenerateSHA1Entropy("yo", "1"), | |
130 GenerateSHA1Entropy("yo", "1")); | |
131 ASSERT_NE(GenerateSHA1Entropy("yo", "something"), | |
132 GenerateSHA1Entropy("yo", "else")); | |
133 } | |
134 | |
135 TEST_F(EntropyProviderTest, PermutedEntropy) { | |
136 const size_t kMaxLowEntropySize = (1 << 13); | |
137 const double results[] = { | |
138 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"), | |
139 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1"), | |
140 }; | |
141 ASSERT_NE(results[0], results[1]); | |
142 for (size_t i = 0; i < arraysize(results); ++i) { | |
143 ASSERT_LE(0.0, results[i]); | |
144 ASSERT_GT(1.0, results[i]); | |
145 } | |
146 | |
147 ASSERT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"), | |
148 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1")); | |
149 ASSERT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"), | |
150 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else")); | |
151 } | |
152 | |
153 TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) { | |
154 // Size of the low entropy source to append to the high entropy source for | |
155 // input to the SHA1 entropy provider. | |
156 const size_t kMaxLowEntropySize = (1 << 13); | |
157 // Number of buckets in the simulated field trials. | |
158 const size_t kBucketCount = 20; | |
159 // Max number of iterations to perform before giving up and failing. | |
160 const size_t kMaxIterationCount = 100000; | |
161 // The number of iterations to perform before each time the statistical | |
162 // significance of the results is checked. | |
163 const size_t kCheckIterationCount = 10000; | |
Ilya Sherman
2012/08/22 04:07:42
Is it really important to check every 10000 iterat
Alexei Svitkine (slow)
2012/08/22 16:53:27
The test verifies whether the observed distributio
| |
164 // This is the Chi-Square threshold from the Chi-Square statistic table for | |
165 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence | |
166 // level. See: http://www.medcalc.org/manual/chi-square-table.php | |
167 const double kChiSquareThreshold = 43.82; | |
168 | |
169 const std::string trial_names[] = { | |
170 "TestTrial", | |
171 "AnotherTestTrial", | |
172 "NewTabButton", | |
173 }; | |
174 | |
175 for (size_t i = 0; i < arraysize(trial_names); ++i) { | |
176 std::vector<int> distribution(kBucketCount); | |
177 | |
178 for (size_t j = 0; j < kMaxIterationCount; ++j) { | |
Ilya Sherman
2012/08/22 04:07:42
nit: Perhaps this loop should start from j = 1, so
Alexei Svitkine (slow)
2012/08/22 16:53:27
Done.
| |
179 // Use a random GUID + 13 additional bits of entropy to match how the | |
180 // SHA1EntropyProvider is used in metrics_service.cc. | |
181 const int low_entropy_source = | |
182 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); | |
183 const std::string high_entropy_source = | |
184 base::GenerateGUID() + base::IntToString(low_entropy_source); | |
185 const double entropy_value = | |
186 GenerateSHA1Entropy(high_entropy_source, trial_names[i]); | |
187 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value); | |
188 ASSERT_LT(bucket, kBucketCount); | |
189 distribution[bucket] += 1; | |
190 | |
191 // After |kCheckIterationCount| iterations, compute the Chi-Square | |
192 // statistic of the distribution. If the resulting statistic is greater | |
193 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence | |
194 // that the observed samples do not follow a uniform distribution. | |
195 // | |
196 // However, since 99.9% would still result in a false negative every | |
197 // 1000 runs of the test, do not treat it as a failure (else the test | |
198 // will be flaky). Instead, perform additional iterations to determine | |
199 // if the distribution will converge, up to |kMaxIterationCount|. | |
Ilya Sherman
2012/08/22 04:07:42
Perhaps instead of running the test multiple times
Alexei Svitkine (slow)
2012/08/22 16:53:27
Using a threshold that has a higher confidence doe
| |
200 if (((j + 1) % kCheckIterationCount) == 0) { | |
201 const double expected_value_per_bucket = | |
202 static_cast<double>(j + 1) / kBucketCount; | |
203 const double chi_square = | |
204 ComputeChiSquare(distribution, expected_value_per_bucket); | |
205 if (chi_square < kChiSquareThreshold) | |
206 break; | |
207 | |
208 // If |j == kMaxIterationCount - 1|, the Chi-Square statistic did not | |
209 // converge after |kMaxIterationCount|. | |
210 ASSERT_NE(j, kMaxIterationCount - 1) << "Failed for trial " << | |
211 trial_names[i] << " with chi_square = " << chi_square << | |
212 " after " << kMaxIterationCount << " iterations."; | |
Ilya Sherman
2012/08/22 04:07:42
This test should choose a random seed for the rand
Alexei Svitkine (slow)
2012/08/22 16:53:27
The random number generator used for the original
| |
213 } | |
214 } | |
215 } | |
216 } | |
217 | |
218 TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) { | |
219 // Size of the low entropy source to use for the permuted entropy provider. | |
220 const size_t kMaxLowEntropySize = (1 << 13); | |
221 // Number of buckets in the simulated field trials. | |
222 const size_t kBucketCount = 20; | |
223 // Max number of iterations to perform before giving up and failing. | |
224 const size_t kMaxIterationCount = 100000; | |
225 // The number of iterations to perform before each time the statistical | |
226 // significance of the results is checked. | |
227 const size_t kCheckIterationCount = 10000; | |
228 // This is the Chi-Square threshold from the Chi-Square statistic table for | |
229 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence | |
230 // level. See: http://www.medcalc.org/manual/chi-square-table.php | |
231 const double kChiSquareThreshold = 43.82; | |
Ilya Sherman
2012/08/22 04:07:42
nit: Almost all of these constants are repeated fr
Alexei Svitkine (slow)
2012/08/22 16:53:27
Refactored the two tests to share a majority of th
| |
232 | |
233 const std::string trial_names[] = { | |
234 "TestTrial", | |
235 "AnotherTestTrial", | |
236 "NewTabButton", | |
237 }; | |
238 | |
239 for (size_t i = 0; i < arraysize(trial_names); ++i) { | |
240 std::vector<int> distribution(kBucketCount); | |
241 | |
242 // Note: Given a trial name, the computed mapping will be the same. | |
243 // As a performance optimization, pre-compute the mapping once per trial | |
244 // name and index into it each iteration. | |
245 std::vector<uint16> mapping(kMaxLowEntropySize); | |
246 internal::PermuteMappingUsingTrialName(trial_names[i], &mapping); | |
247 | |
248 for (size_t j = 0; j < kMaxIterationCount; ++j) { | |
249 const int low_entropy_source = | |
250 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); | |
251 const double entropy_value = | |
252 mapping[low_entropy_source] / static_cast<double>(kMaxLowEntropySize); | |
253 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value); | |
254 ASSERT_LT(bucket, kBucketCount); | |
255 distribution[bucket] += 1; | |
256 | |
257 // After |kCheckIterationCount| iterations, compute the Chi-Square | |
258 // statistic of the distribution. If the resulting statistic is greater | |
259 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence | |
260 // that the observed samples do not follow a uniform distribution. | |
261 // | |
262 // However, since 99.9% would still result in a false negative every | |
263 // 1000 runs of the test, do not treat it as a failure (else the test | |
264 // will be flaky). Instead, perform additional iterations to determine | |
265 // if the distribution will converge, up to |kMaxIterationCount|. | |
266 if (((j + 1) % kCheckIterationCount) == 0) { | |
267 const double expected_value_per_bucket = | |
268 static_cast<double>(j + 1) / kBucketCount; | |
269 const double chi_square = | |
270 ComputeChiSquare(distribution, expected_value_per_bucket); | |
271 if (chi_square < kChiSquareThreshold) | |
272 break; | |
273 | |
274 // If |j == kMaxIterationCount - 1|, the Chi-Square statistic did not | |
275 // converge after |kMaxIterationCount|. | |
276 ASSERT_NE(j, kMaxIterationCount - 1) << "Failed for trial " << | |
277 trial_names[i] << " with chi_square = " << chi_square << | |
278 " after " << kMaxIterationCount << " iterations."; | |
279 } | |
280 } | |
281 } | |
Ilya Sherman
2012/08/22 04:07:42
nit: There is a lot of repeated code with the abov
Alexei Svitkine (slow)
2012/08/22 16:53:27
Done.
| |
282 } | |
283 | |
284 TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) { | |
285 // Verifies that SeededRandGenerator has a uniform distribution. | |
286 // | |
287 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc. | |
288 | |
289 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL; | |
290 const uint32 kExpectedAverage = kTopOfRange / 2ULL; | |
291 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2% | |
292 const int kMinAttempts = 1000; | |
293 const int kMaxAttempts = 1000000; | |
294 | |
295 const std::string trial_names[] = { | |
296 "TestTrial", | |
297 "AnotherTestTrial", | |
298 "NewTabButton", | |
299 }; | |
300 | |
301 for (size_t i = 0; i < arraysize(trial_names); ++i) { | |
302 const uint32 seed = internal::HashName(trial_names[i]); | |
303 internal::SeededRandGenerator rand_generator(seed); | |
304 | |
305 double cumulative_average = 0.0; | |
306 int count = 0; | |
307 while (count < kMaxAttempts) { | |
308 uint32 value = rand_generator(kTopOfRange); | |
309 cumulative_average = (count * cumulative_average + value) / (count + 1); | |
310 | |
311 // Don't quit too quickly for things to start converging, or we may have | |
312 // a false positive. | |
313 if (count > kMinAttempts && | |
314 kExpectedAverage - kAllowedVariance < cumulative_average && | |
315 cumulative_average < kExpectedAverage + kAllowedVariance) { | |
316 break; | |
317 } | |
318 | |
319 ++count; | |
320 } | |
321 | |
322 ASSERT_LT(count, kMaxAttempts) << "Expected average was " << | |
323 kExpectedAverage << ", average ended at " << cumulative_average << | |
324 ", for trial " << trial_names[i]; | |
325 } | |
326 } | |
327 | |
328 } // namespace metrics | |
OLD | NEW |