Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(26)

Side by Side Diff: chrome/common/metrics/entropy_provider_unittest.cc

Issue 10830318: Use a different algorithm with the low entropy source for field trials. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <cmath>
6 #include <limits>
7 #include <numeric>
8
9 #include "base/basictypes.h"
10 #include "base/guid.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/rand_util.h"
13 #include "base/string_number_conversions.h"
14 #include "chrome/common/metrics/entropy_provider.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16
17 namespace {
18
19 // Computes the standard deviation of distribution |values|.
20 double ComputeStandardDeviation(const std::vector<int>& values) {
21 const int sum = std::accumulate(values.begin(), values.end(), 0.0);
22 const int mean = sum / values.size();
Ilya Sherman 2012/08/21 03:42:27 nit: Should this be a double rather than an int?
Alexei Svitkine (slow) 2012/08/21 14:25:49 Changed to use doubles throughout the function. (
23 const int sum_of_squares = std::inner_product(values.begin(), values.end(),
24 values.begin(), 0.0);
25 const double variance = static_cast<double>(sum_of_squares) /
26 values.size() - (mean * mean);
27 return std::sqrt(variance);
28 }
29
30 } // namespace
31
32
33 class EntropyProviderTest : public testing::Test {
34 public:
35 // Computes SHA1-based entropy for the given |trial_name| based on
36 // |entropy_source|
37 double GenerateSHA1Entropy(const std::string& entropy_source,
38 const std::string& trial_name) {
39 SHA1EntropyProvider sha1_provider(entropy_source);
40 return sha1_provider.GetEntropyForTrial(trial_name);
41 }
42
43 // Generates permutation-based entropy for the given |trial_name| based on
44 // |entropy_source| which must be in the range [0, entropy_max).
45 double GeneratePermutedEntropy(uint16 entropy_source,
46 size_t entropy_max,
47 const std::string& trial_name) {
48 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
49 return permuted_provider.GetEntropyForTrial(trial_name);
50 }
51 };
52
53
54 TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
55 // Simply asserts that two trials using one-time randomization
56 // that have different names, normally generate different results.
57 //
58 // Note that depending on the one-time random initialization, they
59 // _might_ actually give the same result, but we know that given
60 // the particular client_id we use for unit tests they won't.
61 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
62 scoped_refptr<base::FieldTrial> trials[] = {
63 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
64 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
65 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
66 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
67 };
68
69 for (size_t i = 0; i < arraysize(trials); ++i) {
70 trials[i]->UseOneTimeRandomization();
71
72 for (int j = 0; j < 100; ++j)
73 trials[i]->AppendGroup("", 1);
74 }
75
76 // The trials are most likely to give different results since they have
77 // different names.
78 ASSERT_NE(trials[0]->group(), trials[1]->group());
79 ASSERT_NE(trials[0]->group_name(), trials[1]->group_name());
80 }
81
82 TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
83 // Simply asserts that two trials using one-time randomization
84 // that have different names, normally generate different results.
85 //
86 // Note that depending on the one-time random initialization, they
87 // _might_ actually give the same result, but we know that given
88 // the particular client_id we use for unit tests they won't.
89 const size_t kMaxLowEntropySize = (1 << 13);
90 base::FieldTrialList field_trial_list(
91 new PermutedEntropyProvider(1234, kMaxLowEntropySize));
92 scoped_refptr<base::FieldTrial> trials[] = {
93 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
94 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
95 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
96 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
97 };
98
99 for (size_t i = 0; i < arraysize(trials); ++i) {
100 trials[i]->UseOneTimeRandomization();
101
102 for (int j = 0; j < 100; ++j)
103 trials[i]->AppendGroup("", 1);
104 }
105
106 // The trials are most likely to give different results since they have
107 // different names.
108 ASSERT_NE(trials[0]->group(), trials[1]->group());
109 ASSERT_NE(trials[0]->group_name(), trials[1]->group_name());
110 }
111
112 TEST_F(EntropyProviderTest, SHA1Entropy) {
113 const double results[] = {
114 GenerateSHA1Entropy("hi", "1"),
115 GenerateSHA1Entropy("there", "1"),
116 };
117 ASSERT_NE(results[0], results[1]);
118 for (size_t i = 0; i < arraysize(results); ++i) {
119 ASSERT_LE(0.0, results[i]);
120 ASSERT_GT(1.0, results[i]);
121 }
122
123 ASSERT_EQ(GenerateSHA1Entropy("yo", "1"),
124 GenerateSHA1Entropy("yo", "1"));
125 ASSERT_NE(GenerateSHA1Entropy("yo", "something"),
126 GenerateSHA1Entropy("yo", "else"));
127 }
128
129 TEST_F(EntropyProviderTest, PermutedEntropy) {
130 const size_t kMaxLowEntropySize = (1 << 13);
131 const double results[] = {
132 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
133 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1"),
134 };
135 ASSERT_NE(results[0], results[1]);
136 for (size_t i = 0; i < arraysize(results); ++i) {
137 ASSERT_LE(0.0, results[i]);
138 ASSERT_GT(1.0, results[i]);
139 }
140
141 ASSERT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
142 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
143 ASSERT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
144 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
145 }
146
147 TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {
148 const size_t kMaxLowEntropySize = (1 << 13);
149 const int kBucketCount = 20;
150 const int kIterationCount = 100000;
Ilya Sherman 2012/08/21 03:42:27 nit: size_t throughout?
Alexei Svitkine (slow) 2012/08/21 14:25:49 Done.
151
152 const std::string trial_names[] = {
153 "TestTrial",
154 "AnotherTestTrial",
155 "NewTabButton",
156 };
157
158 for (size_t i = 0; i < arraysize(trial_names); ++i) {
159 std::vector<int> distribution(kBucketCount);
160
161 for (int j = 0; j < kIterationCount; ++j) {
162 // Use a random GUID + 13 bits of entropy to match how the
163 // SHA1EntropyProvider is used in metrics_service.cc.
164 const int low_entropy_source =
165 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
166 const std::string high_entropy_source =
167 base::GenerateGUID() + base::IntToString(low_entropy_source);
168 const double entropy_value =
169 GenerateSHA1Entropy(high_entropy_source, trial_names[i]);
170 const int bucket = static_cast<int>(kBucketCount * entropy_value);
171 ASSERT_LT(bucket, kBucketCount);
172 distribution[bucket] += 1;
173 }
174
175 int min_value = std::numeric_limits<int>::max();
176 int max_value = std::numeric_limits<int>::min();
177 for (int j = 0; j < kBucketCount; ++j) {
178 min_value = std::min(min_value, distribution[j]);
179 max_value = std::max(max_value, distribution[j]);
180 }
181
182 // TODO(asvitkine): Figure out pass / fail criteria.
183 printf("min = %d, max = %d, stddev = %f\n", min_value, max_value,
184 ComputeStandardDeviation(distribution));
185 }
186 }
187
188 TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {
189 const size_t kMaxLowEntropySize = (1 << 13);
190 const int kBucketCount = 20;
191 const int kIterationCount = 100000;
192
193 const std::string trial_names[] = {
194 "TestTrial",
195 "AnotherTestTrial",
196 "NewTabButton",
197 };
198
199 for (size_t i = 0; i < arraysize(trial_names); ++i) {
200 std::vector<int> distribution(kBucketCount);
201
202 // Note: Given a trial name, the computed mapping will be the same.
203 // As a performance optimization, pre-compute the mapping once per trial
204 // name and index into it each iteration.
Ilya Sherman 2012/08/21 03:42:27 Is this performance optimization needed for the te
Alexei Svitkine (slow) 2012/08/21 14:25:49 Unfortunately, yes it is needed. Without it, this
205 std::vector<uint16> mapping(kMaxLowEntropySize);
206 metrics_internal::PermuteMappingUsingTrialName(trial_names[i], &mapping);
207
208 for (int j = 0; j < kIterationCount; ++j) {
209 const int low_entropy_source =
210 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
211 const double entropy_value =
212 mapping[low_entropy_source] / static_cast<double>(kMaxLowEntropySize);
213 const int bucket = static_cast<int>(kBucketCount * entropy_value);
214 ASSERT_LT(bucket, kBucketCount);
215 distribution[bucket] += 1;
216 }
217
218 int min_value = std::numeric_limits<int>::max();
219 int max_value = std::numeric_limits<int>::min();
220 for (int j = 0; j < kBucketCount; ++j) {
221 min_value = std::min(min_value, distribution[j]);
222 max_value = std::max(max_value, distribution[j]);
223 }
224
225 // TODO(asvitkine): Figure out pass / fail criteria.
226 printf("min = %d, max = %d, stddev = %f\n", min_value, max_value,
227 ComputeStandardDeviation(distribution));
228 }
229 }
230
231 TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {
232 // Verifies that SeededRandGenerator has a uniform distribution.
233 //
234 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
235
236 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
237 const uint32 kExpectedAverage = kTopOfRange / 2ULL;
238 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
239 const int kMinAttempts = 1000;
240 const int kMaxAttempts = 1000000;
241
242 const std::string trial_names[] = {
243 "TestTrial",
244 "AnotherTestTrial",
245 "NewTabButton",
246 };
247
248 for (size_t i = 0; i < arraysize(trial_names); ++i) {
249 const uint32 seed = metrics_internal::HashName(trial_names[i]);
250 metrics_internal::SeededRandGenerator rand_generator(seed);
251
252 double cumulative_average = 0.0;
253 int count = 0;
254 while (count < kMaxAttempts) {
255 uint32 value = rand_generator(kTopOfRange);
256 cumulative_average = (count * cumulative_average + value) / (count + 1);
257
258 // Don't quit too quickly for things to start converging, or we may have
259 // a false positive.
260 if (count > kMinAttempts &&
261 kExpectedAverage - kAllowedVariance < cumulative_average &&
262 cumulative_average < kExpectedAverage + kAllowedVariance) {
263 break;
264 }
265
266 ++count;
267 }
268
269 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
270 kExpectedAverage << ", average ended at " << cumulative_average <<
271 ", for trial " << trial_names[i];
272 }
273 }
274
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698