Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(436)

Unified Diff: base/metrics/histogram.cc

Issue 10829466: SampleSet -> HistogramSamples (will be reused by SparseHistogram) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: base/metrics/histogram.cc
===================================================================
--- base/metrics/histogram.cc (revision 152603)
+++ base/metrics/histogram.cc (working copy)
@@ -31,7 +31,137 @@
typedef HistogramBase::Count Count;
typedef HistogramBase::Sample Sample;
+BucketHistogramSamples::BucketHistogramSamples(
+ const BucketRanges* bucket_ranges)
+ : counts_(bucket_ranges->size() - 1),
+ bucket_ranges_(bucket_ranges) {
+ CHECK_GE(bucket_ranges_->size(), 2u);
Ilya Sherman 2012/08/23 07:50:54 nit: Does this need to be a CHECK, or would a DCHE
kaiwang 2012/08/24 04:17:58 This check has litter performance impact, I don't
Ilya Sherman 2012/08/29 08:48:16 That is an argument for only ever using CHECKs, an
Ilya Sherman 2012/08/29 23:46:00 Brett or John, could one of you chime in on this q
+}
+BucketHistogramSamples::~BucketHistogramSamples() {}
+
+void BucketHistogramSamples::Accumulate(Sample value, Count count) {
+ size_t i = BucketIndex(value, bucket_ranges_);
Ilya Sherman 2012/08/23 07:50:54 nit: i -> bucket (prefer to use descriptive names)
kaiwang 2012/08/24 04:17:58 i -> bucket_index
+ counts_[i] += count;
+ sum_ += count * value;
Ilya Sherman 2012/08/23 07:50:54 Any need to check for overflow?
kaiwang 2012/08/24 04:17:58 sum is int64 and count/value are both int32. Also,
Ilya Sherman 2012/08/29 08:48:16 Why isn't clamping to the max value for int64 the
+ redundant_count_ += count;
+}
+
+Count BucketHistogramSamples::GetCount(Sample value) const {
+ size_t i = BucketIndex(value, bucket_ranges_);
+ return counts_[i];
+}
+
+Count BucketHistogramSamples::TotalCount() const {
+ Count count = 0;
+ for (size_t i = 0; i < counts_.size(); i++) {
+ count += counts_[i];
+ }
+ return count;
+}
+
+Count BucketHistogramSamples::GetCountFromBucketIndex(
+ size_t bucket_index) const {
+ return counts_[bucket_index];
+}
+
+scoped_ptr<SampleCountIterator> BucketHistogramSamples::Iterator() const {
+ return scoped_ptr<SampleCountIterator>(
+ new BucketHistogramSamplesIterator(&counts_, bucket_ranges_));
+}
+
+// Use simple binary search. This is very general, but there are better
+// approaches if we knew that the buckets were linearly distributed.
+//
// static
+size_t BucketHistogramSamples::BucketIndex(Sample value,
+ const BucketRanges* bucket_ranges) {
+ size_t bucket_count = bucket_ranges->size() - 1;
+ CHECK_GE(bucket_count, 1u);
+ CHECK_GE(value, bucket_ranges->range(0));
+ CHECK_LT(value, bucket_ranges->range(bucket_count));
+
+ size_t under = 0;
+ size_t over = bucket_count;
+ size_t mid;
+ do {
+ mid = under + (over - under)/2;
+ if (mid == under)
+ break;
+ if (bucket_ranges->range(mid) <= value)
+ under = mid;
+ else
+ over = mid;
+ } while (true);
Ilya Sherman 2012/08/23 07:50:54 Can we use a standard binary search implementation
kaiwang 2012/08/24 04:17:58 This code is copied from below. I think for refact
+ return mid;
+}
+
+bool BucketHistogramSamples::AddSubtractHelper(SampleCountIterator* iter,
+ bool is_add) {
+ HistogramBase::Sample min;
+ HistogramBase::Sample max;
+ HistogramBase::Count count;
+
+ size_t index = 0;
+ while (index < counts_.size() && !iter->Done()) {
Ilya Sherman 2012/08/23 07:50:54 Please add a comment describing what this double-i
kaiwang 2012/08/24 04:17:58 Done.
+ iter->Get(&min, &max, &count);
+ if (min == bucket_ranges_->range(index) &&
+ max == bucket_ranges_->range(index + 1)) {
+ // Bucket match!
+ counts_[index] += is_add ? count : - count;
Ilya Sherman 2012/08/23 07:50:54 nit: The unary minus operator should not be follow
kaiwang 2012/08/24 04:17:58 Done.
+ iter->Next();
+ } else if (min > bucket_ranges_->range(index)) {
+ index++;
+ } else {
+ return false;
+ }
+ }
+
+ return iter->Done();
+}
+
+BucketHistogramSamplesIterator::BucketHistogramSamplesIterator(
+ const vector<Count>* counts,
+ const BucketRanges* bucket_ranges)
+ : counts_(counts),
+ bucket_ranges_(bucket_ranges),
+ index_(-1),
+ is_done_(false) {
+ CHECK_GT(bucket_ranges_->size(), counts_->size());
+ ForwardIndex();
+}
+
+bool BucketHistogramSamplesIterator::Done() {
+ return is_done_;
+}
+
+void BucketHistogramSamplesIterator::Next() {
+ ForwardIndex();
+}
+
+void BucketHistogramSamplesIterator::Get(HistogramBase::Sample* min,
+ HistogramBase::Sample* max,
+ HistogramBase::Count* count) {
+ CHECK(!Done());
+ if (min != NULL)
Ilya Sherman 2012/08/23 07:50:54 nit: I saw nothing in the method description that
kaiwang 2012/08/24 04:17:58 added to interface comment
+ *min = bucket_ranges_->range(index_);
+ if (max != NULL)
+ *max = bucket_ranges_->range(index_ + 1);
+ if (count != NULL)
+ *count = (*counts_)[index_];
+}
+
+void BucketHistogramSamplesIterator::ForwardIndex() {
+ CHECK(!Done());
+
+ while (index_ < static_cast<int>(counts_->size()) - 1) {
+ index_++;
+ if ((*counts_)[index_] != 0)
+ return;
+ }
+ is_done_ = true;
+}
+
+// static
const size_t Histogram::kBucketCount_MAX = 16384u;
Histogram::SampleSet::SampleSet(size_t size)
@@ -245,26 +375,26 @@
ranges->ResetChecksum();
}
-// static
void Histogram::Add(int value) {
- if (value > kSampleType_MAX - 1)
- value = kSampleType_MAX - 1;
- if (value < 0)
- value = 0;
- size_t index = BucketIndex(value);
- DCHECK_GE(value, ranges(index));
- DCHECK_LT(value, ranges(index + 1));
- Accumulate(value, 1, index);
+ if (value > ranges(bucket_count_) - 1)
jar (doing other things) 2012/08/30 22:54:38 You appear to have changed a constant comparisons
kaiwang 2012/09/07 01:14:11 Done.
+ value = ranges(bucket_count_) - 1;
+ if (value < ranges(0))
+ value = ranges(0);
+ samples_->Accumulate(value, 1);
}
void Histogram::AddBoolean(bool value) {
DCHECK(false);
}
-void Histogram::AddSampleSet(const SampleSet& sample) {
- sample_.Add(sample);
+void Histogram::AddSamples(const HistogramSamples& samples) {
+ samples_->Add(samples);
}
+bool Histogram::AddSamples(PickleIterator* iter) {
+ return samples_->Add(iter);
+}
+
void Histogram::SetRangeDescriptions(const DescriptionPair descriptions[]) {
DCHECK(false);
}
@@ -283,7 +413,7 @@
// static
string Histogram::SerializeHistogramInfo(const Histogram& histogram,
- const SampleSet& snapshot) {
+ const HistogramSamples& snapshot) {
DCHECK_NE(NOT_VALID_IN_RENDERER, histogram.histogram_type());
DCHECK(histogram.bucket_ranges()->HasValidChecksum());
@@ -296,10 +426,10 @@
pickle.WriteInt(histogram.histogram_type());
pickle.WriteInt(histogram.flags());
+ histogram.SerializeRanges(&pickle);
Ilya Sherman 2012/08/23 07:50:54 Why did you move this line?
kaiwang 2012/08/24 04:17:58 this is part of "histogram info", while the code b
+
snapshot.Serialize(&pickle);
- histogram.SerializeRanges(&pickle);
-
return string(static_cast<const char*>(pickle.data()), pickle.size());
}
@@ -318,7 +448,6 @@
uint32 range_checksum;
int histogram_type;
int pickle_flags;
- SampleSet sample;
PickleIterator iter(pickle);
if (!iter.ReadString(&histogram_name) ||
@@ -327,8 +456,7 @@
!iter.ReadUInt64(&bucket_count) ||
!iter.ReadUInt32(&range_checksum) ||
!iter.ReadInt(&histogram_type) ||
- !iter.ReadInt(&pickle_flags) ||
- !sample.Histogram::SampleSet::Deserialize(&iter)) {
jar (doing other things) 2012/08/30 22:54:38 How are these samples recovered? I don't see the
kaiwang 2012/09/07 01:14:11 See line 387, it adds all samples from a PickleIte
+ !iter.ReadInt(&pickle_flags)) {
DLOG(ERROR) << "Pickle error decoding Histogram: " << histogram_name;
return false;
}
@@ -382,23 +510,22 @@
if (render_histogram->flags() & kIPCSerializationSourceFlag) {
DVLOG(1) << "Single process mode, histogram observed and not copied: "
<< histogram_name;
- } else {
- DCHECK_EQ(flags & render_histogram->flags(), flags);
- render_histogram->AddSampleSet(sample);
+ return true;
}
- return true;
+ DCHECK_EQ(flags & render_histogram->flags(), flags);
+
Ilya Sherman 2012/08/23 07:50:54 nit: No need for this blank line.
kaiwang 2012/08/24 04:17:58 Done.
+ return render_histogram->AddSamples(&iter);
}
+// static
+const int Histogram::kCommonRaceBasedCountMismatch = 5;
Ilya Sherman 2012/08/23 07:50:54 Why did you move this constant into the Histogram
kaiwang 2012/08/24 04:17:58 It's needed in histogram_snapshot_manager.cc The o
-// Validate a sample and related histogram.
Ilya Sherman 2012/08/23 07:50:54 nit: Why did you remove this comment?
kaiwang 2012/08/24 04:17:58 better function comment in .h file
Histogram::Inconsistencies Histogram::FindCorruption(
- const SampleSet& snapshot) const {
+ const HistogramSamples& samples) const {
int inconsistencies = NO_INCONSISTENCIES;
Sample previous_range = -1; // Bottom range is always 0.
- int64 count = 0;
for (size_t index = 0; index < bucket_count(); ++index) {
- count += snapshot.counts(index);
int new_range = ranges(index);
if (previous_range >= new_range)
inconsistencies |= BUCKET_ORDER_ERROR;
@@ -408,20 +535,11 @@
if (!bucket_ranges()->HasValidChecksum())
inconsistencies |= RANGE_CHECKSUM_ERROR;
- int64 delta64 = snapshot.redundant_count() - count;
+ int64 delta64 = samples.redundant_count() - samples.TotalCount();
if (delta64 != 0) {
int delta = static_cast<int>(delta64);
if (delta != delta64)
delta = INT_MAX; // Flag all giant errors as INT_MAX.
- // Since snapshots of histograms are taken asynchronously relative to
- // sampling (and snapped from different threads), it is pretty likely that
- // we'll catch a redundant count that doesn't match the sample count. We
- // allow for a certain amount of slop before flagging this as an
- // inconsistency. Even with an inconsistency, we'll snapshot it again (for
- // UMA in about a half hour, so we'll eventually get the data, if it was
- // not the result of a corruption. If histograms show that 1 is "too tight"
- // then we may try to use 2 or 3 for this slop value.
- const int kCommonRaceBasedCountMismatch = 5;
if (delta > 0) {
UMA_HISTOGRAM_COUNTS("Histogram.InconsistentCountHigh", delta);
if (delta > kCommonRaceBasedCountMismatch)
@@ -448,11 +566,11 @@
return bucket_count_;
}
-// Do a safe atomic snapshot of sample data.
-// This implementation assumes we are on a safe single thread.
-void Histogram::SnapshotSample(SampleSet* sample) const {
- // Note locking not done in this version!!!
- *sample = sample_;
+scoped_ptr<BucketHistogramSamples> Histogram::SnapshotSamples() const {
+ scoped_ptr<BucketHistogramSamples> samples(
+ new BucketHistogramSamples(bucket_ranges()));
+ samples->Add(*samples_);
+ return samples.Pass();
}
bool Histogram::HasConstructionArguments(Sample minimum,
@@ -471,8 +589,11 @@
bucket_ranges_(ranges),
declared_min_(minimum),
declared_max_(maximum),
- bucket_count_(bucket_count),
- sample_(bucket_count) {}
+ bucket_count_(bucket_count) {
+ if (ranges) {
+ samples_.reset(new BucketHistogramSamples(ranges));
+ }
Ilya Sherman 2012/08/23 07:50:54 nit: No need for curly braces
kaiwang 2012/08/24 04:17:58 Done.
+}
Histogram::~Histogram() {
if (StatisticsRecorder::dump_on_exit()) {
@@ -519,31 +640,6 @@
return true;
}
-size_t Histogram::BucketIndex(Sample value) const {
- // Use simple binary search. This is very general, but there are better
- // approaches if we knew that the buckets were linearly distributed.
- DCHECK_LE(ranges(0), value);
- DCHECK_GT(ranges(bucket_count()), value);
- size_t under = 0;
- size_t over = bucket_count();
- size_t mid;
-
- do {
- DCHECK_GE(over, under);
- mid = under + (over - under)/2;
- if (mid == under)
- break;
- if (ranges(mid) <= value)
- under = mid;
- else
- over = mid;
- } while (true);
-
- DCHECK_LE(ranges(mid), value);
- CHECK_GT(ranges(mid+1), value);
- return mid;
-}
-
// Use the actual bucket widths (like a linear histogram) until the widths get
// over some transition value, and then use that transition width. Exponentials
// get so big so fast (and we don't expect to see a lot of entries in the large
@@ -567,12 +663,6 @@
return result;
}
-// Update histogram data with new sample.
-void Histogram::Accumulate(Sample value, Count count, size_t index) {
- // Note locking not done in this version!!!
Ilya Sherman 2012/08/23 07:50:54 nit: Please keep this comment around by the releva
kaiwang 2012/08/24 04:17:58 this function is not needed any more. All Histogra
- sample_.Accumulate(value, count, index);
-}
-
//------------------------------------------------------------------------------
// Private methods
@@ -581,22 +671,21 @@
string* output) const {
// Get local (stack) copies of all effectively volatile class data so that we
// are consistent across our output activities.
- SampleSet snapshot;
- SnapshotSample(&snapshot);
- Count sample_count = snapshot.TotalCount();
+ scoped_ptr<BucketHistogramSamples> snapshot = SnapshotSamples();
+ Count sample_count = snapshot->TotalCount();
- WriteAsciiHeader(snapshot, sample_count, output);
+ WriteAsciiHeader(*snapshot, sample_count, output);
output->append(newline);
// Prepare to normalize graphical rendering of bucket contents.
double max_size = 0;
if (graph_it)
- max_size = GetPeakBucketSize(snapshot);
+ max_size = GetPeakBucketSize(*snapshot);
// Calculate space needed to print bucket range numbers. Leave room to print
// nearly the largest bucket range without sliding over the histogram.
size_t largest_non_empty_bucket = bucket_count() - 1;
- while (0 == snapshot.counts(largest_non_empty_bucket)) {
+ while (0 == snapshot->GetCountFromBucketIndex(largest_non_empty_bucket)) {
if (0 == largest_non_empty_bucket)
break; // All buckets are empty.
--largest_non_empty_bucket;
@@ -605,7 +694,7 @@
// Calculate largest print width needed for any of our bucket range displays.
size_t print_width = 1;
for (size_t i = 0; i < bucket_count(); ++i) {
- if (snapshot.counts(i)) {
+ if (snapshot->GetCountFromBucketIndex(i)) {
size_t width = GetAsciiBucketRange(i).size() + 1;
if (width > print_width)
print_width = width;
@@ -616,7 +705,7 @@
int64 past = 0;
// Output the actual histogram graph.
for (size_t i = 0; i < bucket_count(); ++i) {
- Count current = snapshot.counts(i);
+ Count current = snapshot->GetCountFromBucketIndex(i);
if (!current && !PrintEmptyBucket(i))
continue;
remaining -= current;
@@ -624,9 +713,12 @@
output->append(range);
for (size_t j = 0; range.size() + j < print_width + 1; ++j)
output->push_back(' ');
- if (0 == current && i < bucket_count() - 1 && 0 == snapshot.counts(i + 1)) {
- while (i < bucket_count() - 1 && 0 == snapshot.counts(i + 1))
+ if (0 == current && i < bucket_count() - 1 &&
+ 0 == snapshot->GetCountFromBucketIndex(i + 1)) {
+ while (i < bucket_count() - 1 &&
+ 0 == snapshot->GetCountFromBucketIndex(i + 1)) {
++i;
+ }
output->append("... ");
output->append(newline);
continue; // No reason to plot emptiness.
@@ -641,17 +733,18 @@
DCHECK_EQ(sample_count, past);
}
-double Histogram::GetPeakBucketSize(const SampleSet& snapshot) const {
+double Histogram::GetPeakBucketSize(
+ const BucketHistogramSamples& samples) const {
double max = 0;
for (size_t i = 0; i < bucket_count() ; ++i) {
- double current_size = GetBucketSize(snapshot.counts(i), i);
+ double current_size = GetBucketSize(samples.GetCountFromBucketIndex(i), i);
if (current_size > max)
max = current_size;
}
return max;
}
-void Histogram::WriteAsciiHeader(const SampleSet& snapshot,
+void Histogram::WriteAsciiHeader(const BucketHistogramSamples& samples,
Count sample_count,
string* output) const {
StringAppendF(output,
@@ -659,9 +752,9 @@
histogram_name().c_str(),
sample_count);
if (0 == sample_count) {
- DCHECK_EQ(snapshot.sum(), 0);
+ DCHECK_EQ(samples.sum(), 0);
} else {
- double average = static_cast<float>(snapshot.sum()) / sample_count;
+ double average = static_cast<float>(samples.sum()) / sample_count;
StringAppendF(output, ", average = %.1f", average);
}

Powered by Google App Engine
This is Rietveld 408576698