Chromium Code Reviews| Index: base/metrics/histogram.cc |
| =================================================================== |
| --- base/metrics/histogram.cc (revision 152603) |
| +++ base/metrics/histogram.cc (working copy) |
| @@ -31,107 +31,140 @@ |
| typedef HistogramBase::Count Count; |
| typedef HistogramBase::Sample Sample; |
| -// static |
| -const size_t Histogram::kBucketCount_MAX = 16384u; |
| +BucketHistogramSamples::BucketHistogramSamples( |
| + const BucketRanges* bucket_ranges) |
| + : counts_(bucket_ranges->size() - 1), |
| + bucket_ranges_(bucket_ranges) { |
| + CHECK_GE(bucket_ranges_->size(), 2u); |
| +} |
| +BucketHistogramSamples::~BucketHistogramSamples() {} |
| -Histogram::SampleSet::SampleSet(size_t size) |
| - : counts_(size, 0), |
| - sum_(0), |
| - redundant_count_(0) {} |
| - |
| -Histogram::SampleSet::SampleSet() |
| - : counts_(), |
| - sum_(0), |
| - redundant_count_(0) {} |
| - |
| -Histogram::SampleSet::~SampleSet() {} |
| - |
| -void Histogram::SampleSet::Resize(size_t size) { |
| - counts_.resize(size, 0); |
| +void BucketHistogramSamples::Accumulate(Sample value, Count count) { |
| + size_t bucket_index = GetBucketIndex(value, bucket_ranges_); |
| + counts_[bucket_index] += count; |
| + set_sum(sum() + count * value); |
| + set_redundant_count(redundant_count() + count); |
|
Ilya Sherman
2012/08/29 08:48:16
Optional nit: Perhaps the HistogramSamples() class
kaiwang
2012/08/29 22:42:16
Changed IncreaseSum and IncreaseRedundantCount
|
| } |
| -void Histogram::SampleSet::Accumulate(Sample value, Count count, |
| - size_t index) { |
| - DCHECK(count == 1 || count == -1); |
| - counts_[index] += count; |
| - sum_ += count * value; |
| - redundant_count_ += count; |
| - DCHECK_GE(counts_[index], 0); |
| - DCHECK_GE(sum_, 0); |
| - DCHECK_GE(redundant_count_, 0); |
| +Count BucketHistogramSamples::GetCount(Sample value) const { |
| + size_t bucket_index = GetBucketIndex(value, bucket_ranges_); |
| + return counts_[bucket_index]; |
| } |
| -Count Histogram::SampleSet::TotalCount() const { |
| - Count total = 0; |
| - for (Counts::const_iterator it = counts_.begin(); |
| - it != counts_.end(); |
| - ++it) { |
| - total += *it; |
| +Count BucketHistogramSamples::TotalCount() const { |
| + Count count = 0; |
| + for (size_t i = 0; i < counts_.size(); i++) { |
| + count += counts_[i]; |
| } |
| - return total; |
| + return count; |
| } |
| -void Histogram::SampleSet::Add(const SampleSet& other) { |
| - DCHECK_EQ(counts_.size(), other.counts_.size()); |
| - sum_ += other.sum_; |
| - redundant_count_ += other.redundant_count_; |
| - for (size_t index = 0; index < counts_.size(); ++index) |
| - counts_[index] += other.counts_[index]; |
| +Count BucketHistogramSamples::GetCountFromBucketIndex( |
| + size_t bucket_index) const { |
|
Ilya Sherman
2012/08/29 08:48:16
nit: DCHECK that the index is in range?
kaiwang
2012/08/29 22:42:16
Done.
|
| + return counts_[bucket_index]; |
| } |
| -void Histogram::SampleSet::Subtract(const SampleSet& other) { |
| - DCHECK_EQ(counts_.size(), other.counts_.size()); |
| - // Note: Race conditions in snapshotting a sum may lead to (temporary) |
| - // negative values when snapshots are later combined (and deltas calculated). |
| - // As a result, we don't currently CHECK() for positive values. |
| - sum_ -= other.sum_; |
| - redundant_count_ -= other.redundant_count_; |
| - for (size_t index = 0; index < counts_.size(); ++index) { |
| - counts_[index] -= other.counts_[index]; |
| - DCHECK_GE(counts_[index], 0); |
| - } |
| +scoped_ptr<SampleCountIterator> BucketHistogramSamples::Iterator() const { |
| + return scoped_ptr<SampleCountIterator>( |
| + new BucketHistogramSamplesIterator(&counts_, bucket_ranges_)); |
| } |
| -bool Histogram::SampleSet::Serialize(Pickle* pickle) const { |
| - pickle->WriteInt64(sum_); |
| - pickle->WriteInt64(redundant_count_); |
| - pickle->WriteUInt64(counts_.size()); |
| +// Use simple binary search. This is very general, but there are better |
| +// approaches if we knew that the buckets were linearly distributed. |
| +// |
| +// static |
| +size_t BucketHistogramSamples::GetBucketIndex( |
| + Sample value, const BucketRanges* bucket_ranges) { |
| + size_t bucket_count = bucket_ranges->size() - 1; |
| + CHECK_GE(bucket_count, 1u); |
| + CHECK_GE(value, bucket_ranges->range(0)); |
| + CHECK_LT(value, bucket_ranges->range(bucket_count)); |
| - for (size_t index = 0; index < counts_.size(); ++index) { |
| - pickle->WriteInt(counts_[index]); |
| + size_t under = 0; |
| + size_t over = bucket_count; |
| + size_t mid; |
| + do { |
| + mid = under + (over - under)/2; |
| + if (mid == under) |
| + break; |
| + if (bucket_ranges->range(mid) <= value) |
| + under = mid; |
| + else |
| + over = mid; |
| + } while (true); |
| + return mid; |
| +} |
| + |
| +bool BucketHistogramSamples::AddSubtractImpl(SampleCountIterator* iter, |
| + bool is_add) { |
| + HistogramBase::Sample min; |
| + HistogramBase::Sample max; |
| + HistogramBase::Count count; |
| + |
| + // Go through the iterator and add the counts into correct bucket. |
|
Ilya Sherman
2012/08/29 08:48:16
The loop has three cases. Please describe each of
kaiwang
2012/08/29 22:42:16
Done.
|
| + size_t index = 0; |
| + while (index < counts_.size() && !iter->Done()) { |
| + iter->Get(&min, &max, &count); |
| + if (min == bucket_ranges_->range(index) && |
| + max == bucket_ranges_->range(index + 1)) { |
| + // Bucket match! |
| + counts_[index] += is_add ? count : -count; |
| + iter->Next(); |
| + } else if (min > bucket_ranges_->range(index)) { |
| + index++; |
| + } else { |
| + return false; |
| + } |
| } |
| - return true; |
| + return iter->Done(); |
| } |
| -bool Histogram::SampleSet::Deserialize(PickleIterator* iter) { |
| - DCHECK_EQ(counts_.size(), 0u); |
| - DCHECK_EQ(sum_, 0); |
| - DCHECK_EQ(redundant_count_, 0); |
| +BucketHistogramSamplesIterator::BucketHistogramSamplesIterator( |
| + const vector<Count>* counts, |
| + const BucketRanges* bucket_ranges) |
| + : counts_(counts), |
| + bucket_ranges_(bucket_ranges), |
| + index_(-1), |
|
Ilya Sherman
2012/08/29 08:48:16
nit: It looks like the code would be slightly clea
kaiwang
2012/08/29 22:42:16
Need a way to tell it's on first bucket (index ==
Ilya Sherman
2012/08/29 23:41:27
Can't you tell this by the check "index_ < counts_
kaiwang
2012/08/30 03:13:22
even when counts_ has a large size, it's still pos
Ilya Sherman
2012/08/30 07:57:39
But, the constructor immediately scans over to the
kaiwang
2012/09/07 01:14:11
Changed according to your suggestion. (note, I'll
|
| + is_done_(false) { |
| + CHECK_GT(bucket_ranges_->size(), counts_->size()); |
| + ForwardToNextNonemptyBucket(); |
| +} |
| - uint64 counts_size; |
| +bool BucketHistogramSamplesIterator::Done() { |
| + return is_done_; |
|
Ilya Sherman
2012/08/30 07:57:39
Rather than keeping a separate variable for this,
|
| +} |
| - if (!iter->ReadInt64(&sum_) || |
| - !iter->ReadInt64(&redundant_count_) || |
| - !iter->ReadUInt64(&counts_size)) { |
| - return false; |
| - } |
| +void BucketHistogramSamplesIterator::Next() { |
| + ForwardToNextNonemptyBucket(); |
| +} |
| - if (counts_size == 0) |
| - return false; |
| +void BucketHistogramSamplesIterator::Get(HistogramBase::Sample* min, |
| + HistogramBase::Sample* max, |
| + HistogramBase::Count* count) { |
| + CHECK(!Done()); |
| + if (min != NULL) |
| + *min = bucket_ranges_->range(index_); |
| + if (max != NULL) |
| + *max = bucket_ranges_->range(index_ + 1); |
| + if (count != NULL) |
| + *count = (*counts_)[index_]; |
| +} |
| - int count = 0; |
| - for (uint64 index = 0; index < counts_size; ++index) { |
| - int i; |
| - if (!iter->ReadInt(&i)) |
| - return false; |
| - counts_.push_back(i); |
| - count += i; |
| +void BucketHistogramSamplesIterator::ForwardToNextNonemptyBucket() { |
| + CHECK(!Done()); |
|
Ilya Sherman
2012/08/30 07:57:40
This should probably be part of the public method
|
| + |
| + while (index_ < static_cast<int>(counts_->size()) - 1) { |
| + index_++; |
| + if ((*counts_)[index_] != 0) |
| + return; |
| } |
| - DCHECK_EQ(count, redundant_count_); |
| - return count == redundant_count_; |
| + is_done_ = true; |
| } |
| +// static |
| +const size_t Histogram::kBucketCount_MAX = 16384u; |
| + |
| // TODO(rtenneti): delete this code after debugging. |
| void CheckCorruption(const Histogram& histogram, bool new_histogram) { |
| const std::string& histogram_name = histogram.histogram_name(); |
| @@ -245,26 +278,26 @@ |
| ranges->ResetChecksum(); |
| } |
| -// static |
| void Histogram::Add(int value) { |
| - if (value > kSampleType_MAX - 1) |
| - value = kSampleType_MAX - 1; |
| - if (value < 0) |
| - value = 0; |
| - size_t index = BucketIndex(value); |
| - DCHECK_GE(value, ranges(index)); |
| - DCHECK_LT(value, ranges(index + 1)); |
| - Accumulate(value, 1, index); |
| + if (value > ranges(bucket_count_) - 1) |
| + value = ranges(bucket_count_) - 1; |
| + if (value < ranges(0)) |
| + value = ranges(0); |
| + samples_->Accumulate(value, 1); |
| } |
| void Histogram::AddBoolean(bool value) { |
| DCHECK(false); |
| } |
| -void Histogram::AddSampleSet(const SampleSet& sample) { |
| - sample_.Add(sample); |
| +void Histogram::AddSamples(const HistogramSamples& samples) { |
| + samples_->Add(samples); |
| } |
| +bool Histogram::AddSamples(PickleIterator* iter) { |
| + return samples_->AddFromPickle(iter); |
| +} |
| + |
| void Histogram::SetRangeDescriptions(const DescriptionPair descriptions[]) { |
| DCHECK(false); |
| } |
| @@ -283,7 +316,7 @@ |
| // static |
| string Histogram::SerializeHistogramInfo(const Histogram& histogram, |
| - const SampleSet& snapshot) { |
| + const HistogramSamples& snapshot) { |
| DCHECK_NE(NOT_VALID_IN_RENDERER, histogram.histogram_type()); |
| DCHECK(histogram.bucket_ranges()->HasValidChecksum()); |
| @@ -296,10 +329,10 @@ |
| pickle.WriteInt(histogram.histogram_type()); |
| pickle.WriteInt(histogram.flags()); |
| - snapshot.Serialize(&pickle); |
| - |
| histogram.SerializeRanges(&pickle); |
| + snapshot.Serialize(&pickle); |
| + |
| return string(static_cast<const char*>(pickle.data()), pickle.size()); |
| } |
| @@ -318,7 +351,6 @@ |
| uint32 range_checksum; |
| int histogram_type; |
| int pickle_flags; |
| - SampleSet sample; |
| PickleIterator iter(pickle); |
| if (!iter.ReadString(&histogram_name) || |
| @@ -327,8 +359,7 @@ |
| !iter.ReadUInt64(&bucket_count) || |
| !iter.ReadUInt32(&range_checksum) || |
| !iter.ReadInt(&histogram_type) || |
| - !iter.ReadInt(&pickle_flags) || |
| - !sample.Histogram::SampleSet::Deserialize(&iter)) { |
| + !iter.ReadInt(&pickle_flags)) { |
| DLOG(ERROR) << "Pickle error decoding Histogram: " << histogram_name; |
| return false; |
| } |
| @@ -382,23 +413,21 @@ |
| if (render_histogram->flags() & kIPCSerializationSourceFlag) { |
| DVLOG(1) << "Single process mode, histogram observed and not copied: " |
| << histogram_name; |
| - } else { |
| - DCHECK_EQ(flags & render_histogram->flags(), flags); |
| - render_histogram->AddSampleSet(sample); |
| + return true; |
| } |
| - return true; |
| + DCHECK_EQ(flags & render_histogram->flags(), flags); |
| + return render_histogram->AddSamples(&iter); |
|
Ilya Sherman
2012/08/29 08:48:16
Should failing to deserialize the Pickle trigger a
kaiwang
2012/08/29 22:42:16
We don't want to crash browser when some (corrupte
|
| } |
| +// static |
| +const int Histogram::kCommonRaceBasedCountMismatch = 5; |
| -// Validate a sample and related histogram. |
| Histogram::Inconsistencies Histogram::FindCorruption( |
| - const SampleSet& snapshot) const { |
| + const HistogramSamples& samples) const { |
| int inconsistencies = NO_INCONSISTENCIES; |
| Sample previous_range = -1; // Bottom range is always 0. |
| - int64 count = 0; |
| for (size_t index = 0; index < bucket_count(); ++index) { |
| - count += snapshot.counts(index); |
| int new_range = ranges(index); |
| if (previous_range >= new_range) |
| inconsistencies |= BUCKET_ORDER_ERROR; |
| @@ -408,20 +437,11 @@ |
| if (!bucket_ranges()->HasValidChecksum()) |
| inconsistencies |= RANGE_CHECKSUM_ERROR; |
| - int64 delta64 = snapshot.redundant_count() - count; |
| + int64 delta64 = samples.redundant_count() - samples.TotalCount(); |
| if (delta64 != 0) { |
| int delta = static_cast<int>(delta64); |
| if (delta != delta64) |
| delta = INT_MAX; // Flag all giant errors as INT_MAX. |
| - // Since snapshots of histograms are taken asynchronously relative to |
| - // sampling (and snapped from different threads), it is pretty likely that |
| - // we'll catch a redundant count that doesn't match the sample count. We |
| - // allow for a certain amount of slop before flagging this as an |
| - // inconsistency. Even with an inconsistency, we'll snapshot it again (for |
| - // UMA in about a half hour, so we'll eventually get the data, if it was |
| - // not the result of a corruption. If histograms show that 1 is "too tight" |
| - // then we may try to use 2 or 3 for this slop value. |
| - const int kCommonRaceBasedCountMismatch = 5; |
| if (delta > 0) { |
| UMA_HISTOGRAM_COUNTS("Histogram.InconsistentCountHigh", delta); |
| if (delta > kCommonRaceBasedCountMismatch) |
| @@ -448,11 +468,11 @@ |
| return bucket_count_; |
| } |
| -// Do a safe atomic snapshot of sample data. |
| -// This implementation assumes we are on a safe single thread. |
| -void Histogram::SnapshotSample(SampleSet* sample) const { |
| - // Note locking not done in this version!!! |
|
Ilya Sherman
2012/08/29 08:48:16
nit: Any reason not to preserve this comment?
kaiwang
2012/08/29 22:42:16
Lock is never used in Histograms and there's no pl
|
| - *sample = sample_; |
| +scoped_ptr<BucketHistogramSamples> Histogram::SnapshotSamples() const { |
| + scoped_ptr<BucketHistogramSamples> samples( |
| + new BucketHistogramSamples(bucket_ranges())); |
| + samples->Add(*samples_); |
| + return samples.Pass(); |
| } |
| bool Histogram::HasConstructionArguments(Sample minimum, |
| @@ -471,8 +491,10 @@ |
| bucket_ranges_(ranges), |
| declared_min_(minimum), |
| declared_max_(maximum), |
| - bucket_count_(bucket_count), |
| - sample_(bucket_count) {} |
| + bucket_count_(bucket_count) { |
| + if (ranges) |
| + samples_.reset(new BucketHistogramSamples(ranges)); |
| +} |
| Histogram::~Histogram() { |
| if (StatisticsRecorder::dump_on_exit()) { |
| @@ -519,31 +541,6 @@ |
| return true; |
| } |
| -size_t Histogram::BucketIndex(Sample value) const { |
| - // Use simple binary search. This is very general, but there are better |
| - // approaches if we knew that the buckets were linearly distributed. |
| - DCHECK_LE(ranges(0), value); |
| - DCHECK_GT(ranges(bucket_count()), value); |
| - size_t under = 0; |
| - size_t over = bucket_count(); |
| - size_t mid; |
| - |
| - do { |
| - DCHECK_GE(over, under); |
|
Ilya Sherman
2012/08/29 08:48:16
Why did you remove this DCHECK?
kaiwang
2012/09/07 01:14:11
Done.
|
| - mid = under + (over - under)/2; |
| - if (mid == under) |
| - break; |
| - if (ranges(mid) <= value) |
| - under = mid; |
| - else |
| - over = mid; |
| - } while (true); |
| - |
| - DCHECK_LE(ranges(mid), value); |
| - CHECK_GT(ranges(mid+1), value); |
|
Ilya Sherman
2012/08/29 08:48:16
Why did you remove these checks?
kaiwang
2012/08/29 22:42:16
For above checks, they are guaranteed by current c
Ilya Sherman
2012/08/29 23:41:27
The purpose of DCHECKs, among other things, is to
kaiwang
2012/08/30 03:13:22
A lot of DCHECKs actually reduce the code readabil
Ilya Sherman
2012/08/30 07:57:39
I agree that some DCHECKs are unhelpful, but I dis
kaiwang
2012/09/07 01:14:11
Done.
|
| - return mid; |
| -} |
| - |
| // Use the actual bucket widths (like a linear histogram) until the widths get |
| // over some transition value, and then use that transition width. Exponentials |
| // get so big so fast (and we don't expect to see a lot of entries in the large |
| @@ -567,12 +564,6 @@ |
| return result; |
| } |
| -// Update histogram data with new sample. |
| -void Histogram::Accumulate(Sample value, Count count, size_t index) { |
| - // Note locking not done in this version!!! |
| - sample_.Accumulate(value, count, index); |
| -} |
| - |
| //------------------------------------------------------------------------------ |
| // Private methods |
| @@ -581,22 +572,21 @@ |
| string* output) const { |
| // Get local (stack) copies of all effectively volatile class data so that we |
| // are consistent across our output activities. |
| - SampleSet snapshot; |
| - SnapshotSample(&snapshot); |
| - Count sample_count = snapshot.TotalCount(); |
| + scoped_ptr<BucketHistogramSamples> snapshot = SnapshotSamples(); |
| + Count sample_count = snapshot->TotalCount(); |
| - WriteAsciiHeader(snapshot, sample_count, output); |
| + WriteAsciiHeader(*snapshot, sample_count, output); |
| output->append(newline); |
| // Prepare to normalize graphical rendering of bucket contents. |
| double max_size = 0; |
| if (graph_it) |
| - max_size = GetPeakBucketSize(snapshot); |
| + max_size = GetPeakBucketSize(*snapshot); |
| // Calculate space needed to print bucket range numbers. Leave room to print |
| // nearly the largest bucket range without sliding over the histogram. |
| size_t largest_non_empty_bucket = bucket_count() - 1; |
| - while (0 == snapshot.counts(largest_non_empty_bucket)) { |
| + while (0 == snapshot->GetCountFromBucketIndex(largest_non_empty_bucket)) { |
| if (0 == largest_non_empty_bucket) |
| break; // All buckets are empty. |
| --largest_non_empty_bucket; |
| @@ -605,7 +595,7 @@ |
| // Calculate largest print width needed for any of our bucket range displays. |
| size_t print_width = 1; |
| for (size_t i = 0; i < bucket_count(); ++i) { |
| - if (snapshot.counts(i)) { |
| + if (snapshot->GetCountFromBucketIndex(i)) { |
| size_t width = GetAsciiBucketRange(i).size() + 1; |
| if (width > print_width) |
| print_width = width; |
| @@ -616,7 +606,7 @@ |
| int64 past = 0; |
| // Output the actual histogram graph. |
| for (size_t i = 0; i < bucket_count(); ++i) { |
| - Count current = snapshot.counts(i); |
| + Count current = snapshot->GetCountFromBucketIndex(i); |
| if (!current && !PrintEmptyBucket(i)) |
| continue; |
| remaining -= current; |
| @@ -624,9 +614,12 @@ |
| output->append(range); |
| for (size_t j = 0; range.size() + j < print_width + 1; ++j) |
| output->push_back(' '); |
| - if (0 == current && i < bucket_count() - 1 && 0 == snapshot.counts(i + 1)) { |
| - while (i < bucket_count() - 1 && 0 == snapshot.counts(i + 1)) |
| + if (0 == current && i < bucket_count() - 1 && |
| + 0 == snapshot->GetCountFromBucketIndex(i + 1)) { |
| + while (i < bucket_count() - 1 && |
| + 0 == snapshot->GetCountFromBucketIndex(i + 1)) { |
| ++i; |
| + } |
| output->append("... "); |
| output->append(newline); |
| continue; // No reason to plot emptiness. |
| @@ -641,17 +634,18 @@ |
| DCHECK_EQ(sample_count, past); |
| } |
| -double Histogram::GetPeakBucketSize(const SampleSet& snapshot) const { |
| +double Histogram::GetPeakBucketSize( |
| + const BucketHistogramSamples& samples) const { |
| double max = 0; |
| for (size_t i = 0; i < bucket_count() ; ++i) { |
| - double current_size = GetBucketSize(snapshot.counts(i), i); |
| + double current_size = GetBucketSize(samples.GetCountFromBucketIndex(i), i); |
| if (current_size > max) |
| max = current_size; |
| } |
| return max; |
| } |
| -void Histogram::WriteAsciiHeader(const SampleSet& snapshot, |
| +void Histogram::WriteAsciiHeader(const BucketHistogramSamples& samples, |
| Count sample_count, |
| string* output) const { |
| StringAppendF(output, |
| @@ -659,9 +653,9 @@ |
| histogram_name().c_str(), |
| sample_count); |
| if (0 == sample_count) { |
| - DCHECK_EQ(snapshot.sum(), 0); |
| + DCHECK_EQ(samples.sum(), 0); |
| } else { |
| - double average = static_cast<float>(snapshot.sum()) / sample_count; |
| + double average = static_cast<float>(samples.sum()) / sample_count; |
| StringAppendF(output, ", average = %.1f", average); |
| } |