Chromium Code Reviews| Index: base/metrics/histogram.cc |
| =================================================================== |
| --- base/metrics/histogram.cc (revision 152603) |
| +++ base/metrics/histogram.cc (working copy) |
| @@ -31,107 +31,140 @@ |
| typedef HistogramBase::Count Count; |
| typedef HistogramBase::Sample Sample; |
| -// static |
| -const size_t Histogram::kBucketCount_MAX = 16384u; |
| +BucketHistogramSamples::BucketHistogramSamples( |
| + const BucketRanges* bucket_ranges) |
| + : counts_(bucket_ranges->size() - 1), |
| + bucket_ranges_(bucket_ranges) { |
| + CHECK_GE(bucket_ranges_->size(), 2u); |
| +} |
| +BucketHistogramSamples::~BucketHistogramSamples() {} |
| -Histogram::SampleSet::SampleSet(size_t size) |
| - : counts_(size, 0), |
| - sum_(0), |
| - redundant_count_(0) {} |
| - |
| -Histogram::SampleSet::SampleSet() |
| - : counts_(), |
| - sum_(0), |
| - redundant_count_(0) {} |
| - |
| -Histogram::SampleSet::~SampleSet() {} |
| - |
| -void Histogram::SampleSet::Resize(size_t size) { |
| - counts_.resize(size, 0); |
| +void BucketHistogramSamples::Accumulate(Sample value, Count count) { |
| + size_t bucket_index = GetBucketIndex(value, bucket_ranges_); |
| + counts_[bucket_index] += count; |
| + set_sum(sum() + count * value); |
| + set_redundant_count(redundant_count() + count); |
|
Ilya Sherman
2012/08/29 08:48:16
Optional nit: Perhaps the HistogramSamples() class
kaiwang
2012/08/29 22:42:16
Changed IncreaseSum and IncreaseRedundantCount
|
| } |
| -void Histogram::SampleSet::Accumulate(Sample value, Count count, |
| - size_t index) { |
| - DCHECK(count == 1 || count == -1); |
| - counts_[index] += count; |
| - sum_ += count * value; |
| - redundant_count_ += count; |
| - DCHECK_GE(counts_[index], 0); |
| - DCHECK_GE(sum_, 0); |
| - DCHECK_GE(redundant_count_, 0); |
| +Count BucketHistogramSamples::GetCount(Sample value) const { |
| + size_t bucket_index = GetBucketIndex(value, bucket_ranges_); |
| + return counts_[bucket_index]; |
| } |
| -Count Histogram::SampleSet::TotalCount() const { |
| - Count total = 0; |
| - for (Counts::const_iterator it = counts_.begin(); |
| - it != counts_.end(); |
| - ++it) { |
| - total += *it; |
| +Count BucketHistogramSamples::TotalCount() const { |
| + Count count = 0; |
| + for (size_t i = 0; i < counts_.size(); i++) { |
| + count += counts_[i]; |
| } |
| - return total; |
| + return count; |
| } |
| -void Histogram::SampleSet::Add(const SampleSet& other) { |
| - DCHECK_EQ(counts_.size(), other.counts_.size()); |
| - sum_ += other.sum_; |
| - redundant_count_ += other.redundant_count_; |
| - for (size_t index = 0; index < counts_.size(); ++index) |
| - counts_[index] += other.counts_[index]; |
| +Count BucketHistogramSamples::GetCountFromBucketIndex( |
| + size_t bucket_index) const { |
|
Ilya Sherman
2012/08/29 08:48:16
nit: DCHECK that the index is in range?
kaiwang
2012/08/29 22:42:16
Done.
|
| + return counts_[bucket_index]; |
| } |
| -void Histogram::SampleSet::Subtract(const SampleSet& other) { |
| - DCHECK_EQ(counts_.size(), other.counts_.size()); |
| - // Note: Race conditions in snapshotting a sum may lead to (temporary) |
| - // negative values when snapshots are later combined (and deltas calculated). |
| - // As a result, we don't currently CHECK() for positive values. |
| - sum_ -= other.sum_; |
| - redundant_count_ -= other.redundant_count_; |
| - for (size_t index = 0; index < counts_.size(); ++index) { |
| - counts_[index] -= other.counts_[index]; |
| - DCHECK_GE(counts_[index], 0); |
| - } |
| +scoped_ptr<SampleCountIterator> BucketHistogramSamples::Iterator() const { |
| + return scoped_ptr<SampleCountIterator>( |
| + new BucketHistogramSamplesIterator(&counts_, bucket_ranges_)); |
| } |
| -bool Histogram::SampleSet::Serialize(Pickle* pickle) const { |
| - pickle->WriteInt64(sum_); |
| - pickle->WriteInt64(redundant_count_); |
| - pickle->WriteUInt64(counts_.size()); |
| +// Use simple binary search. This is very general, but there are better |
| +// approaches if we knew that the buckets were linearly distributed. |
| +// |
| +// static |
| +size_t BucketHistogramSamples::GetBucketIndex( |
| + Sample value, const BucketRanges* bucket_ranges) { |
| + size_t bucket_count = bucket_ranges->size() - 1; |
| + CHECK_GE(bucket_count, 1u); |
| + CHECK_GE(value, bucket_ranges->range(0)); |
| + CHECK_LT(value, bucket_ranges->range(bucket_count)); |
| - for (size_t index = 0; index < counts_.size(); ++index) { |
| - pickle->WriteInt(counts_[index]); |
| + size_t under = 0; |
| + size_t over = bucket_count; |
| + size_t mid; |
| + do { |
| + mid = under + (over - under)/2; |
| + if (mid == under) |
| + break; |
| + if (bucket_ranges->range(mid) <= value) |
| + under = mid; |
| + else |
| + over = mid; |
| + } while (true); |
| + return mid; |
| +} |
| + |
| +bool BucketHistogramSamples::AddSubtractImpl(SampleCountIterator* iter, |
| + bool is_add) { |
| + HistogramBase::Sample min; |
| + HistogramBase::Sample max; |
| + HistogramBase::Count count; |
| + |
| + // Go through the iterator and add the counts into correct bucket. |
|
Ilya Sherman
2012/08/29 08:48:16
The loop has three cases. Please describe each of
kaiwang
2012/08/29 22:42:16
Done.
|
| + size_t index = 0; |
| + while (index < counts_.size() && !iter->Done()) { |
| + iter->Get(&min, &max, &count); |
| + if (min == bucket_ranges_->range(index) && |
| + max == bucket_ranges_->range(index + 1)) { |
| + // Bucket match! |
| + counts_[index] += is_add ? count : -count; |
| + iter->Next(); |
| + } else if (min > bucket_ranges_->range(index)) { |
| + index++; |
| + } else { |
| + return false; |
| + } |
| } |
| - return true; |
| + return iter->Done(); |
| } |
| -bool Histogram::SampleSet::Deserialize(PickleIterator* iter) { |
| - DCHECK_EQ(counts_.size(), 0u); |
| - DCHECK_EQ(sum_, 0); |
| - DCHECK_EQ(redundant_count_, 0); |
| +BucketHistogramSamplesIterator::BucketHistogramSamplesIterator( |
| + const vector<Count>* counts, |
| + const BucketRanges* bucket_ranges) |
| + : counts_(counts), |
| + bucket_ranges_(bucket_ranges), |
| + index_(-1), |
|
Ilya Sherman
2012/08/29 08:48:16
nit: It looks like the code would be slightly clea
kaiwang
2012/08/29 22:42:16
Need a way to tell it's on first bucket (index ==
Ilya Sherman
2012/08/29 23:41:27
Can't you tell this by the check "index_ < counts_
kaiwang
2012/08/30 03:13:22
even when counts_ has a large size, it's still pos
Ilya Sherman
2012/08/30 07:57:39
But, the constructor immediately scans over to the
kaiwang
2012/09/07 01:14:11
Changed according to your suggestion. (note, I'll
|
| + is_done_(false) { |
| + CHECK_GT(bucket_ranges_->size(), counts_->size()); |
| + ForwardToNextNonemptyBucket(); |
| +} |
| - uint64 counts_size; |
| +bool BucketHistogramSamplesIterator::Done() { |
| + return is_done_; |
|
Ilya Sherman
2012/08/30 07:57:39
Rather than keeping a separate variable for this,
|
| +} |
| - if (!iter->ReadInt64(&sum_) || |
| - !iter->ReadInt64(&redundant_count_) || |
| - !iter->ReadUInt64(&counts_size)) { |
| - return false; |
| - } |
| +void BucketHistogramSamplesIterator::Next() { |
| + ForwardToNextNonemptyBucket(); |
| +} |
| - if (counts_size == 0) |
| - return false; |
| +void BucketHistogramSamplesIterator::Get(HistogramBase::Sample* min, |
| + HistogramBase::Sample* max, |
| + HistogramBase::Count* count) { |
| + CHECK(!Done()); |
| + if (min != NULL) |
| + *min = bucket_ranges_->range(index_); |
| + if (max != NULL) |
| + *max = bucket_ranges_->range(index_ + 1); |
| + if (count != NULL) |
| + *count = (*counts_)[index_]; |
| +} |
| - int count = 0; |
| - for (uint64 index = 0; index < counts_size; ++index) { |
| - int i; |
| - if (!iter->ReadInt(&i)) |
| - return false; |
| - counts_.push_back(i); |
| - count += i; |
| +void BucketHistogramSamplesIterator::ForwardToNextNonemptyBucket() { |
| + CHECK(!Done()); |
|
Ilya Sherman
2012/08/30 07:57:40
This should probably be part of the public method
|
| + |
| + while (index_ < static_cast<int>(counts_->size()) - 1) { |
| + index_++; |
| + if ((*counts_)[index_] != 0) |
| + return; |
| } |
| - DCHECK_EQ(count, redundant_count_); |
| - return count == redundant_count_; |
| + is_done_ = true; |
| } |
| +// static |
| +const size_t Histogram::kBucketCount_MAX = 16384u; |
| + |
| // TODO(rtenneti): delete this code after debugging. |
| void CheckCorruption(const Histogram& histogram, bool new_histogram) { |
| const std::string& histogram_name = histogram.histogram_name(); |
| @@ -245,26 +278,26 @@ |
| ranges->ResetChecksum(); |
| } |
| -// static |
| void Histogram::Add(int value) { |
| - if (value > kSampleType_MAX - 1) |
| - value = kSampleType_MAX - 1; |
| - if (value < 0) |
| - value = 0; |
| - size_t index = BucketIndex(value); |
| - DCHECK_GE(value, ranges(index)); |
| - DCHECK_LT(value, ranges(index + 1)); |
| - Accumulate(value, 1, index); |
| + if (value > ranges(bucket_count_) - 1) |
| + value = ranges(bucket_count_) - 1; |
| + if (value < ranges(0)) |
| + value = ranges(0); |
| + samples_->Accumulate(value, 1); |
| } |
| void Histogram::AddBoolean(bool value) { |
| DCHECK(false); |
| } |
| -void Histogram::AddSampleSet(const SampleSet& sample) { |
| - sample_.Add(sample); |
| +void Histogram::AddSamples(const HistogramSamples& samples) { |
| + samples_->Add(samples); |
| } |
| +bool Histogram::AddSamples(PickleIterator* iter) { |
| + return samples_->AddFromPickle(iter); |
| +} |
| + |
| void Histogram::SetRangeDescriptions(const DescriptionPair descriptions[]) { |
| DCHECK(false); |
| } |
| @@ -283,7 +316,7 @@ |
| // static |
| string Histogram::SerializeHistogramInfo(const Histogram& histogram, |
| - const SampleSet& snapshot) { |
| + const HistogramSamples& snapshot) { |
| DCHECK_NE(NOT_VALID_IN_RENDERER, histogram.histogram_type()); |
| DCHECK(histogram.bucket_ranges()->HasValidChecksum()); |
| @@ -296,10 +329,10 @@ |
| pickle.WriteInt(histogram.histogram_type()); |
| pickle.WriteInt(histogram.flags()); |
| - snapshot.Serialize(&pickle); |
| - |
| histogram.SerializeRanges(&pickle); |
| + snapshot.Serialize(&pickle); |
| + |
| return string(static_cast<const char*>(pickle.data()), pickle.size()); |
| } |
| @@ -318,7 +351,6 @@ |
| uint32 range_checksum; |
| int histogram_type; |
| int pickle_flags; |
| - SampleSet sample; |
| PickleIterator iter(pickle); |
| if (!iter.ReadString(&histogram_name) || |
| @@ -327,8 +359,7 @@ |
| !iter.ReadUInt64(&bucket_count) || |
| !iter.ReadUInt32(&range_checksum) || |
| !iter.ReadInt(&histogram_type) || |
| - !iter.ReadInt(&pickle_flags) || |
| - !sample.Histogram::SampleSet::Deserialize(&iter)) { |
| + !iter.ReadInt(&pickle_flags)) { |
| DLOG(ERROR) << "Pickle error decoding Histogram: " << histogram_name; |
| return false; |
| } |
| @@ -382,23 +413,21 @@ |
| if (render_histogram->flags() & kIPCSerializationSourceFlag) { |
| DVLOG(1) << "Single process mode, histogram observed and not copied: " |
| << histogram_name; |
| - } else { |
| - DCHECK_EQ(flags & render_histogram->flags(), flags); |
| - render_histogram->AddSampleSet(sample); |
| + return true; |
| } |
| - return true; |
| + DCHECK_EQ(flags & render_histogram->flags(), flags); |
| + return render_histogram->AddSamples(&iter); |
|
Ilya Sherman
2012/08/29 08:48:16
Should failing to deserialize the Pickle trigger a
kaiwang
2012/08/29 22:42:16
We don't want to crash browser when some (corrupte
|
| } |
| +// static |
| +const int Histogram::kCommonRaceBasedCountMismatch = 5; |
| -// Validate a sample and related histogram. |
| Histogram::Inconsistencies Histogram::FindCorruption( |
| - const SampleSet& snapshot) const { |
| + const HistogramSamples& samples) const { |
| int inconsistencies = NO_INCONSISTENCIES; |
| Sample previous_range = -1; // Bottom range is always 0. |
| - int64 count = 0; |
| for (size_t index = 0; index < bucket_count(); ++index) { |
| - count += snapshot.counts(index); |
| int new_range = ranges(index); |
| if (previous_range >= new_range) |
| inconsistencies |= BUCKET_ORDER_ERROR; |
| @@ -408,20 +437,11 @@ |
| if (!bucket_ranges()->HasValidChecksum()) |
| inconsistencies |= RANGE_CHECKSUM_ERROR; |
| - int64 delta64 = snapshot.redundant_count() - count; |
| + int64 delta64 = samples.redundant_count() - samples.TotalCount(); |
| if (delta64 != 0) { |
| int delta = static_cast<int>(delta64); |
| if (delta != delta64) |
| delta = INT_MAX; // Flag all giant errors as INT_MAX. |
| - // Since snapshots of histograms are taken asynchronously relative to |
| - // sampling (and snapped from different threads), it is pretty likely that |
| - // we'll catch a redundant count that doesn't match the sample count. We |
| - // allow for a certain amount of slop before flagging this as an |
| - // inconsistency. Even with an inconsistency, we'll snapshot it again (for |
| - // UMA in about a half hour, so we'll eventually get the data, if it was |
| - // not the result of a corruption. If histograms show that 1 is "too tight" |
| - // then we may try to use 2 or 3 for this slop value. |
| - const int kCommonRaceBasedCountMismatch = 5; |
| if (delta > 0) { |
| UMA_HISTOGRAM_COUNTS("Histogram.InconsistentCountHigh", delta); |
| if (delta > kCommonRaceBasedCountMismatch) |
| @@ -448,11 +468,11 @@ |
| return bucket_count_; |
| } |
| -// Do a safe atomic snapshot of sample data. |
| -// This implementation assumes we are on a safe single thread. |
| -void Histogram::SnapshotSample(SampleSet* sample) const { |
| - // Note locking not done in this version!!! |
|
Ilya Sherman
2012/08/29 08:48:16
nit: Any reason not to preserve this comment?
kaiwang
2012/08/29 22:42:16
Lock is never used in Histograms and there's no pl
|
| - *sample = sample_; |
| +scoped_ptr<BucketHistogramSamples> Histogram::SnapshotSamples() const { |
| + scoped_ptr<BucketHistogramSamples> samples( |
| + new BucketHistogramSamples(bucket_ranges())); |
| + samples->Add(*samples_); |
| + return samples.Pass(); |
| } |
| bool Histogram::HasConstructionArguments(Sample minimum, |
| @@ -471,8 +491,10 @@ |
| bucket_ranges_(ranges), |
| declared_min_(minimum), |
| declared_max_(maximum), |
| - bucket_count_(bucket_count), |
| - sample_(bucket_count) {} |
| + bucket_count_(bucket_count) { |
| + if (ranges) |
| + samples_.reset(new BucketHistogramSamples(ranges)); |
| +} |
| Histogram::~Histogram() { |
| if (StatisticsRecorder::dump_on_exit()) { |
| @@ -519,31 +541,6 @@ |
| return true; |
| } |
| -size_t Histogram::BucketIndex(Sample value) const { |
| - // Use simple binary search. This is very general, but there are better |
| - // approaches if we knew that the buckets were linearly distributed. |
| - DCHECK_LE(ranges(0), value); |
| - DCHECK_GT(ranges(bucket_count()), value); |
| - size_t under = 0; |
| - size_t over = bucket_count(); |
| - size_t mid; |
| - |
| - do { |
| - DCHECK_GE(over, under); |
|
Ilya Sherman
2012/08/29 08:48:16
Why did you remove this DCHECK?
kaiwang
2012/09/07 01:14:11
Done.
|
| - mid = under + (over - under)/2; |
| - if (mid == under) |
| - break; |
| - if (ranges(mid) <= value) |
| - under = mid; |
| - else |
| - over = mid; |
| - } while (true); |
| - |
| - DCHECK_LE(ranges(mid), value); |
| - CHECK_GT(ranges(mid+1), value); |
|
Ilya Sherman
2012/08/29 08:48:16
Why did you remove these checks?
kaiwang
2012/08/29 22:42:16
For above checks, they are guaranteed by current c
Ilya Sherman
2012/08/29 23:41:27
The purpose of DCHECKs, among other things, is to
kaiwang
2012/08/30 03:13:22
A lot of DCHECKs actually reduce the code readabil
Ilya Sherman
2012/08/30 07:57:39
I agree that some DCHECKs are unhelpful, but I dis
kaiwang
2012/09/07 01:14:11
Done.
|
| - return mid; |
| -} |
| - |
| // Use the actual bucket widths (like a linear histogram) until the widths get |
| // over some transition value, and then use that transition width. Exponentials |
| // get so big so fast (and we don't expect to see a lot of entries in the large |
| @@ -567,12 +564,6 @@ |
| return result; |
| } |
| -// Update histogram data with new sample. |
| -void Histogram::Accumulate(Sample value, Count count, size_t index) { |
| - // Note locking not done in this version!!! |
| - sample_.Accumulate(value, count, index); |
| -} |
| - |
| //------------------------------------------------------------------------------ |
| // Private methods |
| @@ -581,22 +572,21 @@ |
| string* output) const { |
| // Get local (stack) copies of all effectively volatile class data so that we |
| // are consistent across our output activities. |
| - SampleSet snapshot; |
| - SnapshotSample(&snapshot); |
| - Count sample_count = snapshot.TotalCount(); |
| + scoped_ptr<BucketHistogramSamples> snapshot = SnapshotSamples(); |
| + Count sample_count = snapshot->TotalCount(); |
| - WriteAsciiHeader(snapshot, sample_count, output); |
| + WriteAsciiHeader(*snapshot, sample_count, output); |
| output->append(newline); |
| // Prepare to normalize graphical rendering of bucket contents. |
| double max_size = 0; |
| if (graph_it) |
| - max_size = GetPeakBucketSize(snapshot); |
| + max_size = GetPeakBucketSize(*snapshot); |
| // Calculate space needed to print bucket range numbers. Leave room to print |
| // nearly the largest bucket range without sliding over the histogram. |
| size_t largest_non_empty_bucket = bucket_count() - 1; |
| - while (0 == snapshot.counts(largest_non_empty_bucket)) { |
| + while (0 == snapshot->GetCountFromBucketIndex(largest_non_empty_bucket)) { |
| if (0 == largest_non_empty_bucket) |
| break; // All buckets are empty. |
| --largest_non_empty_bucket; |
| @@ -605,7 +595,7 @@ |
| // Calculate largest print width needed for any of our bucket range displays. |
| size_t print_width = 1; |
| for (size_t i = 0; i < bucket_count(); ++i) { |
| - if (snapshot.counts(i)) { |
| + if (snapshot->GetCountFromBucketIndex(i)) { |
| size_t width = GetAsciiBucketRange(i).size() + 1; |
| if (width > print_width) |
| print_width = width; |
| @@ -616,7 +606,7 @@ |
| int64 past = 0; |
| // Output the actual histogram graph. |
| for (size_t i = 0; i < bucket_count(); ++i) { |
| - Count current = snapshot.counts(i); |
| + Count current = snapshot->GetCountFromBucketIndex(i); |
| if (!current && !PrintEmptyBucket(i)) |
| continue; |
| remaining -= current; |
| @@ -624,9 +614,12 @@ |
| output->append(range); |
| for (size_t j = 0; range.size() + j < print_width + 1; ++j) |
| output->push_back(' '); |
| - if (0 == current && i < bucket_count() - 1 && 0 == snapshot.counts(i + 1)) { |
| - while (i < bucket_count() - 1 && 0 == snapshot.counts(i + 1)) |
| + if (0 == current && i < bucket_count() - 1 && |
| + 0 == snapshot->GetCountFromBucketIndex(i + 1)) { |
| + while (i < bucket_count() - 1 && |
| + 0 == snapshot->GetCountFromBucketIndex(i + 1)) { |
| ++i; |
| + } |
| output->append("... "); |
| output->append(newline); |
| continue; // No reason to plot emptiness. |
| @@ -641,17 +634,18 @@ |
| DCHECK_EQ(sample_count, past); |
| } |
| -double Histogram::GetPeakBucketSize(const SampleSet& snapshot) const { |
| +double Histogram::GetPeakBucketSize( |
| + const BucketHistogramSamples& samples) const { |
| double max = 0; |
| for (size_t i = 0; i < bucket_count() ; ++i) { |
| - double current_size = GetBucketSize(snapshot.counts(i), i); |
| + double current_size = GetBucketSize(samples.GetCountFromBucketIndex(i), i); |
| if (current_size > max) |
| max = current_size; |
| } |
| return max; |
| } |
| -void Histogram::WriteAsciiHeader(const SampleSet& snapshot, |
| +void Histogram::WriteAsciiHeader(const BucketHistogramSamples& samples, |
| Count sample_count, |
| string* output) const { |
| StringAppendF(output, |
| @@ -659,9 +653,9 @@ |
| histogram_name().c_str(), |
| sample_count); |
| if (0 == sample_count) { |
| - DCHECK_EQ(snapshot.sum(), 0); |
| + DCHECK_EQ(samples.sum(), 0); |
| } else { |
| - double average = static_cast<float>(snapshot.sum()) / sample_count; |
| + double average = static_cast<float>(samples.sum()) / sample_count; |
| StringAppendF(output, ", average = %.1f", average); |
| } |