base/metrics/histogram.cc - Issue 10829466: SampleSet -> HistogramSamples (will be reused by SparseHistogram)

Unified Diff: base/metrics/histogram.cc

Issue 10829466: SampleSet -> HistogramSamples (will be reused by SparseHistogram) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/metrics/histogram.cc

===================================================================

--- base/metrics/histogram.cc (revision 152603)

+++ base/metrics/histogram.cc (working copy)

@@ -31,7 +31,137 @@

typedef HistogramBase::Count Count;

typedef HistogramBase::Sample Sample;

+BucketHistogramSamples::BucketHistogramSamples(

+ const BucketRanges* bucket_ranges)

+ : counts_(bucket_ranges->size() - 1),

+ bucket_ranges_(bucket_ranges) {

+ CHECK_GE(bucket_ranges_->size(), 2u);

Ilya Sherman 2012/08/23 07:50:54 nit: Does this need to be a CHECK, or would a DCHE

kaiwang 2012/08/24 04:17:58 This check has litter performance impact, I don't

Ilya Sherman 2012/08/29 08:48:16 That is an argument for only ever using CHECKs, an

Ilya Sherman 2012/08/29 23:46:00 Brett or John, could one of you chime in on this q

+BucketHistogramSamples::~BucketHistogramSamples() {}

+void BucketHistogramSamples::Accumulate(Sample value, Count count) {

+ size_t i = BucketIndex(value, bucket_ranges_);

Ilya Sherman 2012/08/23 07:50:54 nit: i -> bucket (prefer to use descriptive names)

kaiwang 2012/08/24 04:17:58 i -> bucket_index

+ counts_[i] += count;

+ sum_ += count * value;

Ilya Sherman 2012/08/23 07:50:54 Any need to check for overflow?

kaiwang 2012/08/24 04:17:58 sum is int64 and count/value are both int32. Also,

Ilya Sherman 2012/08/29 08:48:16 Why isn't clamping to the max value for int64 the

+ redundant_count_ += count;

+Count BucketHistogramSamples::GetCount(Sample value) const {

+ size_t i = BucketIndex(value, bucket_ranges_);

+ return counts_[i];

+Count BucketHistogramSamples::TotalCount() const {

+ Count count = 0;

+ for (size_t i = 0; i < counts_.size(); i++) {

+ count += counts_[i];

+ }

+ return count;

+Count BucketHistogramSamples::GetCountFromBucketIndex(

+ size_t bucket_index) const {

+ return counts_[bucket_index];

+scoped_ptr<SampleCountIterator> BucketHistogramSamples::Iterator() const {

+ return scoped_ptr<SampleCountIterator>(

+ new BucketHistogramSamplesIterator(&counts_, bucket_ranges_));

+// Use simple binary search. This is very general, but there are better

+// approaches if we knew that the buckets were linearly distributed.

+//

// static

+size_t BucketHistogramSamples::BucketIndex(Sample value,

+ const BucketRanges* bucket_ranges) {

+ size_t bucket_count = bucket_ranges->size() - 1;

+ CHECK_GE(bucket_count, 1u);

+ CHECK_GE(value, bucket_ranges->range(0));

+ CHECK_LT(value, bucket_ranges->range(bucket_count));

+ size_t under = 0;

+ size_t over = bucket_count;

+ size_t mid;

+ do {

+ mid = under + (over - under)/2;

+ if (mid == under)

+ break;

+ if (bucket_ranges->range(mid) <= value)

+ under = mid;

+ else

+ over = mid;

+ } while (true);

Ilya Sherman 2012/08/23 07:50:54 Can we use a standard binary search implementation

kaiwang 2012/08/24 04:17:58 This code is copied from below. I think for refact

+ return mid;

+bool BucketHistogramSamples::AddSubtractHelper(SampleCountIterator* iter,

+ bool is_add) {

+ HistogramBase::Sample min;

+ HistogramBase::Sample max;

+ HistogramBase::Count count;

+ size_t index = 0;

+ while (index < counts_.size() && !iter->Done()) {

Ilya Sherman 2012/08/23 07:50:54 Please add a comment describing what this double-i

kaiwang 2012/08/24 04:17:58 Done.

+ iter->Get(&min, &max, &count);

+ if (min == bucket_ranges_->range(index) &&

+ max == bucket_ranges_->range(index + 1)) {

+ // Bucket match!

+ counts_[index] += is_add ? count : - count;

Ilya Sherman 2012/08/23 07:50:54 nit: The unary minus operator should not be follow

kaiwang 2012/08/24 04:17:58 Done.

+ iter->Next();

+ } else if (min > bucket_ranges_->range(index)) {

+ index++;

+ } else {

+ return false;

+ }

+ return iter->Done();

+BucketHistogramSamplesIterator::BucketHistogramSamplesIterator(

+ const vector<Count>* counts,

+ const BucketRanges* bucket_ranges)

+ : counts_(counts),

+ bucket_ranges_(bucket_ranges),

+ index_(-1),

+ is_done_(false) {

+ CHECK_GT(bucket_ranges_->size(), counts_->size());

+ ForwardIndex();

+bool BucketHistogramSamplesIterator::Done() {

+ return is_done_;

+void BucketHistogramSamplesIterator::Next() {

+ ForwardIndex();

+void BucketHistogramSamplesIterator::Get(HistogramBase::Sample* min,

+ HistogramBase::Sample* max,

+ HistogramBase::Count* count) {

+ CHECK(!Done());

+ if (min != NULL)

Ilya Sherman 2012/08/23 07:50:54 nit: I saw nothing in the method description that

kaiwang 2012/08/24 04:17:58 added to interface comment

+ *min = bucket_ranges_->range(index_);

+ if (max != NULL)

+ *max = bucket_ranges_->range(index_ + 1);

+ if (count != NULL)

+ *count = (*counts_)[index_];

+void BucketHistogramSamplesIterator::ForwardIndex() {

+ CHECK(!Done());

+ while (index_ < static_cast<int>(counts_->size()) - 1) {

+ index_++;

+ if ((*counts_)[index_] != 0)

+ return;

+ }

+ is_done_ = true;

+// static

const size_t Histogram::kBucketCount_MAX = 16384u;

Histogram::SampleSet::SampleSet(size_t size)

@@ -245,26 +375,26 @@

ranges->ResetChecksum();

}

-// static

void Histogram::Add(int value) {

- if (value > kSampleType_MAX - 1)

- value = kSampleType_MAX - 1;

- if (value < 0)

- value = 0;

- size_t index = BucketIndex(value);

- DCHECK_GE(value, ranges(index));

- DCHECK_LT(value, ranges(index + 1));

- Accumulate(value, 1, index);

+ if (value > ranges(bucket_count_) - 1)

jar (doing other things) 2012/08/30 22:54:38 You appear to have changed a constant comparisons

kaiwang 2012/09/07 01:14:11 Done.

+ value = ranges(bucket_count_) - 1;

+ if (value < ranges(0))

+ value = ranges(0);

+ samples_->Accumulate(value, 1);

}

void Histogram::AddBoolean(bool value) {

DCHECK(false);

}

-void Histogram::AddSampleSet(const SampleSet& sample) {

- sample_.Add(sample);

+void Histogram::AddSamples(const HistogramSamples& samples) {

+ samples_->Add(samples);

}

+bool Histogram::AddSamples(PickleIterator* iter) {

+ return samples_->Add(iter);

void Histogram::SetRangeDescriptions(const DescriptionPair descriptions[]) {

DCHECK(false);

}

@@ -283,7 +413,7 @@

// static

string Histogram::SerializeHistogramInfo(const Histogram& histogram,

- const SampleSet& snapshot) {

+ const HistogramSamples& snapshot) {

DCHECK_NE(NOT_VALID_IN_RENDERER, histogram.histogram_type());

DCHECK(histogram.bucket_ranges()->HasValidChecksum());

@@ -296,10 +426,10 @@

pickle.WriteInt(histogram.histogram_type());

pickle.WriteInt(histogram.flags());

+ histogram.SerializeRanges(&pickle);

Ilya Sherman 2012/08/23 07:50:54 Why did you move this line?

kaiwang 2012/08/24 04:17:58 this is part of "histogram info", while the code b

snapshot.Serialize(&pickle);

- histogram.SerializeRanges(&pickle);

return string(static_cast<const char*>(pickle.data()), pickle.size());

}

@@ -318,7 +448,6 @@

uint32 range_checksum;

int histogram_type;

int pickle_flags;

- SampleSet sample;

PickleIterator iter(pickle);

if (!iter.ReadString(&histogram_name) ||

@@ -327,8 +456,7 @@

!iter.ReadUInt64(&bucket_count) ||

!iter.ReadUInt32(&range_checksum) ||

!iter.ReadInt(&histogram_type) ||

- !iter.ReadInt(&pickle_flags) ||

- !sample.Histogram::SampleSet::Deserialize(&iter)) {

jar (doing other things) 2012/08/30 22:54:38 How are these samples recovered? I don't see the

kaiwang 2012/09/07 01:14:11 See line 387, it adds all samples from a PickleIte

+ !iter.ReadInt(&pickle_flags)) {

DLOG(ERROR) << "Pickle error decoding Histogram: " << histogram_name;

return false;

}

@@ -382,23 +510,22 @@

if (render_histogram->flags() & kIPCSerializationSourceFlag) {

DVLOG(1) << "Single process mode, histogram observed and not copied: "

<< histogram_name;

- } else {

- DCHECK_EQ(flags & render_histogram->flags(), flags);

- render_histogram->AddSampleSet(sample);

+ return true;

}

- return true;

+ DCHECK_EQ(flags & render_histogram->flags(), flags);

Ilya Sherman 2012/08/23 07:50:54 nit: No need for this blank line.

kaiwang 2012/08/24 04:17:58 Done.

+ return render_histogram->AddSamples(&iter);

}

+// static

+const int Histogram::kCommonRaceBasedCountMismatch = 5;

Ilya Sherman 2012/08/23 07:50:54 Why did you move this constant into the Histogram

kaiwang 2012/08/24 04:17:58 It's needed in histogram_snapshot_manager.cc The o

-// Validate a sample and related histogram.

Ilya Sherman 2012/08/23 07:50:54 nit: Why did you remove this comment?

kaiwang 2012/08/24 04:17:58 better function comment in .h file

Histogram::Inconsistencies Histogram::FindCorruption(

- const SampleSet& snapshot) const {

+ const HistogramSamples& samples) const {

int inconsistencies = NO_INCONSISTENCIES;

Sample previous_range = -1; // Bottom range is always 0.

- int64 count = 0;

for (size_t index = 0; index < bucket_count(); ++index) {

- count += snapshot.counts(index);

int new_range = ranges(index);

if (previous_range >= new_range)

inconsistencies |= BUCKET_ORDER_ERROR;

@@ -408,20 +535,11 @@

if (!bucket_ranges()->HasValidChecksum())

inconsistencies |= RANGE_CHECKSUM_ERROR;

- int64 delta64 = snapshot.redundant_count() - count;

+ int64 delta64 = samples.redundant_count() - samples.TotalCount();

if (delta64 != 0) {

int delta = static_cast<int>(delta64);

if (delta != delta64)

delta = INT_MAX; // Flag all giant errors as INT_MAX.

- // Since snapshots of histograms are taken asynchronously relative to

- // sampling (and snapped from different threads), it is pretty likely that

- // we'll catch a redundant count that doesn't match the sample count. We

- // allow for a certain amount of slop before flagging this as an

- // inconsistency. Even with an inconsistency, we'll snapshot it again (for

- // UMA in about a half hour, so we'll eventually get the data, if it was

- // not the result of a corruption. If histograms show that 1 is "too tight"

- // then we may try to use 2 or 3 for this slop value.

- const int kCommonRaceBasedCountMismatch = 5;

if (delta > 0) {

UMA_HISTOGRAM_COUNTS("Histogram.InconsistentCountHigh", delta);

if (delta > kCommonRaceBasedCountMismatch)

@@ -448,11 +566,11 @@

return bucket_count_;

}

-// Do a safe atomic snapshot of sample data.

-// This implementation assumes we are on a safe single thread.

-void Histogram::SnapshotSample(SampleSet* sample) const {

- // Note locking not done in this version!!!

- *sample = sample_;

+scoped_ptr<BucketHistogramSamples> Histogram::SnapshotSamples() const {

+ scoped_ptr<BucketHistogramSamples> samples(

+ new BucketHistogramSamples(bucket_ranges()));

+ samples->Add(*samples_);

+ return samples.Pass();

}

bool Histogram::HasConstructionArguments(Sample minimum,

@@ -471,8 +589,11 @@

bucket_ranges_(ranges),

declared_min_(minimum),

declared_max_(maximum),

- bucket_count_(bucket_count),

- sample_(bucket_count) {}

+ bucket_count_(bucket_count) {

+ if (ranges) {

+ samples_.reset(new BucketHistogramSamples(ranges));

+ }

Ilya Sherman 2012/08/23 07:50:54 nit: No need for curly braces

kaiwang 2012/08/24 04:17:58 Done.

Histogram::~Histogram() {

if (StatisticsRecorder::dump_on_exit()) {

@@ -519,31 +640,6 @@

return true;

}

-size_t Histogram::BucketIndex(Sample value) const {

- // Use simple binary search. This is very general, but there are better

- // approaches if we knew that the buckets were linearly distributed.

- DCHECK_LE(ranges(0), value);

- DCHECK_GT(ranges(bucket_count()), value);

- size_t under = 0;

- size_t over = bucket_count();

- size_t mid;

- do {

- DCHECK_GE(over, under);

- mid = under + (over - under)/2;

- if (mid == under)

- break;

- if (ranges(mid) <= value)

- under = mid;

- else

- over = mid;

- } while (true);

- DCHECK_LE(ranges(mid), value);

- CHECK_GT(ranges(mid+1), value);

- return mid;

// Use the actual bucket widths (like a linear histogram) until the widths get

// over some transition value, and then use that transition width. Exponentials

// get so big so fast (and we don't expect to see a lot of entries in the large

@@ -567,12 +663,6 @@

return result;

}

-// Update histogram data with new sample.

-void Histogram::Accumulate(Sample value, Count count, size_t index) {

- // Note locking not done in this version!!!

Ilya Sherman 2012/08/23 07:50:54 nit: Please keep this comment around by the releva

kaiwang 2012/08/24 04:17:58 this function is not needed any more. All Histogra

- sample_.Accumulate(value, count, index);

//------------------------------------------------------------------------------

// Private methods

@@ -581,22 +671,21 @@

string* output) const {

// Get local (stack) copies of all effectively volatile class data so that we

// are consistent across our output activities.

- SampleSet snapshot;

- SnapshotSample(&snapshot);

- Count sample_count = snapshot.TotalCount();

+ scoped_ptr<BucketHistogramSamples> snapshot = SnapshotSamples();

+ Count sample_count = snapshot->TotalCount();

- WriteAsciiHeader(snapshot, sample_count, output);

+ WriteAsciiHeader(*snapshot, sample_count, output);

output->append(newline);

// Prepare to normalize graphical rendering of bucket contents.

double max_size = 0;

if (graph_it)

- max_size = GetPeakBucketSize(snapshot);

+ max_size = GetPeakBucketSize(*snapshot);

// Calculate space needed to print bucket range numbers. Leave room to print

// nearly the largest bucket range without sliding over the histogram.

size_t largest_non_empty_bucket = bucket_count() - 1;

- while (0 == snapshot.counts(largest_non_empty_bucket)) {

+ while (0 == snapshot->GetCountFromBucketIndex(largest_non_empty_bucket)) {

if (0 == largest_non_empty_bucket)

break; // All buckets are empty.

--largest_non_empty_bucket;

@@ -605,7 +694,7 @@

// Calculate largest print width needed for any of our bucket range displays.

size_t print_width = 1;

for (size_t i = 0; i < bucket_count(); ++i) {

- if (snapshot.counts(i)) {

+ if (snapshot->GetCountFromBucketIndex(i)) {

size_t width = GetAsciiBucketRange(i).size() + 1;

if (width > print_width)

print_width = width;

@@ -616,7 +705,7 @@

int64 past = 0;

// Output the actual histogram graph.

for (size_t i = 0; i < bucket_count(); ++i) {

- Count current = snapshot.counts(i);

+ Count current = snapshot->GetCountFromBucketIndex(i);

if (!current && !PrintEmptyBucket(i))

continue;

remaining -= current;

@@ -624,9 +713,12 @@

output->append(range);

for (size_t j = 0; range.size() + j < print_width + 1; ++j)

output->push_back(' ');

- if (0 == current && i < bucket_count() - 1 && 0 == snapshot.counts(i + 1)) {

- while (i < bucket_count() - 1 && 0 == snapshot.counts(i + 1))

+ if (0 == current && i < bucket_count() - 1 &&

+ 0 == snapshot->GetCountFromBucketIndex(i + 1)) {

+ while (i < bucket_count() - 1 &&

+ 0 == snapshot->GetCountFromBucketIndex(i + 1)) {

++i;

+ }

output->append("... ");

output->append(newline);

continue; // No reason to plot emptiness.

@@ -641,17 +733,18 @@

DCHECK_EQ(sample_count, past);

}

-double Histogram::GetPeakBucketSize(const SampleSet& snapshot) const {

+double Histogram::GetPeakBucketSize(

+ const BucketHistogramSamples& samples) const {

double max = 0;

for (size_t i = 0; i < bucket_count() ; ++i) {

- double current_size = GetBucketSize(snapshot.counts(i), i);

+ double current_size = GetBucketSize(samples.GetCountFromBucketIndex(i), i);

if (current_size > max)

max = current_size;

}

return max;

}

-void Histogram::WriteAsciiHeader(const SampleSet& snapshot,

+void Histogram::WriteAsciiHeader(const BucketHistogramSamples& samples,

Count sample_count,

string* output) const {

StringAppendF(output,

@@ -659,9 +752,9 @@

histogram_name().c_str(),

sample_count);

if (0 == sample_count) {

- DCHECK_EQ(snapshot.sum(), 0);

+ DCHECK_EQ(samples.sum(), 0);

} else {

- double average = static_cast<float>(snapshot.sum()) / sample_count;

+ double average = static_cast<float>(samples.sum()) / sample_count;

StringAppendF(output, ", average = %.1f", average);

}

« base/metrics/histogram.h ('K') | « base/metrics/histogram.h ('k') | base/metrics/histogram_flattener.h » ('j') | base/metrics/histogram_samples.h » ('J')