From d10f92108a6886f196cc96361445248c7e142c49 Mon Sep 17 00:00:00 2001 From: Ivan <2103732+codebien@users.noreply.github.com> Date: Wed, 24 May 2023 00:53:27 +0200 Subject: [PATCH] Store only significant buckets Not allocate anymore buckets before and after the first and the last non-zero bucket. It saves a bunch of memory. --- output/cloud/expv2/hdr.go | 90 +++++++++---------- output/cloud/expv2/hdr_test.go | 156 ++++++++++++++++++++------------- 2 files changed, 137 insertions(+), 109 deletions(-) diff --git a/output/cloud/expv2/hdr.go b/output/cloud/expv2/hdr.go index 1f289f75eb3b..faa8d0ac3eae 100644 --- a/output/cloud/expv2/hdr.go +++ b/output/cloud/expv2/hdr.go @@ -86,18 +86,8 @@ type histogram struct { // doesn't make any sense in this way. // It aggregates calling addToBucket time-to-time so // trimzeros has to be called at the end of the process. -func newHistogram(values []float64) histogram { - h := histogram{} - if len(values) < 1 { - return h - } - - for i := 0; i < len(values); i++ { - h.addToBucket(values[i]) - } - - h.trimzeros() - return h +func newHistogram() histogram { + return histogram{} } // addToBucket increments the counter of the bucket @@ -105,12 +95,6 @@ func newHistogram(values []float64) histogram { // If the value is lower or higher than the trackable limits // then it is counted into specific buckets. // All the stats are also updated accordingly. -// -// TODO: add the test case in units doing -// addToBucket + trimzeros + addToBucket -// so calling addToBucket after trimzeros was called. -// We don't expect to have this case but the current API -// would support it so we need to make sure it works or refactor. func (h *histogram) addToBucket(v float64) { if h.Count == 0 { h.Max, h.Min = v, v @@ -136,64 +120,72 @@ func (h *histogram) addToBucket(v float64) { } index := resolveBucketIndex(v) + blen := uint32(len(h.Buckets)) if blen == 0 { h.FirstNotZeroBucket = index h.LastNotZeroBucket = index + h.Buckets = make([]uint32, 1, 32) } else { if index < h.FirstNotZeroBucket { + h.growLeft(index) h.FirstNotZeroBucket = index } if index > h.LastNotZeroBucket { + h.growRight(index) h.LastNotZeroBucket = index } } - if index >= blen { - h.grow(index) + h.Buckets[index-h.FirstNotZeroBucket]++ +} + +func (h *histogram) growLeft(index uint32) { + if h.FirstNotZeroBucket <= index { + panic("buckets is already contains the requested index") + } + + newLen := (h.FirstNotZeroBucket - index) + uint32(len(h.Buckets)) + + // there is enough capacity in the undrelying array + // reuse it slicing without allocate a new array + if uint32(cap(h.Buckets)) >= newLen { + upIndex := len(h.Buckets) - 1 + h.Buckets = h.Buckets[:newLen] + h.Buckets = append(h.Buckets[upIndex+1:], h.Buckets[:upIndex+1]...) + return } - h.Buckets[index]++ + + // there isn't enough capacity, allocate a new slice + // able to contain the new size + + newBuckets := make([]uint32, newLen) + copy(newBuckets[h.FirstNotZeroBucket-index:], h.Buckets) + h.Buckets = newBuckets } -// grow expands the buckets slice -// with zeros up to the required index -func (h *histogram) grow(index uint32) { - i := int(index) - if len(h.Buckets)-1 > i { +// growRight expands the buckets slice +// with zeros up to the required index. +// If it array has enough capacity then it reuses it without allocate. +func (h *histogram) growRight(index uint32) { + if h.LastNotZeroBucket >= index { panic("buckets is already bigger than requested index") } - if cap(h.Buckets) > i { + + newLen := index - h.FirstNotZeroBucket + 1 + + if uint32(cap(h.Buckets)) > newLen { // See https://go.dev/ref/spec#Slice_expressions // "For slices, the upper index bound is // the slice capacity cap(a) rather than the length" - h.Buckets = h.Buckets[:index+1] + h.Buckets = h.Buckets[:newLen] } else { - length := i + 1 - // let's make two times larger of - // the current request - newBuckets := make([]uint32, length, (len(h.Buckets)+length)*2) + newBuckets := make([]uint32, newLen) copy(newBuckets, h.Buckets) h.Buckets = newBuckets } } -// trimzeros removes all buckets that have a zero value -// from the begin and from the end until -// the first not zero bucket. -func (h *histogram) trimzeros() { - if h.Count < 1 || len(h.Buckets) < 1 { - return - } - - // all the counters are set to zero, we can remove all - if h.FirstNotZeroBucket == 0 && h.LastNotZeroBucket == 0 { - h.Buckets = []uint32{} - return - } - - h.Buckets = h.Buckets[h.FirstNotZeroBucket : h.LastNotZeroBucket+1] -} - // histogramAsProto converts the histogram into the equivalent Protobuf version. func histogramAsProto(h *histogram, time time.Time) *pbcloud.TrendHdrValue { hval := &pbcloud.TrendHdrValue{ diff --git a/output/cloud/expv2/hdr_test.go b/output/cloud/expv2/hdr_test.go index 3fefad8ca619..9ef18065fb83 100644 --- a/output/cloud/expv2/hdr_test.go +++ b/output/cloud/expv2/hdr_test.go @@ -6,6 +6,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.k6.io/k6/output/cloud/expv2/pbcloud" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -39,25 +40,86 @@ func TestValueBacket(t *testing.T) { func TestNewHistogramWithSimpleValue(t *testing.T) { t.Parallel() - res := newHistogram([]float64{100}) + + res := newHistogram() + res.addToBucket(8) + res.addToBucket(5) exp := histogram{ - Buckets: []uint32{1}, + Buckets: []uint32{1, 0, 0, 1}, + FirstNotZeroBucket: 5, + LastNotZeroBucket: 8, + ExtraLowBucket: 0, + ExtraHighBucket: 0, + Max: 8, + Min: 5, + Sum: 13, + Count: 2, + } + + require.Equal(t, exp, res) + + res = newHistogram() + res.addToBucket(100) + res.addToBucket(101) + + exp = histogram{ + Buckets: []uint32{1, 1}, FirstNotZeroBucket: 100, - LastNotZeroBucket: 100, + LastNotZeroBucket: 101, ExtraLowBucket: 0, ExtraHighBucket: 0, - Max: 100, + Max: 101, Min: 100, - Sum: 100, - Count: 1, + Sum: 201, + Count: 2, } + + res = newHistogram() + res.addToBucket(101) + res.addToBucket(100) + + exp = histogram{ + Buckets: []uint32{1, 1}, + FirstNotZeroBucket: 100, + LastNotZeroBucket: 101, + ExtraLowBucket: 0, + ExtraHighBucket: 0, + Max: 101, + Min: 100, + Sum: 201, + Count: 2, + } + assert.Equal(t, exp, res) + + res = newHistogram() + res.addToBucket(8) + res.addToBucket(9) + res.addToBucket(10) + res.addToBucket(5) + + exp = histogram{ + Buckets: []uint32{1, 0, 0, 1, 1, 1}, + FirstNotZeroBucket: 5, + LastNotZeroBucket: 10, + ExtraLowBucket: 0, + ExtraHighBucket: 0, + Max: 10, + Min: 5, + Sum: 32, + Count: 4, + } + assert.Equal(t, exp, res) } func TestNewHistogramWithUntrackables(t *testing.T) { t.Parallel() - res := newHistogram([]float64{5, -3.14, 2 * 1e9, 1}) + + res := newHistogram() + for _, v := range []float64{5, -3.14, 2 * 1e9, 1} { + res.addToBucket(v) + } exp := histogram{ Buckets: []uint32{1, 0, 0, 0, 1}, @@ -75,7 +137,11 @@ func TestNewHistogramWithUntrackables(t *testing.T) { func TestNewHistogramWithMultipleValues(t *testing.T) { t.Parallel() - res := newHistogram([]float64{51.8, 103.6, 103.6, 103.6, 103.6}) + + res := newHistogram() + for _, v := range []float64{51.8, 103.6, 103.6, 103.6, 103.6} { + res.addToBucket(v) + } exp := histogram{ FirstNotZeroBucket: 52, @@ -94,7 +160,9 @@ func TestNewHistogramWithMultipleValues(t *testing.T) { func TestNewHistogramWithNegativeNum(t *testing.T) { t.Parallel() - res := newHistogram([]float64{-2.42314}) + + res := newHistogram() + res.addToBucket(-2.42314) exp := histogram{ FirstNotZeroBucket: 0, @@ -111,7 +179,10 @@ func TestNewHistogramWithNegativeNum(t *testing.T) { func TestNewHistogramWithMultipleNegativeNums(t *testing.T) { t.Parallel() - res := newHistogram([]float64{-0.001, -0.001, -0.001}) + res := newHistogram() + for _, v := range []float64{-0.001, -0.001, -0.001} { + res.addToBucket(v) + } exp := histogram{ Buckets: nil, @@ -128,7 +199,8 @@ func TestNewHistogramWithMultipleNegativeNums(t *testing.T) { func TestNewHistoramWithNoVals(t *testing.T) { t.Parallel() - res := newHistogram([]float64{}) + + res := newHistogram() exp := histogram{ Buckets: nil, FirstNotZeroBucket: 0, @@ -141,63 +213,28 @@ func TestNewHistoramWithNoVals(t *testing.T) { assert.Equal(t, exp, res) } -func TestHistogramTrimzeros(t *testing.T) { - t.Parallel() - - cases := []struct { - in histogram - exp []uint32 - }{ - {in: histogram{Buckets: []uint32{}}, exp: []uint32{}}, - {in: histogram{Buckets: []uint32{0}}, exp: []uint32{}}, - {in: histogram{Buckets: []uint32{0, 0, 0}}, exp: []uint32{}}, - { - in: histogram{ - Buckets: []uint32{0, 0, 0, 0, 0, 0, 1, 0}, - FirstNotZeroBucket: 6, - LastNotZeroBucket: 6, - }, - exp: []uint32{1}, - }, - { - in: histogram{ - Buckets: []uint32{0, 0, 0, 1, 9, 0, 0, 1, 0, 0, 0}, - FirstNotZeroBucket: 3, - LastNotZeroBucket: 7, - }, - exp: []uint32{1, 9, 0, 0, 1}, - }, - } - - for _, tc := range cases { - h := tc.in - h.Count = 1 - h.trimzeros() - assert.Equal(t, tc.exp, h.Buckets, tc.in.Buckets) - } -} - -func TestHistogramGrow(t *testing.T) { +func TestHistogramGrowRight(t *testing.T) { t.Parallel() h := histogram{} // the cap is smaller than requested index // so it creates a new slice - h.grow(3) + h.growRight(3) assert.Len(t, h.Buckets, 4) // it must preserve already existing items h.Buckets[2] = 101 // it appends to the same slice - h.grow(5) + h.growRight(5) assert.Len(t, h.Buckets, 6) assert.Equal(t, uint32(101), h.Buckets[2]) assert.Equal(t, uint32(0), h.Buckets[5]) // it is not possible to request an index smaller than // the last already available index - assert.Panics(t, func() { h.grow(4) }) + h.LastNotZeroBucket = 5 + assert.Panics(t, func() { h.growRight(4) }) } func TestHistogramAsProto(t *testing.T) { @@ -209,6 +246,7 @@ func TestHistogramAsProto(t *testing.T) { cases := []struct { name string + vals []float64 in histogram exp *pbcloud.TrendHdrValue }{ @@ -219,7 +257,8 @@ func TestHistogramAsProto(t *testing.T) { }, { name: "not trackable values", - in: newHistogram([]float64{-0.23, 1<<30 + 1}), + in: newHistogram(), + vals: []float64{-0.23, 1<<30 + 1}, exp: &pbcloud.TrendHdrValue{ Count: 2, ExtraLowValuesCounter: uint32ptr(1), @@ -232,7 +271,8 @@ func TestHistogramAsProto(t *testing.T) { }, { name: "normal values", - in: newHistogram([]float64{2, 1.1, 3}), + in: newHistogram(), + vals: []float64{2, 1.1, 3}, exp: &pbcloud.TrendHdrValue{ Count: 3, ExtraLowValuesCounter: nil, @@ -246,6 +286,9 @@ func TestHistogramAsProto(t *testing.T) { } for _, tc := range cases { + for _, v := range tc.vals { + tc.in.addToBucket(v) + } tc.exp.MinResolution = 1.0 tc.exp.SignificantDigits = 2 tc.exp.Time = ×tamppb.Timestamp{Seconds: 1} @@ -253,10 +296,3 @@ func TestHistogramAsProto(t *testing.T) { assert.Equal(t, tc.exp, histogramAsProto(&tc.in, time.Unix(1, 0)), tc.name) } } - -func TestHistogramIsEmpty(t *testing.T) { - h := histogram{} - assert.True(t, h.IsEmpty()) - h.addToBucket(3.1) - assert.False(t, h.IsEmpty()) -}