From d10f92108a6886f196cc96361445248c7e142c49 Mon Sep 17 00:00:00 2001
From: Ivan <2103732+codebien@users.noreply.github.com>
Date: Wed, 24 May 2023 00:53:27 +0200
Subject: [PATCH] Store only significant buckets

Not allocate anymore buckets before and after the first and the last
non-zero bucket. It saves a bunch of memory.
---
 output/cloud/expv2/hdr.go      |  90 +++++++++----------
 output/cloud/expv2/hdr_test.go | 156 ++++++++++++++++++++-------------
 2 files changed, 137 insertions(+), 109 deletions(-)

diff --git a/output/cloud/expv2/hdr.go b/output/cloud/expv2/hdr.go
index 1f289f75eb3b..faa8d0ac3eae 100644
--- a/output/cloud/expv2/hdr.go
+++ b/output/cloud/expv2/hdr.go
@@ -86,18 +86,8 @@ type histogram struct {
 // doesn't make any sense in this way.
 // It aggregates calling addToBucket time-to-time so
 // trimzeros has to be called at the end of the process.
-func newHistogram(values []float64) histogram {
-	h := histogram{}
-	if len(values) < 1 {
-		return h
-	}
-
-	for i := 0; i < len(values); i++ {
-		h.addToBucket(values[i])
-	}
-
-	h.trimzeros()
-	return h
+func newHistogram() histogram {
+	return histogram{}
 }
 
 // addToBucket increments the counter of the bucket
@@ -105,12 +95,6 @@ func newHistogram(values []float64) histogram {
 // If the value is lower or higher than the trackable limits
 // then it is counted into specific buckets.
 // All the stats are also updated accordingly.
-//
-// TODO: add the test case in units doing
-// addToBucket + trimzeros + addToBucket
-// so calling addToBucket after trimzeros was called.
-// We don't expect to have this case but the current API
-// would support it so we need to make sure it works or refactor.
 func (h *histogram) addToBucket(v float64) {
 	if h.Count == 0 {
 		h.Max, h.Min = v, v
@@ -136,64 +120,72 @@ func (h *histogram) addToBucket(v float64) {
 	}
 
 	index := resolveBucketIndex(v)
+
 	blen := uint32(len(h.Buckets))
 	if blen == 0 {
 		h.FirstNotZeroBucket = index
 		h.LastNotZeroBucket = index
+		h.Buckets = make([]uint32, 1, 32)
 	} else {
 		if index < h.FirstNotZeroBucket {
+			h.growLeft(index)
 			h.FirstNotZeroBucket = index
 		}
 		if index > h.LastNotZeroBucket {
+			h.growRight(index)
 			h.LastNotZeroBucket = index
 		}
 	}
 
-	if index >= blen {
-		h.grow(index)
+	h.Buckets[index-h.FirstNotZeroBucket]++
+}
+
+func (h *histogram) growLeft(index uint32) {
+	if h.FirstNotZeroBucket <= index {
+		panic("buckets is already contains the requested index")
+	}
+
+	newLen := (h.FirstNotZeroBucket - index) + uint32(len(h.Buckets))
+
+	// there is enough capacity in the undrelying array
+	// reuse it slicing without allocate a new array
+	if uint32(cap(h.Buckets)) >= newLen {
+		upIndex := len(h.Buckets) - 1
+		h.Buckets = h.Buckets[:newLen]
+		h.Buckets = append(h.Buckets[upIndex+1:], h.Buckets[:upIndex+1]...)
+		return
 	}
-	h.Buckets[index]++
+
+	// there isn't enough capacity, allocate a new slice
+	// able to contain the new size
+
+	newBuckets := make([]uint32, newLen)
+	copy(newBuckets[h.FirstNotZeroBucket-index:], h.Buckets)
+	h.Buckets = newBuckets
 }
 
-// grow expands the buckets slice
-// with zeros up to the required index
-func (h *histogram) grow(index uint32) {
-	i := int(index)
-	if len(h.Buckets)-1 > i {
+// growRight expands the buckets slice
+// with zeros up to the required index.
+// If it array has enough capacity then it reuses it without allocate.
+func (h *histogram) growRight(index uint32) {
+	if h.LastNotZeroBucket >= index {
 		panic("buckets is already bigger than requested index")
 	}
-	if cap(h.Buckets) > i {
+
+	newLen := index - h.FirstNotZeroBucket + 1
+
+	if uint32(cap(h.Buckets)) > newLen {
 		// See https://go.dev/ref/spec#Slice_expressions
 		// "For slices, the upper index bound is
 		// the slice capacity cap(a) rather than the length"
-		h.Buckets = h.Buckets[:index+1]
+		h.Buckets = h.Buckets[:newLen]
 	} else {
-		length := i + 1
-		// let's make two times larger of
-		// the current request
-		newBuckets := make([]uint32, length, (len(h.Buckets)+length)*2)
+		newBuckets := make([]uint32, newLen)
 		copy(newBuckets, h.Buckets)
 		h.Buckets = newBuckets
 	}
 }
 
-// trimzeros removes all buckets that have a zero value
-// from the begin and from the end until
-// the first not zero bucket.
-func (h *histogram) trimzeros() {
-	if h.Count < 1 || len(h.Buckets) < 1 {
-		return
-	}
-
-	// all the counters are set to zero, we can remove all
-	if h.FirstNotZeroBucket == 0 && h.LastNotZeroBucket == 0 {
-		h.Buckets = []uint32{}
-		return
-	}
-
-	h.Buckets = h.Buckets[h.FirstNotZeroBucket : h.LastNotZeroBucket+1]
-}
-
 // histogramAsProto converts the histogram into the equivalent Protobuf version.
 func histogramAsProto(h *histogram, time time.Time) *pbcloud.TrendHdrValue {
 	hval := &pbcloud.TrendHdrValue{
diff --git a/output/cloud/expv2/hdr_test.go b/output/cloud/expv2/hdr_test.go
index 3fefad8ca619..9ef18065fb83 100644
--- a/output/cloud/expv2/hdr_test.go
+++ b/output/cloud/expv2/hdr_test.go
@@ -6,6 +6,7 @@ import (
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 	"go.k6.io/k6/output/cloud/expv2/pbcloud"
 	"google.golang.org/protobuf/types/known/timestamppb"
 )
@@ -39,25 +40,86 @@ func TestValueBacket(t *testing.T) {
 
 func TestNewHistogramWithSimpleValue(t *testing.T) {
 	t.Parallel()
-	res := newHistogram([]float64{100})
+
+	res := newHistogram()
+	res.addToBucket(8)
+	res.addToBucket(5)
 
 	exp := histogram{
-		Buckets:            []uint32{1},
+		Buckets:            []uint32{1, 0, 0, 1},
+		FirstNotZeroBucket: 5,
+		LastNotZeroBucket:  8,
+		ExtraLowBucket:     0,
+		ExtraHighBucket:    0,
+		Max:                8,
+		Min:                5,
+		Sum:                13,
+		Count:              2,
+	}
+
+	require.Equal(t, exp, res)
+
+	res = newHistogram()
+	res.addToBucket(100)
+	res.addToBucket(101)
+
+	exp = histogram{
+		Buckets:            []uint32{1, 1},
 		FirstNotZeroBucket: 100,
-		LastNotZeroBucket:  100,
+		LastNotZeroBucket:  101,
 		ExtraLowBucket:     0,
 		ExtraHighBucket:    0,
-		Max:                100,
+		Max:                101,
 		Min:                100,
-		Sum:                100,
-		Count:              1,
+		Sum:                201,
+		Count:              2,
 	}
+
+	res = newHistogram()
+	res.addToBucket(101)
+	res.addToBucket(100)
+
+	exp = histogram{
+		Buckets:            []uint32{1, 1},
+		FirstNotZeroBucket: 100,
+		LastNotZeroBucket:  101,
+		ExtraLowBucket:     0,
+		ExtraHighBucket:    0,
+		Max:                101,
+		Min:                100,
+		Sum:                201,
+		Count:              2,
+	}
+	assert.Equal(t, exp, res)
+
+	res = newHistogram()
+	res.addToBucket(8)
+	res.addToBucket(9)
+	res.addToBucket(10)
+	res.addToBucket(5)
+
+	exp = histogram{
+		Buckets:            []uint32{1, 0, 0, 1, 1, 1},
+		FirstNotZeroBucket: 5,
+		LastNotZeroBucket:  10,
+		ExtraLowBucket:     0,
+		ExtraHighBucket:    0,
+		Max:                10,
+		Min:                5,
+		Sum:                32,
+		Count:              4,
+	}
+
 	assert.Equal(t, exp, res)
 }
 
 func TestNewHistogramWithUntrackables(t *testing.T) {
 	t.Parallel()
-	res := newHistogram([]float64{5, -3.14, 2 * 1e9, 1})
+
+	res := newHistogram()
+	for _, v := range []float64{5, -3.14, 2 * 1e9, 1} {
+		res.addToBucket(v)
+	}
 
 	exp := histogram{
 		Buckets:            []uint32{1, 0, 0, 0, 1},
@@ -75,7 +137,11 @@ func TestNewHistogramWithUntrackables(t *testing.T) {
 
 func TestNewHistogramWithMultipleValues(t *testing.T) {
 	t.Parallel()
-	res := newHistogram([]float64{51.8, 103.6, 103.6, 103.6, 103.6})
+
+	res := newHistogram()
+	for _, v := range []float64{51.8, 103.6, 103.6, 103.6, 103.6} {
+		res.addToBucket(v)
+	}
 
 	exp := histogram{
 		FirstNotZeroBucket: 52,
@@ -94,7 +160,9 @@ func TestNewHistogramWithMultipleValues(t *testing.T) {
 
 func TestNewHistogramWithNegativeNum(t *testing.T) {
 	t.Parallel()
-	res := newHistogram([]float64{-2.42314})
+
+	res := newHistogram()
+	res.addToBucket(-2.42314)
 
 	exp := histogram{
 		FirstNotZeroBucket: 0,
@@ -111,7 +179,10 @@ func TestNewHistogramWithNegativeNum(t *testing.T) {
 
 func TestNewHistogramWithMultipleNegativeNums(t *testing.T) {
 	t.Parallel()
-	res := newHistogram([]float64{-0.001, -0.001, -0.001})
+	res := newHistogram()
+	for _, v := range []float64{-0.001, -0.001, -0.001} {
+		res.addToBucket(v)
+	}
 
 	exp := histogram{
 		Buckets:            nil,
@@ -128,7 +199,8 @@ func TestNewHistogramWithMultipleNegativeNums(t *testing.T) {
 
 func TestNewHistoramWithNoVals(t *testing.T) {
 	t.Parallel()
-	res := newHistogram([]float64{})
+
+	res := newHistogram()
 	exp := histogram{
 		Buckets:            nil,
 		FirstNotZeroBucket: 0,
@@ -141,63 +213,28 @@ func TestNewHistoramWithNoVals(t *testing.T) {
 	assert.Equal(t, exp, res)
 }
 
-func TestHistogramTrimzeros(t *testing.T) {
-	t.Parallel()
-
-	cases := []struct {
-		in  histogram
-		exp []uint32
-	}{
-		{in: histogram{Buckets: []uint32{}}, exp: []uint32{}},
-		{in: histogram{Buckets: []uint32{0}}, exp: []uint32{}},
-		{in: histogram{Buckets: []uint32{0, 0, 0}}, exp: []uint32{}},
-		{
-			in: histogram{
-				Buckets:            []uint32{0, 0, 0, 0, 0, 0, 1, 0},
-				FirstNotZeroBucket: 6,
-				LastNotZeroBucket:  6,
-			},
-			exp: []uint32{1},
-		},
-		{
-			in: histogram{
-				Buckets:            []uint32{0, 0, 0, 1, 9, 0, 0, 1, 0, 0, 0},
-				FirstNotZeroBucket: 3,
-				LastNotZeroBucket:  7,
-			},
-			exp: []uint32{1, 9, 0, 0, 1},
-		},
-	}
-
-	for _, tc := range cases {
-		h := tc.in
-		h.Count = 1
-		h.trimzeros()
-		assert.Equal(t, tc.exp, h.Buckets, tc.in.Buckets)
-	}
-}
-
-func TestHistogramGrow(t *testing.T) {
+func TestHistogramGrowRight(t *testing.T) {
 	t.Parallel()
 	h := histogram{}
 
 	// the cap is smaller than requested index
 	// so it creates a new slice
-	h.grow(3)
+	h.growRight(3)
 	assert.Len(t, h.Buckets, 4)
 
 	// it must preserve already existing items
 	h.Buckets[2] = 101
 
 	// it appends to the same slice
-	h.grow(5)
+	h.growRight(5)
 	assert.Len(t, h.Buckets, 6)
 	assert.Equal(t, uint32(101), h.Buckets[2])
 	assert.Equal(t, uint32(0), h.Buckets[5])
 
 	// it is not possible to request an index smaller than
 	// the last already available index
-	assert.Panics(t, func() { h.grow(4) })
+	h.LastNotZeroBucket = 5
+	assert.Panics(t, func() { h.growRight(4) })
 }
 
 func TestHistogramAsProto(t *testing.T) {
@@ -209,6 +246,7 @@ func TestHistogramAsProto(t *testing.T) {
 
 	cases := []struct {
 		name string
+		vals []float64
 		in   histogram
 		exp  *pbcloud.TrendHdrValue
 	}{
@@ -219,7 +257,8 @@ func TestHistogramAsProto(t *testing.T) {
 		},
 		{
 			name: "not trackable values",
-			in:   newHistogram([]float64{-0.23, 1<<30 + 1}),
+			in:   newHistogram(),
+			vals: []float64{-0.23, 1<<30 + 1},
 			exp: &pbcloud.TrendHdrValue{
 				Count:                  2,
 				ExtraLowValuesCounter:  uint32ptr(1),
@@ -232,7 +271,8 @@ func TestHistogramAsProto(t *testing.T) {
 		},
 		{
 			name: "normal values",
-			in:   newHistogram([]float64{2, 1.1, 3}),
+			in:   newHistogram(),
+			vals: []float64{2, 1.1, 3},
 			exp: &pbcloud.TrendHdrValue{
 				Count:                  3,
 				ExtraLowValuesCounter:  nil,
@@ -246,6 +286,9 @@ func TestHistogramAsProto(t *testing.T) {
 	}
 
 	for _, tc := range cases {
+		for _, v := range tc.vals {
+			tc.in.addToBucket(v)
+		}
 		tc.exp.MinResolution = 1.0
 		tc.exp.SignificantDigits = 2
 		tc.exp.Time = &timestamppb.Timestamp{Seconds: 1}
@@ -253,10 +296,3 @@ func TestHistogramAsProto(t *testing.T) {
 		assert.Equal(t, tc.exp, histogramAsProto(&tc.in, time.Unix(1, 0)), tc.name)
 	}
 }
-
-func TestHistogramIsEmpty(t *testing.T) {
-	h := histogram{}
-	assert.True(t, h.IsEmpty())
-	h.addToBucket(3.1)
-	assert.False(t, h.IsEmpty())
-}