diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8a666cd1fd7..99672721fc1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,6 +34,7 @@
 * [ENHANCEMENT] Mixin, make recording rule range interval configurable and increase range interval in alert to support scrape interval of 1 minute [#3851](https://github.com/grafana/tempo/pull/3851) (@jmichalek132)
 * [ENHANCEMENT] Add vParquet4 support to the tempo-cli analyse blocks command [#3868](https://github.com/grafana/tempo/pull/3868) (@stoewer)
 * [ENHANCEMENT] Improve trace id lookup from Tempo Vulture by selecting a date range [#3874](https://github.com/grafana/tempo/pull/3874) (@javiermolinar)
+* [ENHANCEMENT] Add native histograms for internal metrics[#3870](https://github.com/grafana/tempo/pull/3870) (@zalegrala)
 * [BUGFIX] Fix panic in certain metrics queries using `rate()` with `by` [#3847](https://github.com/grafana/tempo/pull/3847) (@stoewer)
 * [BUGFIX] Fix metrics queries when grouping by attributes that may not exist [#3734](https://github.com/grafana/tempo/pull/3734) (@mdisibio)
 * [BUGFIX] Fix frontend parsing error on cached responses [#3759](https://github.com/grafana/tempo/pull/3759) (@mdisibio)
diff --git a/modules/distributor/distributor.go b/modules/distributor/distributor.go
index 159048ce621..2ca044e8466 100644
--- a/modules/distributor/distributor.go
+++ b/modules/distributor/distributor.go
@@ -91,10 +91,13 @@ var (
 		Help: "The total number of proto bytes received per tenant",
 	}, []string{"tenant"})
 	metricTracesPerBatch = promauto.NewHistogram(prometheus.HistogramOpts{
-		Namespace: "tempo",
-		Name:      "distributor_traces_per_batch",
-		Help:      "The number of traces in each batch",
-		Buckets:   prometheus.ExponentialBuckets(2, 2, 10),
+		Namespace:                       "tempo",
+		Name:                            "distributor_traces_per_batch",
+		Help:                            "The number of traces in each batch",
+		Buckets:                         prometheus.ExponentialBuckets(2, 2, 10),
+		NativeHistogramBucketFactor:     1.1,
+		NativeHistogramMaxBucketNumber:  100,
+		NativeHistogramMinResetDuration: 1 * time.Hour,
 	})
 	metricIngesterClients = promauto.NewGauge(prometheus.GaugeOpts{
 		Namespace: "tempo",
diff --git a/modules/distributor/receiver/shim.go b/modules/distributor/receiver/shim.go
index a5625cb92d2..2b9624488ca 100644
--- a/modules/distributor/receiver/shim.go
+++ b/modules/distributor/receiver/shim.go
@@ -49,10 +49,13 @@ const (
 
 var (
 	metricPushDuration = promauto.NewHistogram(prom_client.HistogramOpts{
-		Namespace: "tempo",
-		Name:      "distributor_push_duration_seconds",
-		Help:      "Records the amount of time to push a batch to the ingester.",
-		Buckets:   prom_client.DefBuckets,
+		Namespace:                       "tempo",
+		Name:                            "distributor_push_duration_seconds",
+		Help:                            "Records the amount of time to push a batch to the ingester.",
+		Buckets:                         prom_client.DefBuckets,
+		NativeHistogramBucketFactor:     1.1,
+		NativeHistogramMaxBucketNumber:  100,
+		NativeHistogramMinResetDuration: 1 * time.Hour,
 	})
 
 	statReceiverOtlp = usagestats.NewInt("receiver_enabled_otlp")
diff --git a/modules/frontend/pipeline/sync_handler_retry.go b/modules/frontend/pipeline/sync_handler_retry.go
index 9c92ddd8448..d95ae39cf91 100644
--- a/modules/frontend/pipeline/sync_handler_retry.go
+++ b/modules/frontend/pipeline/sync_handler_retry.go
@@ -6,6 +6,7 @@ import (
 	"io"
 	"net/http"
 	"strings"
+	"time"
 
 	"github.com/grafana/dskit/httpgrpc"
 	"github.com/grafana/tempo/modules/frontend/queue"
@@ -18,10 +19,13 @@
 
 func NewRetryWare(maxRetries int, registerer prometheus.Registerer) Middleware {
 	retriesCount := promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
-		Namespace: "tempo",
-		Name:      "query_frontend_retries",
-		Help:      "Number of times a request is retried.",
-		Buckets:   []float64{0, 1, 2, 3, 4, 5},
+		Namespace:                       "tempo",
+		Name:                            "query_frontend_retries",
+		Help:                            "Number of times a request is retried.",
+		Buckets:                         []float64{0, 1, 2, 3, 4, 5},
+		NativeHistogramBucketFactor:     1.1,
+		NativeHistogramMaxBucketNumber:  100,
+		NativeHistogramMinResetDuration: 1 * time.Hour,
 	})
 
 	return MiddlewareFunc(func(next http.RoundTripper) http.RoundTripper {
diff --git a/modules/frontend/slos.go b/modules/frontend/slos.go
index 9a19728d45b..8acf55d1826 100644
--- a/modules/frontend/slos.go
+++ b/modules/frontend/slos.go
@@ -44,10 +44,13 @@ var (
 	metricsCounter = queriesPerTenant.MustCurryWith(prometheus.Labels{"op": metricsOp})
 
 	queryThroughput = promauto.NewHistogramVec(prometheus.HistogramOpts{
-		Namespace: "tempo",
-		Name:      "query_frontend_bytes_processed_per_second",
-		Help:      "Bytes processed per second in the query per tenant",
-		Buckets:   prometheus.ExponentialBuckets(8*1024*1024, 2, 12), // from 8MB up to 16GB
+		Namespace:                       "tempo",
+		Name:                            "query_frontend_bytes_processed_per_second",
+		Help:                            "Bytes processed per second in the query per tenant",
+		Buckets:                         prometheus.ExponentialBuckets(8*1024*1024, 2, 12), // from 8MB up to 16GB
+		NativeHistogramBucketFactor:     1.1,
+		NativeHistogramMaxBucketNumber:  100,
+		NativeHistogramMinResetDuration: 1 * time.Hour,
 	}, []string{"tenant", "op"})
 
 	searchThroughput = queryThroughput.MustCurryWith(prometheus.Labels{"op": searchOp})
diff --git a/modules/frontend/v1/frontend.go b/modules/frontend/v1/frontend.go
index e5f4ade8e69..6ccfad6275f 100644
--- a/modules/frontend/v1/frontend.go
+++ b/modules/frontend/v1/frontend.go
@@ -100,9 +100,12 @@ func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Regist
 			Help: "Total number of query requests discarded.",
 		}, []string{"user"}),
 		queueDuration: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
-			Name:    "tempo_query_frontend_queue_duration_seconds",
-			Help:    "Time spend by requests queued.",
-			Buckets: prometheus.DefBuckets,
+			Name:                            "tempo_query_frontend_queue_duration_seconds",
+			Help:                            "Time spend by requests queued.",
+			Buckets:                         prometheus.DefBuckets,
+			NativeHistogramBucketFactor:     1.1,
+			NativeHistogramMaxBucketNumber:  100,
+			NativeHistogramMinResetDuration: 1 * time.Hour,
 		}),
 		actualBatchSize: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
 			Name: "tempo_query_frontend_actual_batch_size",
diff --git a/modules/ingester/flush.go b/modules/ingester/flush.go
index 88a718ee8f6..42c92c524f8 100644
--- a/modules/ingester/flush.go
+++ b/modules/ingester/flush.go
@@ -45,10 +45,13 @@ var (
 		Help: "The total number of failed retries after a failed flush",
 	})
 	metricFlushDuration = promauto.NewHistogram(prometheus.HistogramOpts{
-		Namespace: "tempo",
-		Name:      "ingester_flush_duration_seconds",
-		Help:      "Records the amount of time to flush a complete block.",
-		Buckets:   prometheus.ExponentialBuckets(1, 2, 10),
+		Namespace:                       "tempo",
+		Name:                            "ingester_flush_duration_seconds",
+		Help:                            "Records the amount of time to flush a complete block.",
+		Buckets:                         prometheus.ExponentialBuckets(1, 2, 10),
+		NativeHistogramBucketFactor:     1.1,
+		NativeHistogramMaxBucketNumber:  100,
+		NativeHistogramMinResetDuration: 1 * time.Hour,
 	})
 	metricFlushSize = promauto.NewHistogram(prometheus.HistogramOpts{
 		Namespace: "tempo",
diff --git a/modules/querier/external/client.go b/modules/querier/external/client.go
index 07324bafd12..3b77edad92c 100644
--- a/modules/querier/external/client.go
+++ b/modules/querier/external/client.go
@@ -21,10 +21,13 @@ import (
 
 var (
 	metricEndpointDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
-		Namespace: "tempo",
-		Name:      "querier_external_endpoint_duration_seconds",
-		Help:      "The duration of the external endpoints.",
-		Buckets:   prometheus.DefBuckets,
+		Namespace:                       "tempo",
+		Name:                            "querier_external_endpoint_duration_seconds",
+		Help:                            "The duration of the external endpoints.",
+		Buckets:                         prometheus.DefBuckets,
+		NativeHistogramBucketFactor:     1.1,
+		NativeHistogramMaxBucketNumber:  100,
+		NativeHistogramMinResetDuration: 1 * time.Hour,
 	}, []string{"endpoint"})
 	metricExternalHedgedRequests = promauto.NewGauge(
 		prometheus.GaugeOpts{
diff --git a/pkg/cache/memcached.go b/pkg/cache/memcached.go
index a9bb51eb8f9..560fe0b82b6 100644
--- a/pkg/cache/memcached.go
+++ b/pkg/cache/memcached.go
@@ -60,8 +60,11 @@ func NewMemcached(cfg MemcachedConfig, client MemcachedClient, name string, reg
 				Name:      "memcache_request_duration_seconds",
 				Help:      "Total time spent in seconds doing memcache requests.",
 				// Memcached requests are very quick: smallest bucket is 16us, biggest is 1s
-				Buckets:     prometheus.ExponentialBuckets(0.000016, 4, 8),
-				ConstLabels: prometheus.Labels{"name": name},
+				Buckets:                         prometheus.ExponentialBuckets(0.000016, 4, 8),
+				NativeHistogramBucketFactor:     1.1,
+				NativeHistogramMaxBucketNumber:  100,
+				NativeHistogramMinResetDuration: 1 * time.Hour,
+				ConstLabels:                     prometheus.Labels{"name": name},
 			}, []string{"method", "status_code"}),
 		),
 	}
diff --git a/pkg/cache/redis_cache.go b/pkg/cache/redis_cache.go
index ff4e370a351..a0082e5f402 100644
--- a/pkg/cache/redis_cache.go
+++ b/pkg/cache/redis_cache.go
@@ -2,6 +2,7 @@ package cache
 
 import (
 	"context"
+	"time"
 
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
@@ -31,11 +32,14 @@ func NewRedisCache(name string, redisClient *RedisClient, reg prometheus.Registe
 		logger: logger,
 		requestDuration: instr.NewHistogramCollector(
 			promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
-				Namespace:   "tempo",
-				Name:        "rediscache_request_duration_seconds",
-				Help:        "Total time spent in seconds doing Redis requests.",
-				Buckets:     prometheus.ExponentialBuckets(0.000016, 4, 8),
-				ConstLabels: prometheus.Labels{"name": name},
+				Namespace:                       "tempo",
+				Name:                            "rediscache_request_duration_seconds",
+				Help:                            "Total time spent in seconds doing Redis requests.",
+				Buckets:                         prometheus.ExponentialBuckets(0.000016, 4, 8),
+				NativeHistogramBucketFactor:     1.1,
+				NativeHistogramMaxBucketNumber:  100,
+				NativeHistogramMinResetDuration: 1 * time.Hour,
+				ConstLabels:                     prometheus.Labels{"name": name},
 			}, []string{"method", "status_code"}),
 		),
 	}
diff --git a/tempodb/backend/instrumentation/backend_transports.go b/tempodb/backend/instrumentation/backend_transports.go
index a3039ff4136..0f3b38a4be9 100644
--- a/tempodb/backend/instrumentation/backend_transports.go
+++ b/tempodb/backend/instrumentation/backend_transports.go
@@ -10,10 +10,13 @@ import (
 )
 
 var requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
-	Namespace: "tempodb",
-	Name:      "backend_request_duration_seconds",
-	Help:      "Time spent doing backend storage requests.",
-	Buckets:   prometheus.ExponentialBuckets(0.005, 4, 6),
+	Namespace:                       "tempodb",
+	Name:                            "backend_request_duration_seconds",
+	Help:                            "Time spent doing backend storage requests.",
+	Buckets:                         prometheus.ExponentialBuckets(0.005, 4, 6),
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
 }, []string{"operation", "status_code"})
 
 type instrumentedTransport struct {
diff --git a/tempodb/blocklist/poller.go b/tempodb/blocklist/poller.go
index c48572d59d8..1e7d354b305 100644
--- a/tempodb/blocklist/poller.go
+++ b/tempodb/blocklist/poller.go
@@ -46,10 +46,13 @@ var (
 		Help: "Total number of times an error occurred while polling the blocklist.",
 	}, []string{"tenant"})
 	metricBlocklistPollDuration = promauto.NewHistogram(prometheus.HistogramOpts{
-		Namespace: "tempodb",
-		Name:      "blocklist_poll_duration_seconds",
-		Help:      "Records the amount of time to poll and update the blocklist.",
-		Buckets:   prometheus.LinearBuckets(0, 60, 10),
+		Namespace:                       "tempodb",
+		Name:                            "blocklist_poll_duration_seconds",
+		Help:                            "Records the amount of time to poll and update the blocklist.",
+		Buckets:                         prometheus.DefBuckets,
+		NativeHistogramBucketFactor:     1.1,
+		NativeHistogramMaxBucketNumber:  100,
+		NativeHistogramMinResetDuration: 1 * time.Hour,
 	})
 	metricBlocklistLength = promauto.NewGaugeVec(prometheus.GaugeOpts{
 		Namespace: "tempodb",
diff --git a/tempodb/tempodb.go b/tempodb/tempodb.go
index 6396499c390..369db4e9ee6 100644
--- a/tempodb/tempodb.go
+++ b/tempodb/tempodb.go
@@ -45,10 +45,13 @@ const (
 
 var (
 	metricRetentionDuration = promauto.NewHistogram(prometheus.HistogramOpts{
-		Namespace: "tempodb",
-		Name:      "retention_duration_seconds",
-		Help:      "Records the amount of time to perform retention tasks.",
-		Buckets:   prometheus.ExponentialBuckets(.25, 2, 6),
+		Namespace:                       "tempodb",
+		Name:                            "retention_duration_seconds",
+		Help:                            "Records the amount of time to perform retention tasks.",
+		Buckets:                         prometheus.ExponentialBuckets(.25, 2, 6),
+		NativeHistogramBucketFactor:     1.1,
+		NativeHistogramMaxBucketNumber:  100,
+		NativeHistogramMinResetDuration: 1 * time.Hour,
 	})
	metricRetentionErrors = promauto.NewCounter(prometheus.CounterOpts{
 		Namespace: "tempodb",