From c457d190c665a4f2e2aca304a5da318c77608006 Mon Sep 17 00:00:00 2001 From: Annanay Date: Thu, 25 Mar 2021 16:34:22 +0530 Subject: [PATCH 1/3] Deprecate MaxSpansPerTrace in favour of MaxBytesPerTrace Signed-off-by: Annanay --- .../website/configuration/ingestion-limit.md | 12 +++---- .../max-trace-limit-reached.md | 2 +- integration/e2e/config-limits.yaml | 2 +- modules/ingester/instance.go | 4 +-- modules/ingester/instance_test.go | 18 +++++----- modules/ingester/trace.go | 23 +++++-------- modules/overrides/limits.go | 4 +-- modules/overrides/overrides.go | 6 ++-- modules/overrides/overrides_test.go | 20 +++++------ .../jsonnet/microservices/config.libsonnet | 2 +- pkg/util/test/req.go | 33 +++++++++++++++++++ 11 files changed, 77 insertions(+), 49 deletions(-) diff --git a/docs/tempo/website/configuration/ingestion-limit.md b/docs/tempo/website/configuration/ingestion-limit.md index d6797fcd746..cd1da721bb7 100644 --- a/docs/tempo/website/configuration/ingestion-limit.md +++ b/docs/tempo/website/configuration/ingestion-limit.md @@ -18,7 +18,7 @@ The following options can be used to limit ingestion: - `ingestion_burst_size` : Burst size used in span ingestion. Default is `100,000`. - `ingestion_rate_limit` : Per-user ingestion rate limit in spans per second. Default is `100,000`. - - `max_spans_per_trace` : Maximum number of spans per trace. `0` to disable. Default is `50,000`. + - `max_bytes_per_trace` : Maximum size of a single trace in bytes. `0` to disable. Default is `5,000,000` (~5MB). - `max_traces_per_user`: Maximum number of active traces per user, per ingester. `0` to disable. Default is `10,000`. Both the `ingestion_burst_size` and `ingestion_rate_limit` parameters control the rate limit. When these limits exceed the following message is logged: @@ -27,10 +27,10 @@ Both the `ingestion_burst_size` and `ingestion_rate_limit` parameters control th RATE_LIMITED: ingestion rate limit (100000 spans) exceeded while adding 10 spans ``` -When the limit for the `max_spans_per_trace` parameter exceeds the following message is logged: +When the limit for the `max_bytes_per_trace` parameter exceeds the following message is logged: ``` - TRACE_TOO_LARGE: totalSpans (50000) exceeded while adding 2 spans + TRACE_TOO_LARGE: max size of trace (5000000) exceeded while adding 387 bytes ``` Finally, when the limit for the `max_traces_per_user` parameter exceeds the following message is logged: @@ -50,7 +50,7 @@ To configure new ingestion limits that applies to all tenants of the cluster: overrides: ingestion_burst_size: 50000 ingestion_rate_limit: 50000 - max_spans_per_trace: 50000 + max_bytes_per_trace: 5_000_000 max_traces_per_user: 10000 ``` @@ -75,7 +75,7 @@ Sometimes you don't want all tenants within the cluster to have the same setting "": ingestion_max_batch_size: 5000 ingestion_rate_limit: 400000 - max_spans_per_trace: 250000 + max_bytes_per_trace: 25_000_000 max_traces_per_user: 100000 ``` @@ -87,7 +87,7 @@ This overrides file is dynamically loaded. It can be changed at runtime and wil "*": ingestion_max_batch_size: 5000 ingestion_rate_limit: 400000 - max_spans_per_trace: 250000 + max_bytes_per_trace: 25_000_000 max_traces_per_user: 100000 ``` diff --git a/docs/tempo/website/troubleshooting/max-trace-limit-reached.md b/docs/tempo/website/troubleshooting/max-trace-limit-reached.md index dde923ad662..71258d1071b 100644 --- a/docs/tempo/website/troubleshooting/max-trace-limit-reached.md +++ b/docs/tempo/website/troubleshooting/max-trace-limit-reached.md @@ -13,6 +13,6 @@ In high volume tracing environments the default trace limits are sometimes not s - `ingestion_burst_size` : Burst size used in span ingestion. Default is `100,000`. - `ingestion_rate_limit` : Per-user ingestion rate limit in spans per second. Default is `100,000`. - - `max_spans_per_trace` : Maximum number of spans per trace. `0` to disable. Default is `50,000`. + - `max_bytes_per_trace` : Maximum size of a single trace in bytes. `0` to disable. Default is `5,000,000` (~5MB). - `max_traces_per_user`: Maximum number of active traces per user, per ingester. `0` to disable. Default is `10,000`. - Increase the maximum limit to a failsafe value. For example, increase the limit for the `max_traces_per_user` parameter from `10,000` like `50000`. diff --git a/integration/e2e/config-limits.yaml b/integration/e2e/config-limits.yaml index 7753cfc1dfb..7652886113f 100644 --- a/integration/e2e/config-limits.yaml +++ b/integration/e2e/config-limits.yaml @@ -12,7 +12,7 @@ distributor: grpc: overrides: - max_spans_per_trace: 1 + max_bytes_per_trace: 1_000_000 max_traces_per_user: 1 ingestion_rate_limit: 5 ingestion_burst_size: 5 diff --git a/modules/ingester/instance.go b/modules/ingester/instance.go index fb467c79611..9365f5b8fcd 100644 --- a/modules/ingester/instance.go +++ b/modules/ingester/instance.go @@ -332,8 +332,8 @@ func (i *instance) getOrCreateTrace(req *tempopb.PushRequest) (*trace, error) { return nil, status.Errorf(codes.FailedPrecondition, "%s max live traces per tenant exceeded: %v", overrides.ErrorPrefixLiveTracesExceeded, err) } - maxSpans := i.limiter.limits.MaxSpansPerTrace(i.instanceID) - trace = newTrace(maxSpans, fp, traceID) + maxBytes := i.limiter.limits.MaxBytesPerTrace(i.instanceID) + trace = newTrace(maxBytes, fp, traceID) i.traces[fp] = trace i.tracesCreatedTotal.Inc() diff --git a/modules/ingester/instance_test.go b/modules/ingester/instance_test.go index 21309944ec9..cc8254cd8cf 100644 --- a/modules/ingester/instance_test.go +++ b/modules/ingester/instance_test.go @@ -215,7 +215,7 @@ func TestInstanceDoesNotRace(t *testing.T) { func TestInstanceLimits(t *testing.T) { limits, err := overrides.NewOverrides(overrides.Limits{ - MaxSpansPerTrace: 10, + MaxBytesPerTrace: 1000, }) assert.NoError(t, err, "unexpected error creating limits") limiter := NewLimiter(limits, &ringCountMock{count: 1}, 1) @@ -242,13 +242,13 @@ func TestInstanceLimits(t *testing.T) { name: "succeeds", pushes: []push{ { - req: test.MakeRequest(3, []byte{}), + req: test.MakeRequestWithByteLimit(300, []byte{}), }, { - req: test.MakeRequest(5, []byte{}), + req: test.MakeRequestWithByteLimit(500, []byte{}), }, { - req: test.MakeRequest(9, []byte{}), + req: test.MakeRequestWithByteLimit(900, []byte{}), }, }, }, @@ -256,14 +256,14 @@ func TestInstanceLimits(t *testing.T) { name: "one fails", pushes: []push{ { - req: test.MakeRequest(3, []byte{}), + req: test.MakeRequestWithByteLimit(300, []byte{}), }, { - req: test.MakeRequest(15, []byte{}), + req: test.MakeRequestWithByteLimit(1500, []byte{}), expectsError: true, }, { - req: test.MakeRequest(9, []byte{}), + req: test.MakeRequestWithByteLimit(900, []byte{}), }, }, }, @@ -271,10 +271,10 @@ func TestInstanceLimits(t *testing.T) { name: "multiple pushes same trace", pushes: []push{ { - req: test.MakeRequest(5, []byte{0x01}), + req: test.MakeRequestWithByteLimit(500, []byte{0x01}), }, { - req: test.MakeRequest(7, []byte{0x01}), + req: test.MakeRequestWithByteLimit(700, []byte{0x01}), expectsError: true, }, }, diff --git a/modules/ingester/trace.go b/modules/ingester/trace.go index ecf20ced62b..0734d1bab39 100644 --- a/modules/ingester/trace.go +++ b/modules/ingester/trace.go @@ -15,34 +15,29 @@ type trace struct { token uint32 lastAppend time.Time traceID []byte - maxSpans int - currentSpans int + maxBytes int + currentBytes int } -func newTrace(maxSpans int, token uint32, traceID []byte) *trace { +func newTrace(maxBytes int, token uint32, traceID []byte) *trace { return &trace{ token: token, trace: &tempopb.Trace{}, lastAppend: time.Now(), traceID: traceID, - maxSpans: maxSpans, + maxBytes: maxBytes, } } func (t *trace) Push(_ context.Context, req *tempopb.PushRequest) error { t.lastAppend = time.Now() - if t.maxSpans != 0 { - // count spans - spanCount := 0 - for _, ils := range req.Batch.InstrumentationLibrarySpans { - spanCount += len(ils.Spans) + if t.maxBytes != 0 { + reqSize := req.Size() + if t.currentBytes+reqSize > t.maxBytes { + return status.Errorf(codes.FailedPrecondition, "%s max size of trace (%d) exceeded while adding %d bytes", overrides.ErrorPrefixTraceTooLarge, t.maxBytes, reqSize) } - if t.currentSpans+spanCount > t.maxSpans { - return status.Errorf(codes.FailedPrecondition, "%s totalSpans (%d) exceeded while adding %d spans", overrides.ErrorPrefixTraceTooLarge, t.maxSpans, spanCount) - } - - t.currentSpans += spanCount + t.currentBytes += reqSize } t.trace.Batches = append(t.trace.Batches, req.Batch) diff --git a/modules/overrides/limits.go b/modules/overrides/limits.go index a150bc3a13f..1147dbceabe 100644 --- a/modules/overrides/limits.go +++ b/modules/overrides/limits.go @@ -30,7 +30,7 @@ type Limits struct { // Ingester enforced limits. MaxLocalTracesPerUser int `yaml:"max_traces_per_user"` MaxGlobalTracesPerUser int `yaml:"max_global_traces_per_user"` - MaxSpansPerTrace int `yaml:"max_spans_per_trace"` + MaxBytesPerTrace int `yaml:"max_bytes_per_trace"` // Compactor enforced limits. BlockRetention time.Duration `yaml:"block_retention"` @@ -50,7 +50,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { // Ingester limits f.IntVar(&l.MaxLocalTracesPerUser, "ingester.max-traces-per-user", 10e3, "Maximum number of active traces per user, per ingester. 0 to disable.") f.IntVar(&l.MaxGlobalTracesPerUser, "ingester.max-global-traces-per-user", 0, "Maximum number of active traces per user, across the cluster. 0 to disable.") - f.IntVar(&l.MaxSpansPerTrace, "ingester.max-spans-per-trace", 50e3, "Maximum number of spans per trace. 0 to disable.") + f.IntVar(&l.MaxBytesPerTrace, "ingester.max-bytes-per-trace", 50e5, "Maximum size of a trace in bytes. 0 to disable.") f.StringVar(&l.PerTenantOverrideConfig, "limits.per-user-override-config", "", "File name of per-user overrides.") f.DurationVar(&l.PerTenantOverridePeriod, "limits.per-user-override-period", 10*time.Second, "Period with this to reload the overrides.") diff --git a/modules/overrides/overrides.go b/modules/overrides/overrides.go index c3ef218eb9a..11fc3741238 100644 --- a/modules/overrides/overrides.go +++ b/modules/overrides/overrides.go @@ -142,9 +142,9 @@ func (o *Overrides) MaxGlobalTracesPerUser(userID string) int { return o.getOverridesForUser(userID).MaxGlobalTracesPerUser } -// MaxSpansPerTrace returns the maximum number of spans a user can have in a live trace. -func (o *Overrides) MaxSpansPerTrace(userID string) int { - return o.getOverridesForUser(userID).MaxSpansPerTrace +// MaxBytesPerTrace returns the maximum size of a single trace in bytes allowed for a user. +func (o *Overrides) MaxBytesPerTrace(userID string) int { + return o.getOverridesForUser(userID).MaxBytesPerTrace } // IngestionRateSpans is the number of spans per second allowed for this tenant diff --git a/modules/overrides/overrides_test.go b/modules/overrides/overrides_test.go index 1a47fbca45f..354318bcf0b 100644 --- a/modules/overrides/overrides_test.go +++ b/modules/overrides/overrides_test.go @@ -23,7 +23,7 @@ func TestOverrides(t *testing.T) { overrides *perTenantOverrides expectedMaxLocalTraces map[string]int expectedMaxGlobalTraces map[string]int - expectedMaxSpansPerTrace map[string]int + expectedMaxBytesPerTrace map[string]int expectedIngestionRateSpans map[string]int expectedIngestionBurstSpans map[string]int }{ @@ -32,13 +32,13 @@ func TestOverrides(t *testing.T) { limits: Limits{ MaxGlobalTracesPerUser: 1, MaxLocalTracesPerUser: 2, - MaxSpansPerTrace: 3, + MaxBytesPerTrace: 3, IngestionBurstSize: 4, IngestionRateSpans: 5, }, expectedMaxGlobalTraces: map[string]int{"user1": 1, "user2": 1}, expectedMaxLocalTraces: map[string]int{"user1": 2, "user2": 2}, - expectedMaxSpansPerTrace: map[string]int{"user1": 3, "user2": 3}, + expectedMaxBytesPerTrace: map[string]int{"user1": 3, "user2": 3}, expectedIngestionBurstSpans: map[string]int{"user1": 4, "user2": 4}, expectedIngestionRateSpans: map[string]int{"user1": 5, "user2": 5}, }, @@ -47,7 +47,7 @@ func TestOverrides(t *testing.T) { limits: Limits{ MaxGlobalTracesPerUser: 1, MaxLocalTracesPerUser: 2, - MaxSpansPerTrace: 3, + MaxBytesPerTrace: 3, IngestionBurstSize: 4, IngestionRateSpans: 5, }, @@ -56,7 +56,7 @@ func TestOverrides(t *testing.T) { "user1": { MaxGlobalTracesPerUser: 6, MaxLocalTracesPerUser: 7, - MaxSpansPerTrace: 8, + MaxBytesPerTrace: 8, IngestionBurstSize: 9, IngestionRateSpans: 10, }, @@ -64,7 +64,7 @@ func TestOverrides(t *testing.T) { }, expectedMaxGlobalTraces: map[string]int{"user1": 6, "user2": 1}, expectedMaxLocalTraces: map[string]int{"user1": 7, "user2": 2}, - expectedMaxSpansPerTrace: map[string]int{"user1": 8, "user2": 3}, + expectedMaxBytesPerTrace: map[string]int{"user1": 8, "user2": 3}, expectedIngestionBurstSpans: map[string]int{"user1": 9, "user2": 4}, expectedIngestionRateSpans: map[string]int{"user1": 10, "user2": 5}, }, @@ -73,7 +73,7 @@ func TestOverrides(t *testing.T) { limits: Limits{ MaxGlobalTracesPerUser: 1, MaxLocalTracesPerUser: 2, - MaxSpansPerTrace: 3, + MaxBytesPerTrace: 3, IngestionBurstSize: 4, IngestionRateSpans: 5, }, @@ -82,14 +82,14 @@ func TestOverrides(t *testing.T) { "user1": { MaxGlobalTracesPerUser: 6, MaxLocalTracesPerUser: 7, - MaxSpansPerTrace: 8, + MaxBytesPerTrace: 8, IngestionBurstSize: 9, IngestionRateSpans: 10, }, "*": { MaxGlobalTracesPerUser: 11, MaxLocalTracesPerUser: 12, - MaxSpansPerTrace: 13, + MaxBytesPerTrace: 13, IngestionBurstSize: 14, IngestionRateSpans: 15, }, @@ -97,7 +97,7 @@ func TestOverrides(t *testing.T) { }, expectedMaxGlobalTraces: map[string]int{"user1": 6, "user2": 11}, expectedMaxLocalTraces: map[string]int{"user1": 7, "user2": 12}, - expectedMaxSpansPerTrace: map[string]int{"user1": 8, "user2": 13}, + expectedMaxBytesPerTrace: map[string]int{"user1": 8, "user2": 13}, expectedIngestionBurstSpans: map[string]int{"user1": 9, "user2": 14}, expectedIngestionRateSpans: map[string]int{"user1": 10, "user2": 15}, }, diff --git a/operations/jsonnet/microservices/config.libsonnet b/operations/jsonnet/microservices/config.libsonnet index b007492787d..359d500b881 100644 --- a/operations/jsonnet/microservices/config.libsonnet +++ b/operations/jsonnet/microservices/config.libsonnet @@ -50,7 +50,7 @@ max_traces_per_user: 100000, ingestion_rate_limit: 150000, ingestion_burst_size: 150000, - max_spans_per_trace: 200e3, + max_bytes_per_trace: 300e5, // ~30MB }, }, }, diff --git a/pkg/util/test/req.go b/pkg/util/test/req.go index e8c8a1c12f8..bc704590d42 100644 --- a/pkg/util/test/req.go +++ b/pkg/util/test/req.go @@ -69,3 +69,36 @@ func MakeTraceWithSpanCount(requests int, spansEach int, traceID []byte) *tempop return trace } + +// Note that this fn will generate a request with size **close to** maxBytes +func MakeRequestWithByteLimit(maxBytes int, traceID []byte) *tempopb.PushRequest { + if len(traceID) == 0 { + traceID = make([]byte, 16) + rand.Read(traceID) + } + + req := &tempopb.PushRequest{ + Batch: &v1_trace.ResourceSpans{}, + } + + ils := &v1_trace.InstrumentationLibrarySpans{ + InstrumentationLibrary: &v1_common.InstrumentationLibrary{ + Name: "super library", + Version: "0.0.1", + }, + } + req.Batch.InstrumentationLibrarySpans = append(req.Batch.InstrumentationLibrarySpans, ils) + + for req.Size() < maxBytes { + sampleSpan := v1_trace.Span{ + Name: "test", + TraceId: traceID, + SpanId: make([]byte, 8), + } + rand.Read(sampleSpan.SpanId) + + ils.Spans = append(ils.Spans, &sampleSpan) + } + + return req +} \ No newline at end of file From 6b5071410ee6675b0c3dfa33a0402b699a31ec66 Mon Sep 17 00:00:00 2001 From: Annanay Date: Thu, 25 Mar 2021 16:58:14 +0530 Subject: [PATCH 2/3] Lint, CHANGELOG Signed-off-by: Annanay --- CHANGELOG.md | 2 ++ pkg/util/test/req.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b01bc46cf29..f3214d922f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ * [CHANGE] Update to Go 1.16, latest OpenTelemetry proto definition and collector [#546](https://github.com/grafana/tempo/pull/546) * [CHANGE] Tempo Query Frontend now accepts queries at `/tempo/api/traces/{traceID}` as opposed to `/api/traces/{traceID}`. This is a **breaking change**, make sure to change the Grafana Datasource endpoint accordingly. [#574](https://github.com/grafana/tempo/pull/574) +* [CHANGE] `max_spans_per_trace` limit override has been removed in favour of `max_bytes_per_trace`. + This is a **breaking change** to the overrides config section. [#612](https://github.com/grafana/tempo/pull/612) * [FEATURE] Add page based access to the index file. [#557](https://github.com/grafana/tempo/pull/557) * [ENHANCEMENT] Add a Shutdown handler to flush data to backend, at "/shutdown". [#526](https://github.com/grafana/tempo/pull/526) * [ENHANCEMENT] Queriers now query all (healthy) ingesters for a trace to mitigate 404s on ingester rollouts/scaleups. diff --git a/pkg/util/test/req.go b/pkg/util/test/req.go index bc704590d42..093dbef2720 100644 --- a/pkg/util/test/req.go +++ b/pkg/util/test/req.go @@ -101,4 +101,4 @@ func MakeRequestWithByteLimit(maxBytes int, traceID []byte) *tempopb.PushRequest } return req -} \ No newline at end of file +} From 2ae0fcfaa6428495b3a92c78aa7e3f8e93cf3291 Mon Sep 17 00:00:00 2001 From: Annanay Date: Thu, 25 Mar 2021 17:40:29 +0530 Subject: [PATCH 3/3] Fix e2e limits test Signed-off-by: Annanay --- integration/e2e/config-limits.yaml | 2 +- integration/e2e/limits_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/e2e/config-limits.yaml b/integration/e2e/config-limits.yaml index 7652886113f..cfee5cf9349 100644 --- a/integration/e2e/config-limits.yaml +++ b/integration/e2e/config-limits.yaml @@ -12,7 +12,7 @@ distributor: grpc: overrides: - max_bytes_per_trace: 1_000_000 + max_bytes_per_trace: 100 max_traces_per_user: 1 ingestion_rate_limit: 5 ingestion_burst_size: 5 diff --git a/integration/e2e/limits_test.go b/integration/e2e/limits_test.go index 676920e1af1..1d1f79e3be8 100644 --- a/integration/e2e/limits_test.go +++ b/integration/e2e/limits_test.go @@ -29,7 +29,7 @@ func TestLimits(t *testing.T) { require.NoError(t, err) require.NotNil(t, c) - // should fail b/c the trace is too large + // should fail b/c the trace is too large. each batch should be ~80 bytes batch := makeThriftBatchWithSpanCount(2) require.Error(t, c.EmitBatch(context.Background(), batch)) // should fail b/c this will be too many traces