From 088bb44022a9abb72855cfb6c62577c4ab840d58 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Wed, 1 Dec 2021 08:57:48 -0500 Subject: [PATCH 1/6] reduce compaction cycle to increase up time Signed-off-by: Joe Elliott --- tempodb/compactor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tempodb/compactor.go b/tempodb/compactor.go index 0c0c0b66d56..04604e58708 100644 --- a/tempodb/compactor.go +++ b/tempodb/compactor.go @@ -52,7 +52,7 @@ const ( inputBlocks = 2 outputBlocks = 1 - compactionCycle = 30 * time.Second + compactionCycle = 500 * time.Millisecond DefaultFlushSizeBytes uint32 = 30 * 1024 * 1024 // 30 MiB From 5ba83a1887fdc9f25b797789be64c6818810139f Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Wed, 1 Dec 2021 09:02:49 -0500 Subject: [PATCH 2/6] increase waitOnStartup to account for reduced compactionCycle Signed-off-by: Joe Elliott --- modules/compactor/compactor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/compactor/compactor.go b/modules/compactor/compactor.go index f0b6a9242c9..019515ce6b2 100644 --- a/modules/compactor/compactor.go +++ b/modules/compactor/compactor.go @@ -19,7 +19,7 @@ import ( ) const ( - waitOnStartup = time.Minute + waitOnStartup = 90 * time.Second ) type Compactor struct { From 7e05fb379e6483d8b01e410b9dc3d8a683ab1c1c Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Wed, 1 Dec 2021 09:07:05 -0500 Subject: [PATCH 3/6] changelog Signed-off-by: Joe Elliott --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6dd4d608a12..023c833ac6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * [ENHANCEMENT] Improve memory efficiency of compaction and block cutting. 
[#1121](https://github.com/grafana/tempo/pull/1121) [#1130](https://github.com/grafana/tempo/pull/1130) (@joe-elliott) * [ENHANCEMENT] Include metrics for configured limit overrides and defaults: tempo_limits_overrides, tempo_limits_defaults [#1089](https://github.com/grafana/tempo/pull/1089) (@zalegrala) * [ENHANCEMENT] Add Envoy Proxy panel to `Tempo / Writes` dashboard [#1137](https://github.com/grafana/tempo/pull/1137) (@kvrhdn) +* [ENHANCEMENT] Reduce compactionCycle to improve performance in large multitenant environments [#1145](https://github.com/grafana/tempo/pull/1145) (@kvrhdn) * [BUGFIX] Fix defaults for MaxBytesPerTrace (ingester.max-bytes-per-trace) and MaxSearchBytesPerTrace (ingester.max-search-bytes-per-trace) (@bitprocessor) * [BUGFIX] Ignore empty objects during compaction [#1113](https://github.com/grafana/tempo/pull/1113) (@mdisibio) * [BUGFIX] Add process name to vulture traces to work around display issues [#1127](https://github.com/grafana/tempo/pull/1127) (@mdisibio) From 13d5272df3d5f84dfe08e73c96bc075ead3a7de8 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Wed, 1 Dec 2021 09:33:51 -0500 Subject: [PATCH 4/6] Added max compaction cycle param Signed-off-by: Joe Elliott --- docs/tempo/website/configuration/_index.md | 4 ++++ docs/tempo/website/configuration/manifest.md | 9 ++++++--- docs/tempo/website/configuration/polling.md | 4 ++++ modules/storage/config.go | 1 + tempodb/compactor.go | 2 +- tempodb/config.go | 1 + 6 files changed, 17 insertions(+), 4 deletions(-) diff --git a/docs/tempo/website/configuration/_index.md b/docs/tempo/website/configuration/_index.md index 479f750a5d8..b60f3eb4411 100644 --- a/docs/tempo/website/configuration/_index.md +++ b/docs/tempo/website/configuration/_index.md @@ -438,6 +438,10 @@ storage: # Default 0 (disabled). [blocklist_poll_stale_tenant_index: <duration>] + # The maximum amount of time to spend compacting a single tenant before moving to the next. + # Default is 5m. 
+ [max_compaction_cycle: <duration>] + # Cache type to use. Should be one of "redis", "memcached" # Example: "cache: memcached" [cache: <string>] diff --git a/docs/tempo/website/configuration/manifest.md b/docs/tempo/website/configuration/manifest.md index 1e08f95b7a9..1795f6de8ec 100644 --- a/docs/tempo/website/configuration/manifest.md +++ b/docs/tempo/website/configuration/manifest.md @@ -272,11 +272,12 @@ ingester: heartbeat_timeout: 5m0s replication_factor: 1 zone_awareness_enabled: false + excluded_zones: "" num_tokens: 128 heartbeat_period: 5s observe_period: 0s join_after: 0s - min_ready_duration: 1m0s + min_ready_duration: 15s interface_names: - eth0 - en0 @@ -284,6 +285,7 @@ ingester: tokens_file_path: "" availability_zone: "" unregister_on_shutdown: true + readiness_check_ring_health: true address: 127.0.0.1 port: 0 id: hostname @@ -319,6 +321,7 @@ storage: blocklist_poll_fallback: true blocklist_poll_tenant_index_builders: 2 blocklist_poll_stale_tenant_index: 0s + max_compaction_cycle: 5m0s backend: local local: path: /tmp/tempo/traces @@ -365,8 +368,8 @@ overrides: search_tags_allow_list: null max_traces_per_user: 10000 max_global_traces_per_user: 0 - max_bytes_per_trace: 50000 - max_search_bytes_per_trace: 0 + max_bytes_per_trace: 5000000 + max_search_bytes_per_trace: 50000 block_retention: 0s per_tenant_override_config: "" per_tenant_override_period: 10s diff --git a/docs/tempo/website/configuration/polling.md b/docs/tempo/website/configuration/polling.md index 71870cce229..f7b08384a51 100644 --- a/docs/tempo/website/configuration/polling.md +++ b/docs/tempo/website/configuration/polling.md @@ -30,6 +30,10 @@ storage: # the bucket contents. # Default 0 (disabled). [blocklist_poll_stale_tenant_index: <duration>] + + # The maximum amount of time to spend compacting a single tenant before moving to the next. + # Default is 5m. 
+ [max_compaction_cycle: <duration>] ``` Due to the mechanics of the [tenant index]({{< relref "../operations/polling" >}}) the blocklist will be stale by diff --git a/modules/storage/config.go b/modules/storage/config.go index 0f0c1151836..dc062f3fe6b 100644 --- a/modules/storage/config.go +++ b/modules/storage/config.go @@ -28,6 +28,7 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet) cfg.Trace.BlocklistPollFallback = true cfg.Trace.BlocklistPollConcurrency = tempodb.DefaultBlocklistPollConcurrency cfg.Trace.BlocklistPollTenantIndexBuilders = tempodb.DefaultTenantIndexBuilders + cfg.Trace.MaxCompactionCycle = tempodb.DefaultBlocklistPoll f.StringVar(&cfg.Trace.Backend, util.PrefixConfig(prefix, "trace.backend"), "", "Trace backend (s3, azure, gcs, local)") f.DurationVar(&cfg.Trace.BlocklistPoll, util.PrefixConfig(prefix, "trace.blocklist_poll"), tempodb.DefaultBlocklistPoll, "Period at which to run the maintenance cycle.") diff --git a/tempodb/compactor.go b/tempodb/compactor.go index 04604e58708..05ff1cc83a5 100644 --- a/tempodb/compactor.go +++ b/tempodb/compactor.go @@ -112,7 +112,7 @@ func (rw *readerWriter) doCompaction() { } // after a maintenance cycle bail out - if start.Add(rw.cfg.BlocklistPoll).Before(time.Now()) { + if start.Add(rw.cfg.MaxCompactionCycle).Before(time.Now()) { level.Info(rw.logger).Log("msg", "compacted blocks for a maintenance cycle, bailing out", "tenantID", tenantID) break } diff --git a/tempodb/config.go b/tempodb/config.go index d9e047f6b30..ffe92080a6f 100644 --- a/tempodb/config.go +++ b/tempodb/config.go @@ -36,6 +36,7 @@ type Config struct { BlocklistPollFallback bool `yaml:"blocklist_poll_fallback"` BlocklistPollTenantIndexBuilders int `yaml:"blocklist_poll_tenant_index_builders"` BlocklistPollStaleTenantIndex time.Duration `yaml:"blocklist_poll_stale_tenant_index"` + MaxCompactionCycle time.Duration `yaml:"max_compaction_cycle"` // backends Backend string `yaml:"backend"` From 
bfd399b3e7c64e5725575d9d802e84dfdda161a0 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Wed, 1 Dec 2021 09:43:49 -0500 Subject: [PATCH 5/6] changelog Signed-off-by: Joe Elliott --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 023c833ac6e..ad86ace43b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ * [ENHANCEMENT] Improve memory efficiency of compaction and block cutting. [#1121](https://github.com/grafana/tempo/pull/1121) [#1130](https://github.com/grafana/tempo/pull/1130) (@joe-elliott) * [ENHANCEMENT] Include metrics for configured limit overrides and defaults: tempo_limits_overrides, tempo_limits_defaults [#1089](https://github.com/grafana/tempo/pull/1089) (@zalegrala) * [ENHANCEMENT] Add Envoy Proxy panel to `Tempo / Writes` dashboard [#1137](https://github.com/grafana/tempo/pull/1137) (@kvrhdn) -* [ENHANCEMENT] Reduce compactionCycle to improve performance in large multitenant environments [#1145](https://github.com/grafana/tempo/pull/1145) (@kvrhdn) +* [ENHANCEMENT] Reduce compactionCycle to improve performance in large multitenant environments [#1145](https://github.com/grafana/tempo/pull/1145) (@joe-elliott) +* [ENHANCEMENT] Added max_compaction_cycle to allow for independently configuring polling and compaction cycle. 
[#1145](https://github.com/grafana/tempo/pull/1145) (@joe-elliott) * [BUGFIX] Fix defaults for MaxBytesPerTrace (ingester.max-bytes-per-trace) and MaxSearchBytesPerTrace (ingester.max-search-bytes-per-trace) (@bitprocessor) * [BUGFIX] Ignore empty objects during compaction [#1113](https://github.com/grafana/tempo/pull/1113) (@mdisibio) * [BUGFIX] Add process name to vulture traces to work around display issues [#1127](https://github.com/grafana/tempo/pull/1127) (@mdisibio) From 150a17cd276c95a16a3fc3826c1e8d9c63d50fd9 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Thu, 2 Dec 2021 07:25:59 -0500 Subject: [PATCH 6/6] Split defaults Signed-off-by: Joe Elliott --- modules/storage/config.go | 2 +- tempodb/config.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/storage/config.go b/modules/storage/config.go index dc062f3fe6b..481eccbd625 100644 --- a/modules/storage/config.go +++ b/modules/storage/config.go @@ -28,7 +28,7 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet) cfg.Trace.BlocklistPollFallback = true cfg.Trace.BlocklistPollConcurrency = tempodb.DefaultBlocklistPollConcurrency cfg.Trace.BlocklistPollTenantIndexBuilders = tempodb.DefaultTenantIndexBuilders - cfg.Trace.MaxCompactionCycle = tempodb.DefaultBlocklistPoll + cfg.Trace.MaxCompactionCycle = tempodb.DefaultMaxCompactionCycle f.StringVar(&cfg.Trace.Backend, util.PrefixConfig(prefix, "trace.backend"), "", "Trace backend (s3, azure, gcs, local)") f.DurationVar(&cfg.Trace.BlocklistPoll, util.PrefixConfig(prefix, "trace.blocklist_poll"), tempodb.DefaultBlocklistPoll, "Period at which to run the maintenance cycle.") diff --git a/tempodb/config.go b/tempodb/config.go index ffe92080a6f..5c53cfe8940 100644 --- a/tempodb/config.go +++ b/tempodb/config.go @@ -19,6 +19,7 @@ import ( const ( DefaultBlocklistPoll = 5 * time.Minute + DefaultMaxCompactionCycle = 5 * time.Minute DefaultBlocklistPollConcurrency = uint(50) DefaultRetentionConcurrency = uint(10) 
DefaultTenantIndexBuilders = 2