diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst index d641e1d098db..0e9183e0239d 100644 --- a/docs/root/intro/version_history.rst +++ b/docs/root/intro/version_history.rst @@ -34,6 +34,7 @@ Version history * listeners: added :ref:`continue_on_listener_filters_timeout ` to configure whether a listener will still create a connection when listener filters time out. * listeners: added :ref:`HTTP inspector listener filter `. * lua: extended `httpCall()` and `respond()` APIs to accept headers with entry values that can be a string or table of strings. +* metrics_service: added support for flushing histogram buckets. * outlier_detector: added :ref:`support for the grpc-status response header ` by mapping it to HTTP status. Guarded by envoy.reloadable_features.outlier_detection_support_for_grpc_status which defaults to true. * performance: new buffer implementation enabled by default (to disable add "--use-libevent-buffers 1" to the command-line arguments when starting Envoy). * performance: stats symbol table implementation (disabled by default; to test it, add "--use-fake-symbol-table 0" to the command-line arguments when starting Envoy). diff --git a/source/extensions/stat_sinks/metrics_service/grpc_metrics_service_impl.cc b/source/extensions/stat_sinks/metrics_service/grpc_metrics_service_impl.cc index 3ecd4d2c2831..f37d2c984fd3 100644 --- a/source/extensions/stat_sinks/metrics_service/grpc_metrics_service_impl.cc +++ b/source/extensions/stat_sinks/metrics_service/grpc_metrics_service_impl.cc @@ -60,21 +60,43 @@ void MetricsServiceSink::flushGauge(const Stats::Gauge& gauge) { gauage_metric->set_value(gauge.value()); } -void MetricsServiceSink::flushHistogram(const Stats::ParentHistogram& histogram) { - io::prometheus::client::MetricFamily* metrics_family = message_.add_envoy_metrics(); - metrics_family->set_type(io::prometheus::client::MetricType::SUMMARY); - metrics_family->set_name(histogram.name()); - auto* metric = metrics_family->add_metric(); - metric->set_timestamp_ms(std::chrono::duration_cast( - time_source_.systemTime().time_since_epoch()) - .count()); - auto* summary_metric = metric->mutable_summary(); - const Stats::HistogramStatistics& hist_stats = histogram.intervalStatistics(); +void MetricsServiceSink::flushHistogram(const Stats::ParentHistogram& envoy_histogram) { + // TODO(ramaraochavali): Currently we are sending both quantile information and bucket + // information. We should make this configurable if it turns out that sending both affects + // performance. + + // Add summary information for histograms. + io::prometheus::client::MetricFamily* summary_metrics_family = message_.add_envoy_metrics(); + summary_metrics_family->set_type(io::prometheus::client::MetricType::SUMMARY); + summary_metrics_family->set_name(envoy_histogram.name()); + auto* summary_metric = summary_metrics_family->add_metric(); + summary_metric->set_timestamp_ms(std::chrono::duration_cast( + time_source_.systemTime().time_since_epoch()) + .count()); + auto* summary = summary_metric->mutable_summary(); + const Stats::HistogramStatistics& hist_stats = envoy_histogram.intervalStatistics(); for (size_t i = 0; i < hist_stats.supportedQuantiles().size(); i++) { - auto* quantile = summary_metric->add_quantile(); + auto* quantile = summary->add_quantile(); quantile->set_quantile(hist_stats.supportedQuantiles()[i]); quantile->set_value(hist_stats.computedQuantiles()[i]); } + + // Add bucket information for histograms. + io::prometheus::client::MetricFamily* histogram_metrics_family = message_.add_envoy_metrics(); + histogram_metrics_family->set_type(io::prometheus::client::MetricType::HISTOGRAM); + histogram_metrics_family->set_name(envoy_histogram.name()); + auto* histogram_metric = histogram_metrics_family->add_metric(); + histogram_metric->set_timestamp_ms(std::chrono::duration_cast( + time_source_.systemTime().time_since_epoch()) + .count()); + auto* histogram = histogram_metric->mutable_histogram(); + histogram->set_sample_count(hist_stats.sampleCount()); + histogram->set_sample_sum(hist_stats.sampleSum()); + for (size_t i = 0; i < hist_stats.supportedBuckets().size(); i++) { + auto* bucket = histogram->add_bucket(); + bucket->set_upper_bound(hist_stats.supportedBuckets()[i]); + bucket->set_cumulative_count(hist_stats.computedBuckets()[i]); + } } void MetricsServiceSink::flush(Stats::MetricSnapshot& snapshot) { diff --git a/source/extensions/stat_sinks/metrics_service/grpc_metrics_service_impl.h b/source/extensions/stat_sinks/metrics_service/grpc_metrics_service_impl.h index 14a99e92ac07..5c0473b084dc 100644 --- a/source/extensions/stat_sinks/metrics_service/grpc_metrics_service_impl.h +++ b/source/extensions/stat_sinks/metrics_service/grpc_metrics_service_impl.h @@ -80,7 +80,7 @@ class MetricsServiceSink : public Stats::Sink { void flushCounter(const Stats::Counter& counter); void flushGauge(const Stats::Gauge& gauge); - void flushHistogram(const Stats::ParentHistogram& histogram); + void flushHistogram(const Stats::ParentHistogram& envoy_histogram); private: GrpcMetricsStreamerSharedPtr grpc_metrics_streamer_; diff --git a/test/extensions/stats_sinks/metrics_service/metrics_service_integration_test.cc b/test/extensions/stats_sinks/metrics_service/metrics_service_integration_test.cc index 6d698a36dfc7..e19310347334 100644 --- a/test/extensions/stats_sinks/metrics_service/metrics_service_integration_test.cc +++ b/test/extensions/stats_sinks/metrics_service/metrics_service_integration_test.cc @@ -66,9 +66,11 @@ class MetricsServiceIntegrationTest : public Grpc::GrpcClientIntegrationParamTes ABSL_MUST_USE_RESULT AssertionResult waitForMetricsRequest() { + bool known_summary_exists = false; bool known_histogram_exists = false; bool known_counter_exists = false; bool known_gauge_exists = false; + // Sometimes stats do not come in the first flush cycle, this loop ensures that we wait till // required stats are flushed. // TODO(ramaraochavali): Figure out a more robust way to find out all required stats have been @@ -98,11 +100,18 @@ class MetricsServiceIntegrationTest : public Grpc::GrpcClientIntegrationParamTes } if (metrics_family.name() == "cluster.cluster_0.upstream_rq_time" && metrics_family.type() == ::io::prometheus::client::MetricType::SUMMARY) { - known_histogram_exists = true; + known_summary_exists = true; Stats::HistogramStatisticsImpl empty_statistics; EXPECT_EQ(metrics_family.metric(0).summary().quantile_size(), empty_statistics.supportedQuantiles().size()); } + if (metrics_family.name() == "cluster.cluster_0.upstream_rq_time" && + metrics_family.type() == ::io::prometheus::client::MetricType::HISTOGRAM) { + known_histogram_exists = true; + Stats::HistogramStatisticsImpl empty_statistics; + EXPECT_EQ(metrics_family.metric(0).histogram().bucket_size(), + empty_statistics.supportedBuckets().size()); + } ASSERT(metrics_family.metric(0).has_timestamp_ms()); if (known_counter_exists && known_gauge_exists && known_histogram_exists) { break; @@ -111,6 +120,7 @@ class MetricsServiceIntegrationTest : public Grpc::GrpcClientIntegrationParamTes } EXPECT_TRUE(known_counter_exists); EXPECT_TRUE(known_gauge_exists); + EXPECT_TRUE(known_summary_exists); EXPECT_TRUE(known_histogram_exists); return AssertionSuccess();