Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Outlier Detection: use gRPC status code for detecting failures #7942

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions source/common/router/router.cc
Original file line number Diff line number Diff line change
Expand Up @@ -931,15 +931,14 @@ Filter::streamResetReasonToResponseFlag(Http::StreamResetReason reset_reason) {
NOT_REACHED_GCOVR_EXCL_LINE;
}

void Filter::handleNon5xxResponseHeaders(const Http::HeaderMap& headers,
UpstreamRequest& upstream_request, bool end_stream) {
void Filter::handleNon5xxResponseHeaders(absl::optional<Grpc::Status::GrpcStatus> grpc_status,
UpstreamRequest& upstream_request, bool end_stream,
uint64_t grpc_to_http_status) {
// We need to defer gRPC success until after we have processed grpc-status in
// the trailers.
if (grpc_request_) {
if (end_stream) {
absl::optional<Grpc::Status::GrpcStatus> grpc_status = Grpc::Common::getGrpcStatus(headers);
if (grpc_status &&
!Http::CodeUtility::is5xx(Grpc::Utility::grpcToHttpStatus(grpc_status.value()))) {
if (grpc_status && !Http::CodeUtility::is5xx(grpc_to_http_status)) {
upstream_request.upstream_host_->stats().rq_success_.inc();
} else {
upstream_request.upstream_host_->stats().rq_error_.inc();
Expand Down Expand Up @@ -1002,8 +1001,23 @@ void Filter::onUpstreamHeaders(uint64_t response_code, Http::HeaderMapPtr&& head
ENVOY_STREAM_LOG(debug, "upstream headers complete: end_stream={}", *callbacks_, end_stream);

modify_headers_(*headers);
// When grpc-status appears in response headers, convert grpc-status to HTTP status code
// for outlier detection. This does not currently change any stats or logging and does not
// handle the case when an error grpc-status is sent as a trailer.
absl::optional<Grpc::Status::GrpcStatus> grpc_status;
htuch marked this conversation as resolved.
Show resolved Hide resolved
uint64_t grpc_to_http_status = 0;
if (grpc_request_) {
grpc_status = Grpc::Common::getGrpcStatus(*headers);
if (grpc_status.has_value()) {
grpc_to_http_status = Grpc::Utility::grpcToHttpStatus(grpc_status.value());
}
}

upstream_request.upstream_host_->outlierDetector().putHttpResponseCode(response_code);
if (grpc_status.has_value()) {
upstream_request.upstream_host_->outlierDetector().putHttpResponseCode(grpc_to_http_status);
} else {
upstream_request.upstream_host_->outlierDetector().putHttpResponseCode(response_code);
}

if (headers->EnvoyImmediateHealthCheckFail() != nullptr) {
upstream_request.upstream_host_->healthChecker().setUnhealthy();
Expand Down Expand Up @@ -1090,7 +1104,7 @@ void Filter::onUpstreamHeaders(uint64_t response_code, Http::HeaderMapPtr&& head
upstream_request.upstream_host_->canary();
chargeUpstreamCode(response_code, *headers, upstream_request.upstream_host_, false);
if (!Http::CodeUtility::is5xx(response_code)) {
handleNon5xxResponseHeaders(*headers, upstream_request, end_stream);
handleNon5xxResponseHeaders(grpc_status, upstream_request, end_stream, grpc_to_http_status);
}

// Append routing cookies
Expand Down
5 changes: 3 additions & 2 deletions source/common/router/router.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,9 @@ class Filter : Logger::Loggable<Logger::Id::router>,
void doRetry();
// Called immediately after a non-5xx header is received from upstream; performs stats accounting
// and handles the difference between gRPC and non-gRPC requests.
void handleNon5xxResponseHeaders(const Http::HeaderMap& headers,
UpstreamRequest& upstream_request, bool end_stream);
void handleNon5xxResponseHeaders(absl::optional<Grpc::Status::GrpcStatus> grpc_status,
UpstreamRequest& upstream_request, bool end_stream,
uint64_t grpc_to_http_status);
TimeSource& timeSource() { return config_.timeSource(); }
Http::Context& httpContext() { return config_.http_context_; }

Expand Down
31 changes: 28 additions & 3 deletions test/common/router/router_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1194,11 +1194,36 @@ TEST_F(RouterTest, GrpcAlreadyExistsTrailersOnly) {

Http::HeaderMapPtr response_headers(
new Http::TestHeaderMapImpl{{":status", "200"}, {"grpc-status", "6"}});
EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200));
EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(409));
response_decoder->decodeHeaders(std::move(response_headers), true);
EXPECT_TRUE(verifyHostUpstreamStats(1, 0));
}

// Validate that outlier detection records a failure when the gRPC response status is Unavailable.
// The upstream replies with HTTP 200 but carries grpc-status 14 in the response headers; the test
// expects the outlier detector to be fed 503 rather than the raw HTTP status.
TEST_F(RouterTest, GrpcOutlierDetectionUnavailableStatusCode) {
NiceMock<Http::MockStreamEncoder> encoder1;
Http::StreamDecoder* response_decoder = nullptr;
// Intercept stream creation on the mocked connection pool: capture the decoder the router hands
// over so the test can inject an upstream response later, and report the pool as ready right away.
EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
.WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
-> Http::ConnectionPool::Cancellable* {
response_decoder = &decoder;
callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_);
return nullptr;
}));
expectResponseTimerCreate();

// Send a headers-only request (end_stream = true) marked as gRPC via its content-type.
Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"}, {"grpc-timeout", "20S"}};
HttpTestUtility::addDefaultHeaders(headers);
router_.decodeHeaders(headers, true);

// Headers-only response: HTTP 200 with grpc-status 14 carried in the headers.
Http::HeaderMapPtr response_headers(
new Http::TestHeaderMapImpl{{":status", "200"}, {"grpc-status", "14"}});
// Outlier detector will use the gRPC response status code.
// The expectation must be registered before decodeHeaders() below, which triggers the call.
EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(503));
response_decoder->decodeHeaders(std::move(response_headers), true);
// NOTE(review): arguments are presumably (expected successes, expected errors), i.e. one host
// error and no successes here -- confirm against verifyHostUpstreamStats' definition.
EXPECT_TRUE(verifyHostUpstreamStats(0, 1));
}
ZhouyihaiDing marked this conversation as resolved.
Show resolved Hide resolved

// Validate gRPC Internal response stats are sane when response is trailers only.
TEST_F(RouterTest, GrpcInternalTrailersOnly) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

NiceMock<Http::MockStreamEncoder> encoder1;
Expand All @@ -1218,7 +1243,7 @@ TEST_F(RouterTest, GrpcInternalTrailersOnly) {

Http::HeaderMapPtr response_headers(
new Http::TestHeaderMapImpl{{":status", "200"}, {"grpc-status", "13"}});
EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200));
EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(500));
response_decoder->decodeHeaders(std::move(response_headers), true);
EXPECT_TRUE(verifyHostUpstreamStats(0, 1));
}
Expand Down Expand Up @@ -2777,7 +2802,7 @@ TEST_F(RouterTest, RetryUpstreamGrpcCancelled) {
router_.retry_state_->expectHeadersRetry();
Http::HeaderMapPtr response_headers1(
new Http::TestHeaderMapImpl{{":status", "200"}, {"grpc-status", "1"}});
EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200));
EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(499));
response_decoder->decodeHeaders(std::move(response_headers1), true);
EXPECT_TRUE(verifyHostUpstreamStats(0, 1));

Expand Down