Skip to content

Commit

Permalink
Merge pull request #6524 from jcsp/cloud-storage-log-errors
Browse files Browse the repository at this point in the history
cloud_storage: improve error handling
  • Loading branch information
jcsp authored Sep 29, 2022
2 parents 0259c6a + 201886c commit a209299
Showing 1 changed file with 40 additions and 7 deletions.
47 changes: 40 additions & 7 deletions src/v/cloud_storage/remote.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <boost/beast/http/error.hpp>
#include <boost/beast/http/field.hpp>
#include <fmt/chrono.h>
#include <gnutls/gnutls.h>

#include <exception>
#include <utility>
Expand All @@ -47,6 +48,41 @@ enum class error_outcome {
notfound
};

/**
* Identify error cases that should be quickly retried, e.g.
* TCP disconnects, timeouts. Network errors may also show up
* indirectly as errors from the TLS layer.
*/
bool system_error_retryable(const std::system_error& e) {
auto v = e.code().value();

// The name() of seastar's gnutls_error_category class
constexpr std::string_view gnutls_cateogry_name{"GnuTLS"};

if (e.code().category().name() == gnutls_cateogry_name) {
switch (v) {
case GNUTLS_E_PUSH_ERROR:
case GNUTLS_E_PULL_ERROR:
case GNUTLS_E_PREMATURE_TERMINATION:
return true;
default:
return false;
}
} else {
switch (v) {
case ECONNREFUSED:
case ENETUNREACH:
case ETIMEDOUT:
case ECONNRESET:
case EPIPE:
return true;
default:
return false;
}
}
__builtin_unreachable();
}

/// @brief Analyze exception
/// @return error outcome - retry, fail (with exception), or notfound (can only
/// be used with download)
Expand Down Expand Up @@ -109,17 +145,14 @@ static error_outcome categorize_error(
// - any filesystem error
// - broken-pipe
// - any other network error (no memory, bad socket, etc)
if (auto code = cerr.code();
code.value() != ECONNREFUSED && code.value() != ENETUNREACH
&& code.value() != ETIMEDOUT && code.value() != ECONNRESET
&& code.value() != EPIPE) {
vlog(ctxlog.error, "System error {}", cerr);
result = error_outcome::fail;
} else {
if (system_error_retryable(cerr)) {
vlog(
ctxlog.warn,
"System error susceptible for retry {}",
cerr.what());
} else {
vlog(ctxlog.error, "System error {}", cerr);
result = error_outcome::fail;
}
} catch (const ss::timed_out_error& terr) {
// This should happen when the connection pool was disconnected
Expand Down

0 comments on commit a209299

Please sign in to comment.