Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CORE-2157: cloud_storage_clients: add support for path-style addressing #17806

Merged
merged 7 commits into from
Apr 24, 2024
Merged
1 change: 1 addition & 0 deletions src/v/archival/tests/archival_metadata_stm_gtest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ cloud_storage_clients::s3_configuration get_configuration() {
conf.access_key = cloud_roles::public_key_str("acess-key");
conf.secret_key = cloud_roles::private_key_str("secret-key");
conf.region = cloud_roles::aws_region_name("us-east-1");
conf.url_style = cloud_storage_clients::s3_url_style::virtual_host;
conf.server_addr = server_addr;
conf.disable_metrics = net::metrics_disabled::yes;
conf.disable_public_metrics = net::public_metrics_disabled::yes;
Expand Down
1 change: 1 addition & 0 deletions src/v/archival/tests/archival_metadata_stm_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ struct archival_metadata_stm_base_fixture
conf.access_key = cloud_roles::public_key_str("acess-key");
conf.secret_key = cloud_roles::private_key_str("secret-key");
conf.region = cloud_roles::aws_region_name("us-east-1");
conf.url_style = cloud_storage_clients::s3_url_style::virtual_host;
conf.server_addr = server_addr;
conf._probe = ss::make_shared<cloud_storage_clients::client_probe>(
net::metrics_disabled::yes,
Expand Down
2 changes: 2 additions & 0 deletions src/v/archival/tests/archival_service_fixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "archival/archiver_manager.h"
#include "archival/tests/service_fixture.h"
#include "archival/types.h"
#include "cloud_storage_clients/types.h"
#include "cluster/controller_api.h"
#include "cluster/errc.h"
#include "cluster/fwd.h"
Expand Down Expand Up @@ -59,6 +60,7 @@ class archiver_cluster_fixture
s3_conf.access_key = cloud_roles::public_key_str("access-key");
s3_conf.secret_key = cloud_roles::private_key_str("secret-key");
s3_conf.region = cloud_roles::aws_region_name("us-east-1");
s3_conf.url_style = cloud_storage_clients::s3_url_style::virtual_host;
s3_conf._probe = ss::make_shared<cloud_storage_clients::client_probe>(
net::metrics_disabled::yes,
net::public_metrics_disabled::yes,
Expand Down
1 change: 1 addition & 0 deletions src/v/archival/tests/service_fixture.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ archiver_fixture::get_configurations() {
s3conf.access_key = cloud_roles::public_key_str("acess-key");
s3conf.secret_key = cloud_roles::private_key_str("secret-key");
s3conf.region = cloud_roles::aws_region_name("us-east-1");
s3conf.url_style = cloud_storage_clients::s3_url_style::virtual_host;
s3conf._probe = ss::make_shared<cloud_storage_clients::client_probe>(
net::metrics_disabled::yes,
net::public_metrics_disabled::yes,
Expand Down
1 change: 1 addition & 0 deletions src/v/cloud_storage/tests/anomalies_detector_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ class bucket_view_fixture : http_imposter_fixture {
conf.access_key = cloud_roles::public_key_str("acess-key");
conf.secret_key = cloud_roles::private_key_str("secret-key");
conf.region = cloud_roles::aws_region_name("us-east-1");
conf.url_style = cloud_storage_clients::s3_url_style::virtual_host;
conf.server_addr = server_addr;
conf._probe = ss::make_shared<cloud_storage_clients::client_probe>(
net::metrics_disabled::yes,
Expand Down
1 change: 1 addition & 0 deletions src/v/cloud_storage/tests/s3_imposter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ s3_imposter_fixture::get_configuration() {
conf.access_key = cloud_roles::public_key_str("acess-key");
conf.secret_key = cloud_roles::private_key_str("secret-key");
conf.region = cloud_roles::aws_region_name("us-east-1");
conf.url_style = cloud_storage_clients::s3_url_style::virtual_host;
conf.server_addr = server_addr;
conf._probe = ss::make_shared<cloud_storage_clients::client_probe>(
net::metrics_disabled::yes,
Expand Down
1 change: 1 addition & 0 deletions src/v/cloud_storage/types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ cloud_storage_clients::default_overrides get_default_overrides() {
optep.has_value()) {
overrides.endpoint = cloud_storage_clients::endpoint_url(*optep);
}
overrides.url_style = config::shard_local_cfg().cloud_storage_url_style();
overrides.disable_tls = config::shard_local_cfg().cloud_storage_disable_tls;
if (auto cert = config::shard_local_cfg().cloud_storage_trust_file.value();
cert.has_value()) {
Expand Down
2 changes: 2 additions & 0 deletions src/v/cloud_storage_clients/configuration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ ss::future<s3_configuration> s3_configuration::make_configuration(
client_cfg.secret_key = skey;
client_cfg.region = region;
client_cfg.uri = access_point_uri(endpoint_uri);
client_cfg.url_style = overrides.url_style;

if (overrides.disable_tls == false) {
client_cfg.credentials = co_await build_tls_credentials(
"s3", overrides.trust_file, s3_log);
Expand Down
3 changes: 3 additions & 0 deletions src/v/cloud_storage_clients/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ struct default_overrides {
std::optional<uint16_t> port = std::nullopt;
std::optional<ca_trust_file> trust_file = std::nullopt;
std::optional<ss::lowres_clock::duration> max_idle_time = std::nullopt;
s3_url_style url_style = s3_url_style::virtual_host;
bool disable_tls = false;
};

Expand All @@ -48,6 +49,8 @@ struct s3_configuration : common_configuration {
std::optional<cloud_roles::public_key_str> access_key;
/// AWS secret key, optional if configuration uses temporary credentials
std::optional<cloud_roles::private_key_str> secret_key;
/// AWS URL style, either virtual-hosted-style or path-style.
s3_url_style url_style;

/// \brief opinionated configuraiton initialization
/// Generates uri field from region, initializes credentials for the
Expand Down
98 changes: 77 additions & 21 deletions src/v/cloud_storage_clients/s3_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,26 @@ request_creator::request_creator(
const s3_configuration& conf,
ss::lw_shared_ptr<const cloud_roles::apply_credentials> apply_credentials)
: _ap(conf.uri)
, _ap_style(conf.url_style)
, _apply_credentials{std::move(apply_credentials)} {}

result<http::client::request_header> request_creator::make_get_object_request(
bucket_name const& name,
object_key const& key,
std::optional<http_byte_range> byte_range) {
http::client::request_header header{};
// Virtual Style:
// GET /{object-id} HTTP/1.1
// Host: {bucket-name}.s3.amazonaws.com
// Host: {bucket-name}.s3.{region}.amazonaws.com
// Path Style:
// GET /{bucket-name}/{object-id} HTTP/1.1
// Host: s3.{region}.amazonaws.com
//
// x-amz-date:{req-datetime}
// Authorization:{signature}
// x-amz-content-sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
auto host = fmt::format("{}.{}", name(), _ap());
auto target = fmt::format("/{}", key().string());
auto host = make_host(name);
auto target = make_target(name, key);
header.method(boost::beast::http::verb::get);
header.target(target);
header.insert(
Expand All @@ -111,13 +117,18 @@ result<http::client::request_header> request_creator::make_get_object_request(
result<http::client::request_header> request_creator::make_head_object_request(
bucket_name const& name, object_key const& key) {
http::client::request_header header{};
// Virtual Style:
// HEAD /{object-id} HTTP/1.1
// Host: {bucket-name}.s3.amazonaws.com
// Host: {bucket-name}.s3.{region}.amazonaws.com
// Path Style:
// HEAD /{bucket-name}/{object-id} HTTP/1.1
// Host: s3.{region}.amazonaws.com
//
// x-amz-date:{req-datetime}
// Authorization:{signature}
// x-amz-content-sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
auto host = fmt::format("{}.{}", name(), _ap());
auto target = fmt::format("/{}", key().string());
auto host = make_host(name);
auto target = make_target(name, key);
header.method(boost::beast::http::verb::head);
header.target(target);
header.insert(
Expand All @@ -134,8 +145,13 @@ result<http::client::request_header> request_creator::make_head_object_request(
result<http::client::request_header>
request_creator::make_unsigned_put_object_request(
bucket_name const& name, object_key const& key, size_t payload_size_bytes) {
// Virtual Style:
// PUT /my-image.jpg HTTP/1.1
// Host: myBucket.s3.<Region>.amazonaws.com
// Host: {bucket-name}.s3.{region}.amazonaws.com
// Path Style:
// PUT /{bucket-name}/{object-id} HTTP/1.1
// Host: s3.{region}.amazonaws.com
//
// Date: Wed, 12 Oct 2009 17:50:00 GMT
// Authorization: authorization string
// Content-Type: text/plain
Expand All @@ -144,8 +160,8 @@ request_creator::make_unsigned_put_object_request(
// Expect: 100-continue
// [11434 bytes of object data]
http::client::request_header header{};
auto host = fmt::format("{}.{}", name(), _ap());
auto target = fmt::format("/{}", key().string());
auto host = make_host(name);
auto target = make_target(name, key);
header.method(boost::beast::http::verb::put);
header.target(target);
header.insert(
Expand All @@ -172,19 +188,25 @@ request_creator::make_list_objects_v2_request(
std::optional<size_t> max_keys,
std::optional<ss::sstring> continuation_token,
std::optional<char> delimiter) {
// Virtual Style:
// GET /?list-type=2&prefix=photos/2006/&delimiter=/ HTTP/1.1
// Host: example-bucket.s3.<Region>.amazonaws.com
// Host: {bucket-name}.s3.{region}.amazonaws.com
// Path Style:
// GET /{bucket-name}/?list-type=2&prefix=photos/2006/&delimiter=/ HTTP/1.1
// Host: s3.{region}.amazonaws.com
//
// x-amz-date: 20160501T000433Z
// Authorization: authorization string
http::client::request_header header{};
auto host = fmt::format("{}.{}", name(), _ap());
auto target = fmt::format("/?list-type=2");
auto host = make_host(name);
auto key = fmt::format("?list-type=2");
if (prefix.has_value()) {
target = fmt::format("{}&prefix={}", target, (*prefix)().string());
key = fmt::format("{}&prefix={}", key, (*prefix)().string());
}
if (delimiter.has_value()) {
target = fmt::format("{}&delimiter={}", target, *delimiter);
key = fmt::format("{}&delimiter={}", key, *delimiter);
}
auto target = make_target(name, object_key{key});
header.method(boost::beast::http::verb::get);
header.target(target);
header.insert(
Expand Down Expand Up @@ -224,15 +246,20 @@ result<http::client::request_header>
request_creator::make_delete_object_request(
bucket_name const& name, object_key const& key) {
http::client::request_header header{};
// Virtual Style:
// DELETE /{object-id} HTTP/1.1
// Host: {bucket-name}.s3.amazonaws.com
// Path Style:
// DELETE /{bucket-name}/{object-id} HTTP/1.1
// Host: s3.{region}.amazonaws.com
//
// x-amz-date:{req-datetime}
// Authorization:{signature}
// x-amz-content-sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
//
// NOTE: x-amz-mfa, x-amz-bypass-governance-retention are not used for now
auto host = fmt::format("{}.{}", name(), _ap());
auto target = fmt::format("/{}", key().string());
auto host = make_host(name);
auto target = make_target(name, key);
header.method(boost::beast::http::verb::delete_);
header.target(target);
header.insert(
Expand Down Expand Up @@ -262,8 +289,13 @@ request_creator::make_delete_objects_request(
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html
// will generate this request:
//
// Virtual Style:
// POST /?delete HTTP/1.1
// Host: <Bucket>.s3.amazonaws.com
// Host: {bucket-name}.s3.{region}.amazonaws.com
// Path Style:
// POST /{bucket-name}/?delete HTTP/1.1
// Host: s3.{region}.amazonaws.com
//
Comment on lines +292 to +298
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the comments!

// Content-MD5: <Computer from body>
// Authorization: <applied by _requestor>
// Content-Length: <...>
Expand Down Expand Up @@ -317,11 +349,12 @@ request_creator::make_delete_objects_request(
return bytes_to_base64(to_bytes_view(bin_digest));
}();

auto header = http::client::request_header{};
http::client::request_header header{};
header.method(boost::beast::http::verb::post);
header.target("/?delete");
header.insert(
boost::beast::http::field::host, fmt::format("{}.{}", name(), _ap()));
auto host = make_host(name);
auto target = make_target(name, object_key{"?delete"});
header.target(target);
header.insert(boost::beast::http::field::host, host);
// from experiments, minio is sloppy in checking this field. It will check
// that it's valid base64, but seems not to actually check the value
header.insert(
Expand All @@ -343,6 +376,29 @@ request_creator::make_delete_objects_request(
std::make_unique<delete_objects_body>(std::move(body))}}};
}

std::string request_creator::make_host(const bucket_name& name) const {
switch (_ap_style) {
case s3_url_style::virtual_host:
// Host: bucket-name.s3.region-code.amazonaws.com
return fmt::format("{}.{}", name(), _ap());
case s3_url_style::path:
// Host: s3.region-code.amazonaws.com
return fmt::format("{}", _ap());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why the bucket name is not included?

Copy link
Contributor Author

@WillemKauf WillemKauf Apr 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Following the S3 User Guide's examples of path-style requests, this seems to be the proper form for the host in the header.

The bucket name is instead used in the target of the header.

In commit 8edf7d5, I added some comments in the various request_creator::make_*() functions to reflect this style.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Following the S3 User Guide's examples of path-style requests, this seems to be the proper form for the host in the header.

The bucket name is instead used in the target of the header.

In commit 8edf7d5, I added some comments in the various request_creator::make_*() functions to reflect this style.

I was also confused here. But the answer is that this is make_host, not make_target, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, correct. This is make_host.

}
}

std::string request_creator::make_target(
const bucket_name& name, const object_key& key) const {
switch (_ap_style) {
case s3_url_style::virtual_host:
// Target: /homepage.html
return fmt::format("/{}", key().string());
case s3_url_style::path:
// Target: /example.com/homepage.html
return fmt::format("/{}/{}", name(), key().string());
}
}

// client //

inline cloud_storage_clients::s3_error_code
Expand Down
8 changes: 8 additions & 0 deletions src/v/cloud_storage_clients/s3_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "cloud_roles/apply_credentials.h"
#include "cloud_storage_clients/client.h"
#include "cloud_storage_clients/client_probe.h"
#include "cloud_storage_clients/types.h"
#include "http/client.h"
#include "model/fundamental.h"

Expand Down Expand Up @@ -106,7 +107,14 @@ class request_creator {
std::optional<char> delimiter = std::nullopt);

private:
std::string make_host(const bucket_name& name) const;

std::string
make_target(const bucket_name& name, const object_key& key) const;

access_point_uri _ap;

s3_url_style _ap_style;
/// Applies credentials to http requests by adding headers and signing
/// request payload. Shared pointer so that the credentials can be rotated
/// through the client pool.
Expand Down
20 changes: 17 additions & 3 deletions src/v/cloud_storage_clients/test_client/s3_test_client_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "bytes/iobuf.h"
#include "cloud_storage_clients/s3_client.h"
#include "cloud_storage_clients/s3_error.h"
#include "cloud_storage_clients/types.h"
#include "http/client.h"
#include "syschecks/syschecks.h"

Expand Down Expand Up @@ -76,6 +77,11 @@ void cli_opts(boost::program_options::options_description_easy_init opt) {
po::value<std::string>()->default_value("us-east-1"),
"aws region");

opt(
"url_style",
po::value<std::string>()->default_value("virtual_host"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is good for development but we also need a proper unit-test. In the src/v/cloud_storage/remote.h we have a remote class which serves as a proxy for the code that has to interact with the cloud storage. All code paths which are downloading or uploading or deleting are using this class instead of the client. The unit-test has to show that when the path-style is enabled no REST API request uses virtual host style URL. The cloud_storage::remote has a unit-test suite. It invokes every method of the remote and checks the results using the s3_imposter. The imposter allows you to examine parameters of the request and also validate the URL. So basically, we need to have all these tests to be invoked with virtual host style URLs and also with path-style URLs.

The motivation here is that some cloud storage providers may not support one style or the other. In this situation we don't want to accidentally use the wrong style.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like all existing REST API's that we're using are supported. But we can add new request in the future so some check is nice to have.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be done in a followup.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be done in a followup.

@Lazin Yes, and we are also going to wire up ducktape wiht minio running in both modes (pretty sure it supports it). You can see all teh follow up work in CORE-725.

@WillemKauf can you capture Evgeny's suggestions about additional unit testing in a new or existing jira ticket as a subtask for the overall project?

"aws addressing style");

opt(
"in",
po::value<std::string>()->default_value(""),
Expand Down Expand Up @@ -120,9 +126,10 @@ struct fmt::formatter<test_conf> : public fmt::formatter<std::string_view> {
// make the output json-able so we can consume it in python for analysis
return formatter<std::string_view>::format(
fmt::format(
"[ 'bucket': '{}', 'objects': ['{}'] ]",
"[ 'bucket': '{}', 'objects': ['{}'], 'path style': {} ]",
cfg.bucket,
fmt::join(cfg.objects, "', '")),
fmt::join(cfg.objects, "', '"),
cfg.client_cfg.url_style),
ctx);
}
};
Expand Down Expand Up @@ -163,6 +170,13 @@ test_conf cfg_from(boost::program_options::variables_map& m) {
}
return std::nullopt;
}(),
.url_style = [&]() -> cloud_storage_clients::s3_url_style {
if (m["url_style"].as<std::string>() == "virtual_host") {
return cloud_storage_clients::s3_url_style::virtual_host;
} else {
return cloud_storage_clients::s3_url_style::path;
}
}(),
.disable_tls = m.contains("disable-tls") > 0,
})
.get0();
Expand Down Expand Up @@ -358,7 +372,7 @@ int main(int args, char** argv, char** env) {
} else {
vlog(
test_log.error,
"DeleteObject request failes: {}",
"DeleteObject request failed: {}",
undeleted.error());
}
}
Expand Down
1 change: 1 addition & 0 deletions src/v/cloud_storage_clients/tests/client_pool_mt_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ static cloud_storage_clients::s3_configuration transport_configuration() {
conf.access_key = cloud_roles::public_key_str("acess-key");
conf.secret_key = cloud_roles::private_key_str("secret-key");
conf.region = cloud_roles::aws_region_name("us-east-1");
conf.url_style = cloud_storage_clients::s3_url_style::virtual_host;
conf.server_addr = server_addr;
conf._probe = ss::make_shared<cloud_storage_clients::client_probe>(
net::metrics_disabled::yes,
Expand Down
1 change: 1 addition & 0 deletions src/v/cloud_storage_clients/tests/client_pool_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ static cloud_storage_clients::s3_configuration transport_configuration() {
conf.access_key = cloud_roles::public_key_str("acess-key");
conf.secret_key = cloud_roles::private_key_str("secret-key");
conf.region = cloud_roles::aws_region_name("us-east-1");
conf.url_style = cloud_storage_clients::s3_url_style::virtual_host;
conf.server_addr = server_addr;
conf._probe = ss::make_shared<cloud_storage_clients::client_probe>(
net::metrics_disabled::yes,
Expand Down
1 change: 1 addition & 0 deletions src/v/cloud_storage_clients/tests/s3_client_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ static cloud_storage_clients::s3_configuration transport_configuration() {
conf.access_key = cloud_roles::public_key_str("acess-key");
conf.secret_key = cloud_roles::private_key_str("secret-key");
conf.region = cloud_roles::aws_region_name("us-east-1");
conf.url_style = cloud_storage_clients::s3_url_style::virtual_host;
conf.server_addr = server_addr;
conf._probe = ss::make_shared<cloud_storage_clients::client_probe>(
net::metrics_disabled::yes,
Expand Down
Loading
Loading