[v24.1.x] CORE-5766 Validate target node id when collecting health report #22910

Merged
3 changes: 3 additions & 0 deletions src/v/cluster/errc.h
@@ -89,6 +89,7 @@ enum class errc : int16_t {
topic_invalid_partitions_decreased,
producer_ids_vcluster_limit_exceeded,
validation_of_recovery_topic_failed,
invalid_target_node_id,
};

std::ostream& operator<<(std::ostream& o, errc err);
@@ -262,6 +263,8 @@ struct errc_category final : public std::error_category {
return "To many vclusters registered in producer state cache";
case errc::validation_of_recovery_topic_failed:
return "Validation of recovery topic failed";
case errc::invalid_target_node_id:
return "Request was intended for the node with different node id";
}
return "cluster::errc::unknown";
}
2 changes: 2 additions & 0 deletions src/v/cluster/errors.cc
@@ -166,6 +166,8 @@ std::ostream& operator<<(std::ostream& o, cluster::errc err) {
return o << "cluster::errc::producer_ids_vcluster_limit_exceeded";
case errc::validation_of_recovery_topic_failed:
return o << "cluster::errc::validation_of_recovery_topic_failed";
case errc::invalid_target_node_id:
return o << "cluster::errc::invalid_target_node_id";
}
}
} // namespace cluster
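
Note: a new enumerator has to be wired into three places, all touched by the hunks above: the `errc` enum itself, the `errc_category::message()` mapping, and the logging `operator<<`. Below is a minimal, self-contained sketch of the underlying `std::error_category` pattern; the `demo_errc`/`demo_category` names are illustrative, not Redpanda's actual code.

```cpp
// Sketch of the error-code registration pattern the hunks above extend.
// demo_errc / demo_category are illustrative names, not Redpanda's.
#include <cstdint>
#include <iostream>
#include <string>
#include <system_error>

enum class demo_errc : int16_t {
    success = 0,
    invalid_target_node_id, // the newly added enumerator
};

struct demo_category final : std::error_category {
    const char* name() const noexcept override { return "demo::errc"; }
    std::string message(int c) const override {
        switch (static_cast<demo_errc>(c)) {
        case demo_errc::success:
            return "Success";
        case demo_errc::invalid_target_node_id:
            return "Request was intended for a node with a different node id";
        }
        return "demo::errc::unknown";
    }
};

std::error_code make_error_code(demo_errc e) {
    static demo_category category;
    return {static_cast<int>(e), category};
}

int main() {
    auto ec = make_error_code(demo_errc::invalid_target_node_id);
    std::cout << ec.category().name() << ": " << ec.message() << '\n';
}
```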
13 changes: 8 additions & 5 deletions src/v/cluster/health_monitor_backend.cc
@@ -366,30 +366,33 @@ health_monitor_backend::collect_remote_node_health(model::node_id id) {
ss::this_shard_id(),
id,
max_metadata_age(),
[timeout](controller_client_protocol client) mutable {
[timeout, id](controller_client_protocol client) mutable {
return client.collect_node_health_report(
get_node_health_request{}, rpc::client_opts(timeout));
get_node_health_request(id), rpc::client_opts(timeout));
})
.then(&rpc::get_ctx_data<get_node_health_reply>)
.then([this, id](result<get_node_health_reply> reply) {
return process_node_reply(id, std::move(reply));
});
}

result<node_health_report>
map_reply_result(result<get_node_health_reply> reply) {
result<node_health_report> map_reply_result(
model::node_id target_node_id, result<get_node_health_reply> reply) {
if (!reply) {
return {reply.error()};
}
if (!reply.value().report.has_value()) {
return {reply.value().error};
}
if (reply.value().report->id != target_node_id) {
return {errc::invalid_target_node_id};
}
return {std::move(*reply.value().report)};
}

result<node_health_report> health_monitor_backend::process_node_reply(
model::node_id id, result<get_node_health_reply> reply) {
auto res = map_reply_result(std::move(reply));
auto res = map_reply_result(id, std::move(reply));
auto [status_it, _] = _status.try_emplace(id);
if (!res) {
vlog(
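
The reordered checks in `map_reply_result` are easiest to read in isolation: fail on a missing reply, then on a missing report, then on a node-id mismatch, and only then hand back the payload. The sketch below shows that decision ladder with simplified stand-in types; the real code uses `result<>`, `get_node_health_reply`, and `cluster::errc`.

```cpp
// Standalone version of the validation ladder; the types below are
// simplified stand-ins for the real result<> and cluster:: types.
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using node_id = int32_t;

enum class errc { success = 0, rpc_error, invalid_target_node_id };

struct node_health_report {
    node_id id{};
    // partition / disk state omitted
};

struct get_node_health_reply {
    errc error = errc::success;
    std::optional<node_health_report> report;
};

// Minimal result type: either a report or an error code.
struct report_result {
    std::optional<node_health_report> value;
    errc error = errc::success;
};

report_result map_reply_result(
  node_id target_node_id, std::optional<get_node_health_reply> reply) {
    // 1. transport-level failure: no reply at all
    if (!reply) {
        return {.error = errc::rpc_error};
    }
    // 2. application-level failure: the reply carries no report
    if (!reply->report.has_value()) {
        return {.error = reply->error};
    }
    // 3. the new check: the report must describe the node we asked about
    if (reply->report->id != target_node_id) {
        return {.error = errc::invalid_target_node_id};
    }
    return {.value = std::move(reply->report), .error = errc::success};
}

int main() {
    // A reply that claims to be node 2 while we asked node 1 is rejected.
    get_node_health_reply reply{
      .error = errc::success, .report = node_health_report{.id = 2}};
    auto res = map_reply_result(/*target_node_id=*/1, reply);
    assert(res.error == errc::invalid_target_node_id);
}
```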
4 changes: 2 additions & 2 deletions src/v/cluster/health_monitor_types.cc
@@ -267,8 +267,8 @@ std::ostream& operator<<(std::ostream& o, const partitions_filter& filter) {
return o;
}

std::ostream& operator<<(std::ostream& o, const get_node_health_request&) {
fmt::print(o, "{{}}");
std::ostream& operator<<(std::ostream& o, const get_node_health_request& r) {
fmt::print(o, "{{target_node_id: {}}}", r.get_target_node_id());
return o;
}

12 changes: 10 additions & 2 deletions src/v/cluster/health_monitor_types.h
@@ -430,10 +430,13 @@ using force_refresh = ss::bool_class<struct hm_force_refresh_tag>;
class get_node_health_request
: public serde::envelope<
get_node_health_request,
serde::version<0>,
serde::version<1>,
serde::compat_version<0>> {
public:
using rpc_adl_exempt = std::true_type;
get_node_health_request() = default;
explicit get_node_health_request(model::node_id target_node_id)
: _target_node_id(target_node_id) {}

friend bool
operator==(const get_node_health_request&, const get_node_health_request&)
@@ -442,9 +445,14 @@ class get_node_health_request
friend std::ostream&
operator<<(std::ostream&, const get_node_health_request&);

auto serde_fields() { return std::tie(_filter); }
auto serde_fields() { return std::tie(_filter, _target_node_id); }
static constexpr model::node_id node_id_not_set{-1};

model::node_id get_target_node_id() const { return _target_node_id; }

private:
// default value for backward compatibility
model::node_id _target_node_id = node_id_not_set;
/**
* This field is no longer used, as it never was. It was made private on
* purpose
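
The envelope version bump (`serde::version<0>` to `serde::version<1>`) together with the sentinel default is what keeps mixed-version clusters working: an older peer encodes only `_filter`, and decoding such a payload leaves `_target_node_id` at `node_id_not_set`, which the receiver treats as "do not validate". The sketch below illustrates defaulted-trailing-field compatibility with a hand-rolled, hypothetical framing that is much simpler than serde's real wire format.

```cpp
// Illustration of defaulted-trailing-field compatibility; this framing is
// hypothetical and much simpler than serde's real envelope encoding.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

constexpr int32_t node_id_not_set = -1;

struct health_request {
    uint32_t filter_words = 0;                // stand-in for the real filter
    int32_t target_node_id = node_id_not_set; // field added in version 1
};

// Version 0 writers emit only the filter; version 1 writers also emit the
// target node id.
std::vector<uint8_t> encode(const health_request& r, int version) {
    std::vector<uint8_t> buf(
      sizeof(r.filter_words) + (version >= 1 ? sizeof(r.target_node_id) : 0));
    std::memcpy(buf.data(), &r.filter_words, sizeof(r.filter_words));
    if (version >= 1) {
        std::memcpy(
          buf.data() + sizeof(r.filter_words),
          &r.target_node_id,
          sizeof(r.target_node_id));
    }
    return buf;
}

// The decoder reads fields in declaration order and stops when the payload
// ends, so a version-0 payload leaves target_node_id at its sentinel.
health_request decode(const std::vector<uint8_t>& buf) {
    health_request r;
    size_t pos = 0;
    if (buf.size() - pos >= sizeof(r.filter_words)) {
        std::memcpy(&r.filter_words, buf.data() + pos, sizeof(r.filter_words));
        pos += sizeof(r.filter_words);
    }
    if (buf.size() - pos >= sizeof(r.target_node_id)) {
        std::memcpy(
          &r.target_node_id, buf.data() + pos, sizeof(r.target_node_id));
    }
    return r;
}

int main() {
    health_request req{.filter_words = 0, .target_node_id = 5};
    std::cout << decode(encode(req, 0)).target_node_id << '\n'; // -1 (old peer)
    std::cout << decode(encode(req, 1)).target_node_id << '\n'; // 5
}
```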
35 changes: 22 additions & 13 deletions src/v/cluster/node_status_backend.cc
@@ -236,7 +236,7 @@ ss::future<result<node_status>> node_status_backend::send_node_status_request(
})
.then(&rpc::get_ctx_data<node_status_reply>);

co_return process_reply(reply);
co_return process_reply(target, reply);
}

ss::future<> node_status_backend::maybe_create_client(
@@ -245,26 +245,18 @@
target, address, _rpc_tls_config, create_backoff_policy());
}

result<node_status>
node_status_backend::process_reply(result<node_status_reply> reply) {
result<node_status> node_status_backend::process_reply(
model::node_id target_node_id, result<node_status_reply> reply) {
vassert(ss::this_shard_id() == shard, "invoked on a wrong shard");

if (!reply.has_error()) {
_stats.rpcs_sent += 1;
auto& replier_metadata = reply.value().replier_metadata;

return node_status{
.node_id = replier_metadata.node_id,
.last_seen = rpc::clock_type::now()};
} else {
static constexpr auto rate_limit = std::chrono::seconds(1);
if (reply.has_error()) {
auto err = reply.error();
if (
err.category() == rpc::error_category()
&& static_cast<rpc::errc>(err.value())
== rpc::errc::client_request_timeout) {
_stats.rpcs_timed_out += 1;
}
static constexpr auto rate_limit = std::chrono::seconds(1);
static ss::logger::rate_limit rate(rate_limit);
clusterlog.log(
ss::log_level::debug,
@@ -273,6 +265,23 @@
err.message());
return err;
}

_stats.rpcs_sent += 1;
auto& replier_metadata = reply.value().replier_metadata;
if (replier_metadata.node_id != target_node_id) {
static ss::logger::rate_limit rate(rate_limit);
clusterlog.log(
ss::log_level::debug,
rate,
"Received reply from node with different node id. Expected: {}, "
"current: {}",
target_node_id,
replier_metadata.node_id);
return errc::invalid_target_node_id;
}

return node_status{
.node_id = replier_metadata.node_id, .last_seen = rpc::clock_type::now()};
}

ss::future<node_status_reply>
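
The node-id mismatch is logged at debug level through `ss::logger::rate_limit`, so a misconfigured or flapping peer cannot flood the log with one message per probe. The sketch below shows generic interval-based log rate limiting in plain C++; it is illustrative only and not Seastar's logger API.

```cpp
// Interval-based log rate limiting, similar in spirit to the
// ss::logger::rate_limit used above (plain C++, not Seastar's API).
#include <chrono>
#include <cstdint>
#include <iostream>
#include <string_view>

class rate_limited_logger {
public:
    explicit rate_limited_logger(std::chrono::milliseconds interval)
      : _interval(interval) {}

    // Emits the message only if the interval elapsed since the last emit;
    // otherwise counts the message as suppressed.
    void log(std::string_view msg) {
        auto now = std::chrono::steady_clock::now();
        if (now - _last < _interval) {
            ++_suppressed;
            return;
        }
        std::cerr << msg;
        if (_suppressed > 0) {
            std::cerr << " (" << _suppressed << " similar messages suppressed)";
        }
        std::cerr << '\n';
        _suppressed = 0;
        _last = now;
    }

private:
    std::chrono::milliseconds _interval;
    std::chrono::steady_clock::time_point _last{}; // epoch: first call always logs
    uint64_t _suppressed = 0;
};

int main() {
    rate_limited_logger log{std::chrono::milliseconds(1000)};
    for (int i = 0; i < 5; ++i) {
        log.log("Received reply from a node with a different node id");
    }
    // Only the first iteration prints; the other four are suppressed.
}
```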
3 changes: 2 additions & 1 deletion src/v/cluster/node_status_backend.h
@@ -76,7 +76,8 @@ class node_status_backend {
ss::future<> collect_and_store_updates();
ss::future<std::vector<node_status>> collect_updates_from_peers();

result<node_status> process_reply(result<node_status_reply>);
result<node_status> process_reply(
model::node_id target_node_id, result<node_status_reply> reply);
ss::future<node_status_reply> process_request(node_status_request);

ss::future<result<node_status>>
16 changes: 15 additions & 1 deletion src/v/cluster/service.cc
@@ -510,7 +510,21 @@ ss::future<get_cluster_health_reply> service::get_cluster_health_report(
}

ss::future<get_node_health_reply>
service::do_collect_node_health_report(get_node_health_request) {
service::do_collect_node_health_report(get_node_health_request req) {
// validate that the receiving node is the one the request is
// addressed to
if (
req.get_target_node_id() != get_node_health_request::node_id_not_set
&& req.get_target_node_id() != _controller->self()) {
vlog(
clusterlog.debug,
"Received a get_node_health request addressed to different node. "
"Requested node id: {}, current node id: {}",
req.get_target_node_id(),
_controller->self());
co_return get_node_health_reply{.error = errc::invalid_target_node_id};
}

auto res = co_await _hm_frontend.local().get_current_node_health();
if (res.has_error()) {
co_return get_node_health_reply{
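
On the receiving side the guard is symmetric: a request whose target id is set and differs from the local broker's id is rejected with `errc::invalid_target_node_id`, while requests carrying the `node_id_not_set` sentinel (sent by peers that predate the field) are still served, which keeps rolling upgrades safe. A compact sketch with hypothetical stand-in types:

```cpp
// Sketch of the server-side guard with stand-in types; the real handler
// builds a get_node_health_reply and goes through Redpanda's RPC layer.
#include <cstdint>

using node_id = int32_t;
constexpr node_id node_id_not_set = -1;

enum class errc { success = 0, invalid_target_node_id };

struct health_reply {
    errc error = errc::success;
    // report payload omitted
};

health_reply handle_collect_node_health(node_id requested, node_id self) {
    // Requests from peers that predate the new field carry the sentinel and
    // are served unconditionally, which keeps rolling upgrades working.
    if (requested != node_id_not_set && requested != self) {
        return {.error = errc::invalid_target_node_id};
    }
    // ... collect and return the local node health report ...
    return {};
}

int main() {
    auto rejected = handle_collect_node_health(/*requested=*/7, /*self=*/3);
    return rejected.error == errc::invalid_target_node_id ? 0 : 1;
}
```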
1 change: 1 addition & 0 deletions src/v/kafka/server/errors.h
@@ -107,6 +107,7 @@ constexpr error_code map_topic_error_code(cluster::errc code) {
case cluster::errc::waiting_for_shard_placement_update:
case cluster::errc::producer_ids_vcluster_limit_exceeded:
case cluster::errc::validation_of_recovery_topic_failed:
case cluster::errc::invalid_target_node_id:
break;
}
return error_code::unknown_server_error;
15 changes: 15 additions & 0 deletions tests/rptest/tests/node_folder_deletion_test.py
@@ -94,7 +94,22 @@ def test_deleting_node_folder(self):
wait_until(lambda: producer.produce_status.acked > 200000,
timeout_sec=120,
backoff_sec=0.5)

admin = Admin(self.redpanda)

# validate that the node with deleted folder is recognized as offline
def removed_node_is_reported_offline():
cluster_health = admin.get_cluster_health_overview()
return id in cluster_health['nodes_down']

wait_until(
removed_node_is_reported_offline,
timeout_sec=20,
backoff_sec=0.5,
err_msg=
f"Node {id} is expected to be marked as offline as it was replaced by new node"
)

# decommission a node that has been cleared
admin.decommission_broker(id)
waiter = NodeDecommissionWaiter(self.redpanda,