load balancing: Disabling zone aware routing for non-zero priority levels. #2244

Merged Dec 23, 2017 (4 commits; showing changes from 3 commits)
72 changes: 43 additions & 29 deletions source/common/upstream/load_balancer_impl.cc
@@ -40,10 +40,17 @@ LoadBalancerBase::LoadBalancerBase(const PrioritySet& priority_set, ClusterStats
Runtime::Loader& runtime, Runtime::RandomGenerator& random)
: stats_(stats), runtime_(runtime), random_(random), priority_set_(priority_set),
best_available_host_set_(bestAvailable(&priority_set)) {
per_priority_load_.resize(priority_set.hostSetsPerPriority().size());
per_priority_load_[best_available_host_set_->priority()] = 100;
priority_set_.addMemberUpdateCb([this](uint32_t, const std::vector<HostSharedPtr>&,
const std::vector<HostSharedPtr>&) -> void {
per_priority_load_.resize(priority_set_.hostSetsPerPriority().size());
per_priority_load_[best_available_host_set_->priority()] = 0;
// Update the host set to use for picking, based on the new state.
best_available_host_set_ = bestAvailable(&priority_set_);
// With the current picking logic, the best available host set gets 100% of
// traffic and all others get 0%.
per_priority_load_[best_available_host_set_->priority()] = 100;
});

}
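
Taken together, the constructor and callback above maintain a simple invariant: per_priority_load_ holds 100 at the best available priority and 0 everywhere else. A minimal standalone sketch of that selection rule (hypothetical helper names and raw healthy-host counts; Envoy's real bestAvailable() walks HostSet objects):

#include <cstdint>
#include <vector>

// Stand-in for the real PrioritySet: the number of healthy hosts per priority.
using HealthyCounts = std::vector<uint32_t>;

// Mirrors bestAvailable(): the lowest-numbered priority with healthy hosts
// wins; if every level is fully unhealthy, fall back to priority 0.
uint32_t bestAvailablePriority(const HealthyCounts& healthy) {
  for (uint32_t p = 0; p < healthy.size(); ++p) {
    if (healthy[p] > 0) {
      return p;
    }
  }
  return 0;
}

// Mirrors the member-update callback: the best available priority receives
// 100% of the traffic and every other priority receives 0%.
std::vector<uint32_t> recomputePerPriorityLoad(const HealthyCounts& healthy) {
  std::vector<uint32_t> load(healthy.size(), 0);
  load[bestAvailablePriority(healthy)] = 100;
  return load;
}
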
@@ -58,8 +65,6 @@ ZoneAwareLoadBalancerBase::ZoneAwareLoadBalancerBase(const PrioritySet& priority
resizePerPriorityState();
priority_set_.addMemberUpdateCb([this](uint32_t priority, const std::vector<HostSharedPtr>&,
const std::vector<HostSharedPtr>&) -> void {
// Update the host set to use for picking, based on the new state.
best_available_host_set_ = bestAvailable(&priority_set_);
// Make sure per_priority_state_ is as large as priority_set_.hostSetsPerPriority()
resizePerPriorityState();
// If there's a local priority set, regenerate all routing based on a potential size change to
@@ -79,9 +84,11 @@ ZoneAwareLoadBalancerBase::ZoneAwareLoadBalancerBase(const PrioritySet& priority
const std::vector<HostSharedPtr>&) -> void {
ASSERT(priority == 0);
UNREFERENCED_PARAMETER(priority);
// If the set of local Envoys changes, regenerate routing based on potential changes to
// the set of servers routing to priority_set_.
regenerateLocalityRoutingStructures(bestAvailablePriority());
// If the set of local Envoys changes, regenerate routing for all priority levels based on
// potential changes to the set of servers routing to priority_set_.
for (size_t i = 0; i < priority_set_.hostSetsPerPriority().size(); ++i) {
regenerateLocalityRoutingStructures(i);
}
});
}
}

[Review comment, Member] Do we actually need this change if we are only doing locality at P=0 now?
@@ -95,8 +102,7 @@ ZoneAwareLoadBalancerBase::~ZoneAwareLoadBalancerBase() {
void ZoneAwareLoadBalancerBase::regenerateLocalityRoutingStructures(uint32_t priority) {
ASSERT(local_priority_set_);
stats_.lb_recalculate_zone_structures_.inc();
// We are updating based on a change for a priority level in priority_set_, or the latched
// bestAvailablePriority() which is a latched priority for priority_set_.
// We are updating based on a change for a priority level in priority_set_.
ASSERT(priority < priority_set_.hostSetsPerPriority().size());
// resizePerPriorityState should ensure these stay in sync.
ASSERT(per_priority_state_.size() == priority_set_.hostSetsPerPriority().size());
@@ -112,9 +118,17 @@ void ZoneAwareLoadBalancerBase::regenerateLocalityRoutingStructures(uint32_t pri
size_t num_localities = host_set.healthyHostsPerLocality().size();
ASSERT(num_localities > 0);

// It is worth noting that all of the percentages calculated are orthogonal to
// how much load this priority level receives, percentageLoad(priority).
//
// If the host sets are such that 20% of load is handled locally and 80% is residual, and then
// half the hosts in all host sets go unhealthy, this priority set will
// still send 20% of the incoming load to the local locality and 80% to residual.
//
// Basically, fairness across localities within a priority is guaranteed. Fairness across
// localities across priorities is not.
uint64_t local_percentage[num_localities];
calculateLocalityPercentage(localHostSet().healthyHostsPerLocality(), local_percentage);

uint64_t upstream_percentage[num_localities];
calculateLocalityPercentage(host_set.healthyHostsPerLocality(), upstream_percentage);

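
For context, calculateLocalityPercentage() reduces each locality to its share of the host count, and the comparison of local_percentage against upstream_percentage further down is what decides between LocalityDirect and LocalityResidual. A rough sketch of such a percentage table, assuming the same basis-point scale (10000) used by the sampling code below; the rounding here is illustrative, not Envoy's exact code:

#include <cstdint>
#include <vector>

// (hosts in locality i) / (total hosts), in basis points (out of 10000) so the
// result can be compared against random() % 10000 style sampling.
std::vector<uint64_t> localityPercentages(const std::vector<uint64_t>& hosts_per_locality) {
  uint64_t total = 0;
  for (uint64_t count : hosts_per_locality) {
    total += count;
  }
  std::vector<uint64_t> percentage(hosts_per_locality.size(), 0);
  if (total == 0) {
    return percentage; // No hosts anywhere; panic handling happens elsewhere.
  }
  for (size_t i = 0; i < hosts_per_locality.size(); ++i) {
    percentage[i] = 10000 * hosts_per_locality[i] / total;
  }
  return percentage;
}
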
@@ -173,7 +187,8 @@ void ZoneAwareLoadBalancerBase::resizePerPriorityState() {
}

bool ZoneAwareLoadBalancerBase::earlyExitNonLocalityRouting(uint32_t priority) {
if (priority_set_.hostSetsPerPriority().size() < priority + 1) {
// Locality routing not supported for multiple priorities.
if (priority > 0) {
return true;
}

@@ -231,19 +246,20 @@ void ZoneAwareLoadBalancerBase::calculateLocalityPercentage(
}
}

const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::tryChooseLocalLocalityHosts() {
PerPriorityState& state = *per_priority_state_[bestAvailablePriority()];
const std::vector<HostSharedPtr>&
ZoneAwareLoadBalancerBase::tryChooseLocalLocalityHosts(const HostSet& host_set) {
PerPriorityState& state = *per_priority_state_[host_set.priority()];
ASSERT(state.locality_routing_state_ != LocalityRoutingState::NoLocalityRouting);

// At this point it is guaranteed that there are at least 2 localities.
size_t number_of_localities = best_available_host_set_->healthyHostsPerLocality().size();
size_t number_of_localities = host_set.healthyHostsPerLocality().size();

ASSERT(number_of_localities >= 2U);

// Try to push all of the requests to the same locality first.
if (state.locality_routing_state_ == LocalityRoutingState::LocalityDirect) {
stats_.lb_zone_routing_all_directly_.inc();
return best_available_host_set_->healthyHostsPerLocality()[0];
return host_set.healthyHostsPerLocality()[0];
}

ASSERT(state.locality_routing_state_ == LocalityRoutingState::LocalityResidual);
@@ -252,7 +268,7 @@ const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::tryChooseLocalLocal
// push to the local locality, check if we can push to local locality on current iteration.
if (random_.random() % 10000 < state.local_percent_to_route_) {
stats_.lb_zone_routing_sampled_.inc();
return best_available_host_set_->healthyHostsPerLocality()[0];
return host_set.healthyHostsPerLocality()[0];
}

// At this point we must route cross locality as we cannot route to the local locality.
@@ -262,8 +278,7 @@ const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::tryChooseLocalLocal
// locality percentages. In this case just select random locality.
if (state.residual_capacity_[number_of_localities - 1] == 0) {
stats_.lb_zone_no_capacity_left_.inc();
return best_available_host_set_
->healthyHostsPerLocality()[random_.random() % number_of_localities];
return host_set.healthyHostsPerLocality()[random_.random() % number_of_localities];
}

// Random sampling to select specific locality for cross locality traffic based on the additional
@@ -277,39 +292,38 @@ const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::tryChooseLocalLocal
i++;
}

return best_available_host_set_->healthyHostsPerLocality()[i];
return host_set.healthyHostsPerLocality()[i];
}

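
The two samplings in tryChooseLocalLocalityHosts() can be read as: one roll in [0, 10000) decides whether the request stays local; if not, a second roll against the cumulative residual_capacity_ array picks the cross-locality destination. A self-contained sketch, under the assumptions that index 0 is the local locality (contributing 0 residual capacity) and that the caller has already handled the zero-total-capacity case, as the diff does via lb_zone_no_capacity_left_:

#include <cstddef>
#include <cstdint>
#include <vector>

// residual_capacity is cumulative, so residual_capacity.back() is the total
// cross-locality capacity. Index 0 (the local locality) contributes 0.
size_t pickLocality(uint64_t local_percent_to_route,
                    const std::vector<uint64_t>& residual_capacity,
                    uint64_t first_roll, uint64_t second_roll) {
  // First roll: route to the local locality (index 0) with the precomputed
  // probability, expressed in basis points.
  if (first_roll % 10000 < local_percent_to_route) {
    return 0;
  }
  // Second roll: pick the first locality whose cumulative residual capacity
  // exceeds the sampled threshold. Terminates because the threshold is
  // strictly less than residual_capacity.back().
  uint64_t threshold = second_roll % residual_capacity.back();
  size_t i = 0;
  while (threshold >= residual_capacity[i]) {
    ++i;
  }
  return i;
}
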
const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::hostsToUse() {
ASSERT(best_available_host_set_->healthyHosts().size() <=
best_available_host_set_->hosts().size());
const HostSet& host_set = chooseHostSet();

// If the best available priority has insufficient healthy hosts, return all hosts.
if (isGlobalPanic(*best_available_host_set_, runtime_)) {
// If the selected host set has insufficient healthy hosts, return all hosts.
if (isGlobalPanic(host_set, runtime_)) {
stats_.lb_healthy_panic_.inc();
return best_available_host_set_->hosts();
return host_set.hosts();
}

// If we've latched that we can't do priority-based routing, return healthy
// hosts for the best available priority.
if (per_priority_state_[bestAvailablePriority()]->locality_routing_state_ ==
// If we've latched that we can't do priority-based routing, return healthy hosts for the selected
// host set.
if (per_priority_state_[host_set.priority()]->locality_routing_state_ ==
LocalityRoutingState::NoLocalityRouting) {
return best_available_host_set_->healthyHosts();
return host_set.healthyHosts();
}

// Determine if the load balancer should do zone based routing for this pick.
if (!runtime_.snapshot().featureEnabled(RuntimeZoneEnabled, 100)) {
return best_available_host_set_->healthyHosts();
return host_set.healthyHosts();
}

if (isGlobalPanic(localHostSet(), runtime_)) {
stats_.lb_local_cluster_not_ok_.inc();
// If the local Envoy instances are in global panic, do not do locality
// based routing.
return best_available_host_set_->healthyHosts();
return host_set.healthyHosts();
}

return tryChooseLocalLocalityHosts();
return tryChooseLocalLocalityHosts(host_set);
}

HostConstSharedPtr RoundRobinLoadBalancer::chooseHost(LoadBalancerContext*) {
14 changes: 9 additions & 5 deletions source/common/upstream/load_balancer_impl.h
@@ -28,18 +28,24 @@ class LoadBalancerBase {
LoadBalancerBase(const PrioritySet& priority_set, ClusterStats& stats, Runtime::Loader& runtime,
Runtime::RandomGenerator& random);

uint32_t bestAvailablePriority() const { return best_available_host_set_->priority(); }
const HostSet& chooseHostSet() { return *best_available_host_set_; }

uint32_t percentageLoad(uint32_t priority) const { return per_priority_load_[priority]; }

ClusterStats& stats_;
Runtime::Loader& runtime_;
Runtime::RandomGenerator& random_;
// The priority-ordered set of hosts to use for load balancing.
const PrioritySet& priority_set_;

private:
// The lowest priority host set from priority_set_ with healthy hosts, or the
// zero-priority host set if all host sets are fully unhealthy.
// This is updated as the hosts and healthy hosts in priority_set_ are updated
// but will never be null.
const HostSet* best_available_host_set_;
// The percentage load (0-100) for each priority level
std::vector<uint32_t> per_priority_load_;
};

/**
@@ -82,8 +88,9 @@ class ZoneAwareLoadBalancerBase : public LoadBalancerBase {

/**
* Try to select upstream hosts from the same locality.
* @param host_set the last host set returned by chooseHostSet()
*/
const std::vector<HostSharedPtr>& tryChooseLocalLocalityHosts();
const std::vector<HostSharedPtr>& tryChooseLocalLocalityHosts(const HostSet& host_set);

/**
* @return (number of hosts in a given locality)/(total number of hosts) in ret param.
@@ -116,9 +123,6 @@ class ZoneAwareLoadBalancerBase : public LoadBalancerBase {
};
typedef std::unique_ptr<PerPriorityState> PerPriorityStatePtr;
// Routing state broken out for each priority level in priority_set_.
// With the current implementation we could save some CPU and memory by only
// tracking this for best_available_host_set_ but as we support gentle
// failover it's useful to precompute it for all priority levels.
std::vector<PerPriorityStatePtr> per_priority_state_;
Common::CallbackHandle* local_priority_set_member_update_cb_handle_{};
};
10 changes: 5 additions & 5 deletions source/common/upstream/ring_hash_lb.cc
@@ -25,13 +25,13 @@ RingHashLoadBalancer::RingHashLoadBalancer(
}

HostConstSharedPtr RingHashLoadBalancer::chooseHost(LoadBalancerContext* context) {
if (isGlobalPanic(*best_available_host_set_, runtime_)) {
const HostSet& host_set = chooseHostSet();
if (isGlobalPanic(host_set, runtime_)) {
stats_.lb_healthy_panic_.inc();
return per_priority_state_[bestAvailablePriority()]->all_hosts_ring_.chooseHost(context,
random_);
return per_priority_state_[host_set.priority()]->all_hosts_ring_.chooseHost(context, random_);
} else {
return per_priority_state_[bestAvailablePriority()]->healthy_hosts_ring_.chooseHost(context,
random_);
return per_priority_state_[host_set.priority()]->healthy_hosts_ring_.chooseHost(context,
random_);
}
}

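
The shape of this change is the same as in hostsToUse(): first choose the host set, then key the per-priority state off host_set.priority(). A toy sketch of that dispatch with the rings reduced to plain host lists (hypothetical types, not RingHashLoadBalancer's real internals):

#include <cstdint>
#include <vector>

// Stand-in for the per-priority ring pair kept by the ring hash balancer.
struct PerPriorityRings {
  std::vector<int> all_hosts;     // stand-in for all_hosts_ring_
  std::vector<int> healthy_hosts; // stand-in for healthy_hosts_ring_
};

// Mirrors chooseHost(): pick the priority first, then pick the ring for that
// priority based on whether the chosen set is in global panic. Assumes the
// chosen ring is non-empty.
int chooseHost(const std::vector<PerPriorityRings>& per_priority, uint32_t chosen_priority,
               bool in_global_panic, uint64_t random_value) {
  const PerPriorityRings& rings = per_priority[chosen_priority];
  const std::vector<int>& hosts = in_global_panic ? rings.all_hosts : rings.healthy_hosts;
  return hosts[random_value % hosts.size()];
}
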
84 changes: 81 additions & 3 deletions test/common/upstream/load_balancer_impl_test.cc
@@ -39,6 +39,62 @@ class LoadBalancerTestBase : public ::testing::TestWithParam<bool> {
std::shared_ptr<MockClusterInfo> info_{new NiceMock<MockClusterInfo>()};
};

class TestLb : public LoadBalancerBase {
public:
TestLb(const PrioritySet& priority_set, ClusterStats& stats, Runtime::Loader& runtime,
Runtime::RandomGenerator& random)
: LoadBalancerBase(priority_set, stats, runtime, random) {}
using LoadBalancerBase::chooseHostSet;
using LoadBalancerBase::percentageLoad;
};

class LoadBalancerBaseTest : public LoadBalancerTestBase {
public:
TestLb lb_{priority_set_, stats_, runtime_, random_};
};

INSTANTIATE_TEST_CASE_P(PrimaryOrFailover, LoadBalancerBaseTest, ::testing::Values(true));

// Basic test of host set selection.
TEST_P(LoadBalancerBaseTest, PrioritySelection) {
host_set_.hosts_ = {makeTestHost(info_, "tcp://127.0.0.1:80")};
failover_host_set_.hosts_ = {makeTestHost(info_, "tcp://127.0.0.1:81")};
host_set_.runCallbacks({}, {});

// With both the primary and failover hosts unhealthy, we should select an
// unhealthy primary host.
EXPECT_EQ(100, lb_.percentageLoad(0));
EXPECT_EQ(0, lb_.percentageLoad(1));
EXPECT_EQ(&host_set_, &lb_.chooseHostSet());

// Update the priority set with a new priority level P=2 and ensure the host
// is chosen.
MockHostSet& tertiary_host_set_ = *priority_set_.getMockHostSet(2);
HostVectorSharedPtr hosts(
new std::vector<HostSharedPtr>({makeTestHost(info_, "tcp://127.0.0.1:82")}));
tertiary_host_set_.hosts_ = *hosts;
tertiary_host_set_.healthy_hosts_ = tertiary_host_set_.hosts_;
tertiary_host_set_.runCallbacks({}, {});
EXPECT_EQ(0, lb_.percentageLoad(0));
EXPECT_EQ(0, lb_.percentageLoad(1));
EXPECT_EQ(100, lb_.percentageLoad(2));
EXPECT_EQ(&tertiary_host_set_, &lb_.chooseHostSet());

// Now add a healthy host in P=0 and make sure it is immediately selected.
host_set_.healthy_hosts_ = host_set_.hosts_;
tertiary_host_set_.runCallbacks({}, {});
EXPECT_EQ(100, lb_.percentageLoad(0));
EXPECT_EQ(0, lb_.percentageLoad(2));
EXPECT_EQ(&host_set_, &lb_.chooseHostSet());

// Remove the healthy host and ensure we fail back over to tertiary_host_set_
host_set_.healthy_hosts_ = {};
host_set_.runCallbacks({}, {});
EXPECT_EQ(0, lb_.percentageLoad(0));
EXPECT_EQ(100, lb_.percentageLoad(2));
EXPECT_EQ(&tertiary_host_set_, &lb_.chooseHostSet());
}

class RoundRobinLoadBalancerTest : public LoadBalancerTestBase {
public:
void init(bool need_local_cluster) {
@@ -231,9 +287,13 @@ TEST_P(RoundRobinLoadBalancerTest, ZoneAwareSmallCluster) {
EXPECT_EQ(hostSet().healthy_hosts_[1], lb_->chooseHost(nullptr));
EXPECT_EQ(hostSet().healthy_hosts_[2], lb_->chooseHost(nullptr));

// Cluster size is computed once at zone aware struct regeneration point.
EXPECT_EQ(1U, stats_.lb_zone_cluster_too_small_.value());

if (&hostSet() == &host_set_) {
// Cluster size is computed once at zone aware struct regeneration point.
EXPECT_EQ(1U, stats_.lb_zone_cluster_too_small_.value());
} else {
EXPECT_EQ(0U, stats_.lb_zone_cluster_too_small_.value());
return;
}
EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6))
.WillRepeatedly(Return(1));
// Trigger reload.
@@ -243,6 +303,9 @@ }
}

TEST_P(RoundRobinLoadBalancerTest, NoZoneAwareDifferentZoneSize) {
if (&hostSet() == &failover_host_set_) { // P = 1 does not support zone-aware routing.
return;
}
HostVectorSharedPtr hosts(new std::vector<HostSharedPtr>(
{makeTestHost(info_, "tcp://127.0.0.1:80"), makeTestHost(info_, "tcp://127.0.0.1:81"),
makeTestHost(info_, "tcp://127.0.0.1:82")}));
@@ -270,6 +333,9 @@ TEST_P(RoundRobinLoadBalancerTest, NoZoneAwareDifferentZoneSize) {
}

TEST_P(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZoneSwitchOnOff) {
if (&hostSet() == &failover_host_set_) { // P = 1 does not support zone-aware routing.
return;
}
HostVectorSharedPtr hosts(new std::vector<HostSharedPtr>(
{makeTestHost(info_, "tcp://127.0.0.1:80"), makeTestHost(info_, "tcp://127.0.0.1:81"),
makeTestHost(info_, "tcp://127.0.0.1:82")}));
@@ -305,6 +371,9 @@ TEST_P(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZoneSwitchOnOff) {
}

TEST_P(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) {
if (&hostSet() == &failover_host_set_) { // P = 1 does not support zone-aware routing.
return;
}
HostVectorSharedPtr upstream_hosts(new std::vector<HostSharedPtr>(
{makeTestHost(info_, "tcp://127.0.0.1:80"), makeTestHost(info_, "tcp://127.0.0.1:81"),
makeTestHost(info_, "tcp://127.0.0.1:82"), makeTestHost(info_, "tcp://127.0.0.1:83"),
@@ -349,6 +418,9 @@ TEST_P(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) {
}

TEST_P(RoundRobinLoadBalancerTest, LowPrecisionForDistribution) {
if (&hostSet() == &failover_host_set_) { // P = 1 does not support zone-aware routing.
return;
}
// upstream_hosts and local_hosts do not matter, zone aware routing is based on per zone hosts.
HostVectorSharedPtr upstream_hosts(
new std::vector<HostSharedPtr>({makeTestHost(info_, "tcp://127.0.0.1:80")}));
@@ -411,6 +483,9 @@ TEST_P(RoundRobinLoadBalancerTest, LowPrecisionForDistribution) {
}

TEST_P(RoundRobinLoadBalancerTest, NoZoneAwareRoutingOneZone) {
if (&hostSet() == &failover_host_set_) { // P = 1 does not support zone-aware routing.
return;
}
HostVectorSharedPtr hosts(
new std::vector<HostSharedPtr>({makeTestHost(info_, "tcp://127.0.0.1:80")}));
HostListsSharedPtr hosts_per_locality(
@@ -445,6 +520,9 @@ TEST_P(RoundRobinLoadBalancerTest, NoZoneAwareRoutingNotHealthy) {
}

TEST_P(RoundRobinLoadBalancerTest, NoZoneAwareRoutingLocalEmpty) {
if (&hostSet() == &failover_host_set_) { // P = 1 does not support zone-aware routing.
return;
}
HostVectorSharedPtr upstream_hosts(new std::vector<HostSharedPtr>(
{makeTestHost(info_, "tcp://127.0.0.1:80"), makeTestHost(info_, "tcp://127.0.0.1:81")}));
HostVectorSharedPtr local_hosts(new std::vector<HostSharedPtr>({}, {}));