Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

load balancing: Disabling zone aware routing for non-zero priority levels. #2244

Merged
merged 4 commits into from
Dec 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 51 additions & 42 deletions source/common/upstream/load_balancer_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,17 @@ LoadBalancerBase::LoadBalancerBase(const PrioritySet& priority_set, ClusterStats
Runtime::Loader& runtime, Runtime::RandomGenerator& random)
: stats_(stats), runtime_(runtime), random_(random), priority_set_(priority_set),
best_available_host_set_(bestAvailable(&priority_set)) {
per_priority_load_.resize(priority_set.hostSetsPerPriority().size());
per_priority_load_[best_available_host_set_->priority()] = 100;
priority_set_.addMemberUpdateCb([this](uint32_t, const std::vector<HostSharedPtr>&,
const std::vector<HostSharedPtr>&) -> void {
per_priority_load_.resize(priority_set_.hostSetsPerPriority().size());
per_priority_load_[best_available_host_set_->priority()] = 0;
// Update the host set to use for picking, based on the new state.
best_available_host_set_ = bestAvailable(&priority_set_);
// With the current picking logic, the best available host set gets 100% of
// the traffic and all others get 0%.
per_priority_load_[best_available_host_set_->priority()] = 100;
});

} // namespace Upstream
Expand All @@ -58,14 +65,12 @@ ZoneAwareLoadBalancerBase::ZoneAwareLoadBalancerBase(const PrioritySet& priority
resizePerPriorityState();
priority_set_.addMemberUpdateCb([this](uint32_t priority, const std::vector<HostSharedPtr>&,
const std::vector<HostSharedPtr>&) -> void {
// Update the host set to use for picking, based on the new state.
best_available_host_set_ = bestAvailable(&priority_set_);
// Make sure per_priority_state_ is as large as priority_set_.hostSetsPerPriority()
resizePerPriorityState();
// If there's a local priority set, regenerate all routing based on a potential size change to
// the hosts routed to.
if (local_priority_set_) {
regenerateLocalityRoutingStructures(priority);
// If P=0 changes, regenerate locality routing structures. Locality based routing is disabled
// at all other levels.
if (local_priority_set_ && priority == 0) {
regenerateLocalityRoutingStructures();
}
});
if (local_priority_set_) {
Expand All @@ -79,9 +84,9 @@ ZoneAwareLoadBalancerBase::ZoneAwareLoadBalancerBase(const PrioritySet& priority
const std::vector<HostSharedPtr>&) -> void {
ASSERT(priority == 0);
UNREFERENCED_PARAMETER(priority);
// If the set of local Envoys changes, regenerate routing based on potential changes to
// the set of servers routing to priority_set_.
regenerateLocalityRoutingStructures(bestAvailablePriority());
// If the set of local Envoys changes, regenerate routing for P=0 as it does priority
// based routing.
regenerateLocalityRoutingStructures();
});
}
}
Expand All @@ -92,29 +97,36 @@ ZoneAwareLoadBalancerBase::~ZoneAwareLoadBalancerBase() {
}
}

void ZoneAwareLoadBalancerBase::regenerateLocalityRoutingStructures(uint32_t priority) {
void ZoneAwareLoadBalancerBase::regenerateLocalityRoutingStructures() {
ASSERT(local_priority_set_);
stats_.lb_recalculate_zone_structures_.inc();
// We are updating based on a change for a priority level in priority_set_, or the latched
// bestAvailablePriority() which is a latched priority for priority_set_.
ASSERT(priority < priority_set_.hostSetsPerPriority().size());
// resizePerPriorityState should ensure these stay in sync.
ASSERT(per_priority_state_.size() == priority_set_.hostSetsPerPriority().size());

// We only do locality routing for P=0
uint32_t priority = 0;
PerPriorityState& state = *per_priority_state_[priority];
// Do not perform any calculations if we cannot perform locality routing based on non-runtime
// params.
PerPriorityState& state = *per_priority_state_[priority];
if (earlyExitNonLocalityRouting(priority)) {
if (earlyExitNonLocalityRouting()) {
state.locality_routing_state_ = LocalityRoutingState::NoLocalityRouting;
return;
}
HostSet& host_set = *priority_set_.hostSetsPerPriority()[priority];
size_t num_localities = host_set.healthyHostsPerLocality().size();
ASSERT(num_localities > 0);

// It is worth noting that all of the percentages calculated are orthogonal to
// how much load this priority level receives, percentageLoad(priority).
//
// If the host sets are such that 20% of load is handled locally and 80% is residual, and then
// half the hosts in all host sets go unhealthy, this priority set will
// still send half of the incoming load to the local locality and 80% to residual.
//
// Basically, fairness across localities within a priority is guaranteed. Fairness across
// localities across priorities is not.
uint64_t local_percentage[num_localities];
calculateLocalityPercentage(localHostSet().healthyHostsPerLocality(), local_percentage);

uint64_t upstream_percentage[num_localities];
calculateLocalityPercentage(host_set.healthyHostsPerLocality(), upstream_percentage);

Expand Down Expand Up @@ -168,16 +180,14 @@ void ZoneAwareLoadBalancerBase::regenerateLocalityRoutingStructures(uint32_t pri
// Grows per_priority_state_ so it has one entry per priority level in priority_set_.
// Called on construction and from the member-update callback, since the number of
// host sets in priority_set_ can increase over time. Entries are never shrunk or
// replaced, so state latched for existing priorities is preserved.
void ZoneAwareLoadBalancerBase::resizePerPriorityState() {
const uint32_t size = priority_set_.hostSetsPerPriority().size();
while (per_priority_state_.size() < size) {
// Note for P!=0, PerPriorityState is created with NoLocalityRouting and never changed.
per_priority_state_.push_back(PerPriorityStatePtr{new PerPriorityState});
}
}

bool ZoneAwareLoadBalancerBase::earlyExitNonLocalityRouting(uint32_t priority) {
if (priority_set_.hostSetsPerPriority().size() < priority + 1) {
return true;
}

HostSet& host_set = *priority_set_.hostSetsPerPriority()[priority];
bool ZoneAwareLoadBalancerBase::earlyExitNonLocalityRouting() {
// We only do locality routing for P=0.
HostSet& host_set = *priority_set_.hostSetsPerPriority()[0];
if (host_set.healthyHostsPerLocality().size() < 2) {
return true;
}
Expand Down Expand Up @@ -231,19 +241,20 @@ void ZoneAwareLoadBalancerBase::calculateLocalityPercentage(
}
}

const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::tryChooseLocalLocalityHosts() {
PerPriorityState& state = *per_priority_state_[bestAvailablePriority()];
const std::vector<HostSharedPtr>&
ZoneAwareLoadBalancerBase::tryChooseLocalLocalityHosts(const HostSet& host_set) {
PerPriorityState& state = *per_priority_state_[host_set.priority()];
ASSERT(state.locality_routing_state_ != LocalityRoutingState::NoLocalityRouting);

// At this point it's guaranteed to be at least 2 localities.
size_t number_of_localities = best_available_host_set_->healthyHostsPerLocality().size();
size_t number_of_localities = host_set.healthyHostsPerLocality().size();

ASSERT(number_of_localities >= 2U);

// Try to push all of the requests to the same locality first.
if (state.locality_routing_state_ == LocalityRoutingState::LocalityDirect) {
stats_.lb_zone_routing_all_directly_.inc();
return best_available_host_set_->healthyHostsPerLocality()[0];
return host_set.healthyHostsPerLocality()[0];
}

ASSERT(state.locality_routing_state_ == LocalityRoutingState::LocalityResidual);
Expand All @@ -252,7 +263,7 @@ const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::tryChooseLocalLocal
// push to the local locality, check if we can push to local locality on current iteration.
if (random_.random() % 10000 < state.local_percent_to_route_) {
stats_.lb_zone_routing_sampled_.inc();
return best_available_host_set_->healthyHostsPerLocality()[0];
return host_set.healthyHostsPerLocality()[0];
}

// At this point we must route cross locality as we cannot route to the local locality.
Expand All @@ -262,8 +273,7 @@ const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::tryChooseLocalLocal
// locality percentages. In this case just select random locality.
if (state.residual_capacity_[number_of_localities - 1] == 0) {
stats_.lb_zone_no_capacity_left_.inc();
return best_available_host_set_
->healthyHostsPerLocality()[random_.random() % number_of_localities];
return host_set.healthyHostsPerLocality()[random_.random() % number_of_localities];
}

// Random sampling to select specific locality for cross locality traffic based on the additional
Expand All @@ -277,39 +287,38 @@ const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::tryChooseLocalLocal
i++;
}

return best_available_host_set_->healthyHostsPerLocality()[i];
return host_set.healthyHostsPerLocality()[i];
}

const std::vector<HostSharedPtr>& ZoneAwareLoadBalancerBase::hostsToUse() {
ASSERT(best_available_host_set_->healthyHosts().size() <=
best_available_host_set_->hosts().size());
const HostSet& host_set = chooseHostSet();

// If the best available priority has insufficient healthy hosts, return all hosts.
if (isGlobalPanic(*best_available_host_set_, runtime_)) {
// If the selected host set has insufficient healthy hosts, return all hosts.
if (isGlobalPanic(host_set, runtime_)) {
stats_.lb_healthy_panic_.inc();
return best_available_host_set_->hosts();
return host_set.hosts();
}

// If we've latched that we can't do priority-based routing, return healthy
// hosts for the best available priority.
if (per_priority_state_[bestAvailablePriority()]->locality_routing_state_ ==
// If we've latched that we can't do priority-based routing, return healthy hosts for the selected
// host set.
if (per_priority_state_[host_set.priority()]->locality_routing_state_ ==
LocalityRoutingState::NoLocalityRouting) {
return best_available_host_set_->healthyHosts();
return host_set.healthyHosts();
}

// Determine if the load balancer should do zone based routing for this pick.
if (!runtime_.snapshot().featureEnabled(RuntimeZoneEnabled, 100)) {
return best_available_host_set_->healthyHosts();
return host_set.healthyHosts();
}

if (isGlobalPanic(localHostSet(), runtime_)) {
stats_.lb_local_cluster_not_ok_.inc();
// If the local Envoy instances are in global panic, do not do locality
// based routing.
return best_available_host_set_->healthyHosts();
return host_set.healthyHosts();
}

return tryChooseLocalLocalityHosts();
return tryChooseLocalLocalityHosts(host_set);
}

HostConstSharedPtr RoundRobinLoadBalancer::chooseHost(LoadBalancerContext*) {
Expand Down
18 changes: 11 additions & 7 deletions source/common/upstream/load_balancer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,24 @@ class LoadBalancerBase {
LoadBalancerBase(const PrioritySet& priority_set, ClusterStats& stats, Runtime::Loader& runtime,
Runtime::RandomGenerator& random);

uint32_t bestAvailablePriority() const { return best_available_host_set_->priority(); }
const HostSet& chooseHostSet() { return *best_available_host_set_; }

uint32_t percentageLoad(uint32_t priority) const { return per_priority_load_[priority]; }

ClusterStats& stats_;
Runtime::Loader& runtime_;
Runtime::RandomGenerator& random_;
// The priority-ordered set of hosts to use for load balancing.
const PrioritySet& priority_set_;

private:
// The lowest priority host set from priority_set_ with healthy hosts, or the
// zero-priority host set if all host sets are fully unhealthy.
// This is updated as the hosts and healthy hosts in priority_set_ are updated
// but will never be null.
const HostSet* best_available_host_set_;
// The percentage load (0-100) for each priority level
std::vector<uint32_t> per_priority_load_;
};

/**
Expand Down Expand Up @@ -78,12 +84,13 @@ class ZoneAwareLoadBalancerBase : public LoadBalancerBase {
* @return decision on quick exit from locality aware routing based on cluster configuration.
* This gets recalculated on update callback.
*/
bool earlyExitNonLocalityRouting(uint32_t priority);
bool earlyExitNonLocalityRouting();

/**
* Try to select upstream hosts from the same locality.
* @param host_set the last host set returned by chooseHostSet()
*/
const std::vector<HostSharedPtr>& tryChooseLocalLocalityHosts();
const std::vector<HostSharedPtr>& tryChooseLocalLocalityHosts(const HostSet& host_set);

/**
* @return (number of hosts in a given locality)/(total number of hosts) in ret param.
Expand All @@ -97,7 +104,7 @@ class ZoneAwareLoadBalancerBase : public LoadBalancerBase {
/**
* Regenerate locality aware routing structures for fast decisions on upstream locality selection.
*/
void regenerateLocalityRoutingStructures(uint32_t priority);
void regenerateLocalityRoutingStructures();

HostSet& localHostSet() const { return *local_priority_set_->hostSetsPerPriority()[0]; }

Expand All @@ -116,9 +123,6 @@ class ZoneAwareLoadBalancerBase : public LoadBalancerBase {
};
typedef std::unique_ptr<PerPriorityState> PerPriorityStatePtr;
// Routing state broken out for each priority level in priority_set_.
// With the current implementation we could save some CPU and memory by only
// tracking this for best_available_host_set_ but as we support gentle
// failover it's useful to precompute it for all priority levels.
std::vector<PerPriorityStatePtr> per_priority_state_;
Common::CallbackHandle* local_priority_set_member_update_cb_handle_{};
};
Expand Down
10 changes: 5 additions & 5 deletions source/common/upstream/ring_hash_lb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ RingHashLoadBalancer::RingHashLoadBalancer(
}

HostConstSharedPtr RingHashLoadBalancer::chooseHost(LoadBalancerContext* context) {
if (isGlobalPanic(*best_available_host_set_, runtime_)) {
const HostSet& host_set = chooseHostSet();
if (isGlobalPanic(host_set, runtime_)) {
stats_.lb_healthy_panic_.inc();
return per_priority_state_[bestAvailablePriority()]->all_hosts_ring_.chooseHost(context,
random_);
return per_priority_state_[host_set.priority()]->all_hosts_ring_.chooseHost(context, random_);
} else {
return per_priority_state_[bestAvailablePriority()]->healthy_hosts_ring_.chooseHost(context,
random_);
return per_priority_state_[host_set.priority()]->healthy_hosts_ring_.chooseHost(context,
random_);
}
}

Expand Down
Loading