Skip to content

Commit

Permalink
Abort takeover only if an active master exists
Browse files Browse the repository at this point in the history
https://bugzilla.redhat.com/show_bug.cgi?id=1402943

Previously, we would abort if a different master existed, even if it was
shut down.

* server 1 is master and shuts down
* server 3 runs monitor_servers, becomes master and shuts down
* server 2 runs monitor_servers AFTER 3 becomes master

server 2 wouldn't take over as master because it sees the inactive
server 3 as master.
  • Loading branch information
jrafanie committed Dec 8, 2016
1 parent b5c09d8 commit f4bb169
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
10 changes: 7 additions & 3 deletions app/models/miq_server/server_monitor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,21 @@ def make_master_server(last_master)
_log.info "Master server has #{last_master.nil? ? "not been set" : "died, #{last_master.name}"}. Attempting takeover as new master server, #{name}."
parent = MiqRegion.my_region
parent.lock do
all_servers = parent.miq_servers
# See if an ACTIVE server has already taken over
active_servers = parent.active_miq_servers

_log.debug "Double checking that nothing has changed"
master = all_servers.detect(&:is_master?)
master = active_servers.detect(&:is_master?)
if (last_master.nil? && !master.nil?) || (!last_master.nil? && !master.nil? && last_master.id != master.id)
_log.info "Aborting master server takeover as another server, #{master.name}, has taken control first."
return nil
end

all_servers.each do |s|
_log.debug "Setting this server, #{name}, as master server"

# Set is_master on self, reset every other server in the region, including
# inactive ones.
parent.miq_servers.each do |s|
s.is_master = (id == s.id)
s.save!
end
Expand Down
32 changes: 32 additions & 0 deletions spec/models/miq_server/server_monitor_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,38 @@
@miq_server3.reload
end

it "handles multiple failover transitions" do
# server1 is first to start, becomes master
@miq_server1.monitor_servers

# Initialize the bookkeeping around current and last master
@miq_server2.monitor_servers
@miq_server3.monitor_servers

# server1 is master
expect(@miq_server1.reload.is_master).to be_truthy
expect(@miq_server2.reload.is_master).to be_falsey
expect(@miq_server3.reload.is_master).to be_falsey

# server 1 shuts down
@miq_server1.update(:status => "stopped")

# server 3 becomes master, server 2 hasn't monitored servers yet
@miq_server3.monitor_servers
expect(@miq_server1.reload.is_master).to be_falsey
expect(@miq_server2.reload.is_master).to be_falsey
expect(@miq_server3.reload.is_master).to be_truthy

# server 3 shuts down
@miq_server3.update(:status => "stopped")

# server 2 finally gets to monitor_servers, takes over
@miq_server2.monitor_servers
expect(@miq_server1.reload.is_master).to be_falsey
expect(@miq_server2.reload.is_master).to be_truthy
expect(@miq_server3.reload.is_master).to be_falsey
end

it "should have all roles active after sync between them" do
expect(@miq_server1.active_role_names.include?("ems_operations")).to be_truthy
expect(@miq_server2.active_role_names.include?("ems_operations")).to be_truthy
Expand Down

0 comments on commit f4bb169

Please sign in to comment.