geo cluster
nicholasyang2022 committed Oct 19, 2023
1 parent 9d3dbbc commit a984f5b
Showing 5 changed files with 119 additions and 37 deletions.
110 changes: 80 additions & 30 deletions crmsh/bootstrap.py
@@ -1741,11 +1741,7 @@ def join_ssh_impl(local_user, seed_host, seed_user, ssh_public_keys: typing.List
     ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).start_service("sshd.service", enable=True)
     if ssh_public_keys:
         local_shell = sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')})
-        shell = sh.SSHShell(local_shell, 'root')
-        # FIXME: detect the availability of ssh session
-        authorized_key_manager = ssh_key.AuthorizedKeyManager(shell)
-        for key in ssh_public_keys:
-            authorized_key_manager.add(None, local_user, key)
+        join_ssh_with_ssh_agent(local_shell, local_user, seed_host, seed_user, ssh_public_keys)
     else:
         local_shell = sh.LocalShell()
         configure_ssh_key(local_user)
@@ -1786,6 +1782,18 @@ def join_ssh_impl(local_user, seed_host, seed_user, ssh_public_keys: typing.List
     swap_public_ssh_key_for_secondary_user(sh.cluster_shell(), seed_host, 'hacluster')


+def join_ssh_with_ssh_agent(
+        local_shell: sh.LocalShell,
+        local_user: str, seed_host: str, seed_user: str,
+        ssh_public_keys: typing.List[ssh_key.Key],
+):
+    # As ssh-agent is used, local_user has no effect here.
+    shell = sh.SSHShell(local_shell, 'root')
+    # FIXME: detect the availability of ssh session
+    authorized_key_manager = ssh_key.AuthorizedKeyManager(shell)
+    for key in ssh_public_keys:
+        authorized_key_manager.add(None, local_user, key)
+

 def swap_public_ssh_key_for_secondary_user(shell: sh.ClusterShell, host: str, user: str):
     key_file_manager = ssh_key.KeyFileManager(shell)
@@ -2806,29 +2814,6 @@ def bootstrap_init_geo(context)


 def geo_fetch_config(node):
-    user, node = utils.parse_user_at_host(node)
-    if user is not None:
-        try:
-            local_user = utils.user_of(utils.this_node())
-        except UserNotFoundError:
-            local_user = user
-        remote_user = user
-    else:
-        try:
-            local_user, remote_user = UserOfHost.instance().user_pair_for_ssh(node)
-        except UserNotFoundError:
-            try:
-                local_user = utils.user_of(utils.this_node())
-            except UserNotFoundError:
-                local_user = userdir.getuser()
-            remote_user = local_user
-    configure_ssh_key(local_user)
-    logger.info("Retrieving configuration - This may prompt for %s@%s:", remote_user, node)
-    utils.ssh_copy_id(local_user, remote_user, node)
-    user_by_host = utils.HostUserConfig()
-    user_by_host.add(local_user, utils.this_node())
-    user_by_host.add(remote_user, node)
-    user_by_host.save_local()
     cmd = "tar -c -C '{}' .".format(BOOTH_DIR)
     with tempfile.TemporaryDirectory() as tmpdir:
         pipe_outlet, pipe_inlet = os.pipe()
@@ -2859,6 +2844,27 @@ def geo_fetch_config(node):
raise ValueError("Problem encountered with booth configuration from {}: {}".format(node, err))


def _select_user_pair_for_ssh_for_secondary_components(dest: str):
"""Select a user pair for operating secondary components, e.g. qdevice and geo cluster arbitor"""
user, node = utils.parse_user_at_host(dest)
if user is not None:
try:
local_user = utils.user_of(utils.this_node())
except UserNotFoundError:
local_user = user
remote_user = user
else:
try:
local_user, remote_user = UserOfHost.instance().user_pair_for_ssh(node)
except UserNotFoundError:
try:
local_user = utils.user_of(utils.this_node())
except UserNotFoundError:
local_user = userdir.getuser()
remote_user = local_user
return local_user, remote_user, node


def geo_cib_config(clusters):
cluster_name = corosync.get_values('totem.cluster_name')[0]
if cluster_name not in list(clusters.keys()):
@@ -2886,7 +2892,29 @@ def bootstrap_join_geo(context):
     _context = context
     init_common_geo()
     check_tty()
-    geo_fetch_config(_context.cluster_node)
+    user, node = utils.parse_user_at_host(_context.cluster_node)
+    if not sh.cluster_shell().can_run_as(node, 'root'):
+        local_user, remote_user, node = _select_user_pair_for_ssh_for_secondary_components(_context.cluster_node)
+        if context.use_ssh_agent:
+            try:
+                ssh_agent = ssh_key.AgentClient()
+                keys = ssh_agent.list()
+            except ssh_key.Error:
+                logger.error("Cannot get a public key from ssh-agent.")
+                raise
+            local_shell = sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')})
+            join_ssh_with_ssh_agent(local_shell, local_user, node, remote_user, keys)
+        else:
+            configure_ssh_key(local_user)
+            if 0 != utils.ssh_copy_id_no_raise(local_user, remote_user, node):
+                raise ValueError(f"Failed to login to {remote_user}@{node}. Please check the credentials.")
+            swap_public_ssh_key(node, local_user, remote_user, local_user, remote_user, add=True)
+        user_by_host = utils.HostUserConfig()
+        user_by_host.add(local_user, utils.this_node())
+        user_by_host.add(remote_user, node)
+        user_by_host.set_no_generating_ssh_key(context.use_ssh_agent)
+        user_by_host.save_local()
+    geo_fetch_config(node)
     logger.info("Sync booth configuration across cluster")
     csync2_update(BOOTH_DIR)
     geo_cib_config(_context.clusters)
@@ -2902,7 +2930,29 @@ def bootstrap_arbitrator(context):

     init_common_geo()
     check_tty()
-    geo_fetch_config(_context.cluster_node)
+    user, node = utils.parse_user_at_host(_context.cluster_node)
+    if not sh.cluster_shell().can_run_as(node, 'root'):
+        local_user, remote_user, node = _select_user_pair_for_ssh_for_secondary_components(_context.cluster_node)
+        if context.use_ssh_agent:
+            try:
+                ssh_agent = ssh_key.AgentClient()
+                keys = ssh_agent.list()
+            except ssh_key.Error:
+                logger.error("Cannot get a public key from ssh-agent.")
+                raise
+            local_shell = sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')})
+            join_ssh_with_ssh_agent(local_shell, local_user, node, remote_user, keys)
+        else:
+            configure_ssh_key(local_user)
+            if 0 != utils.ssh_copy_id_no_raise(local_user, remote_user, node):
+                raise ValueError(f"Failed to login to {remote_user}@{node}. Please check the credentials.")
+            swap_public_ssh_key(node, local_user, remote_user, local_user, remote_user, add=True)
+        user_by_host = utils.HostUserConfig()
+        user_by_host.add(local_user, utils.this_node())
+        user_by_host.add(remote_user, node)
+        user_by_host.set_no_generating_ssh_key(context.use_ssh_agent)
+        user_by_host.save_local()
+    geo_fetch_config(node)
     if not os.path.isfile(BOOTH_CFG):
         utils.fatal("Failed to copy {} from {}".format(BOOTH_CFG, _context.cluster_node))
     # TODO: verify that the arbitrator IP in the configuration is us?
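A minimal sketch of how the new ssh-agent join path is driven, using only names visible in the diff above (the placeholder user and host values are hypothetical; error handling is elided):

import os
from crmsh import sh, ssh_key
from crmsh.bootstrap import join_ssh_with_ssh_agent

local_user, remote_user, node = 'root', 'root', 'hanode1'  # hypothetical values
keys = ssh_key.AgentClient().list()  # raises ssh_key.Error if no agent is reachable
local_shell = sh.LocalShell(
    additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')})
# Authorize every agent-held public key on the remote side instead of
# generating and copying a new key pair locally.
join_ssh_with_ssh_agent(local_shell, local_user, node, remote_user, keys)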
13 changes: 8 additions & 5 deletions crmsh/sh.py
@@ -278,11 +278,14 @@ def __init__(
         self.raise_ssh_error = raise_ssh_error

     def can_run_as(self, host: typing.Optional[str], user: str) -> bool:
-        result = self.subprocess_run_without_input(
-            host, user, 'true',
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-        )
+        try:
+            result = self.subprocess_run_without_input(
+                host, user, 'true',
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+        except user_of_host.UserNotFoundError:
+            return False
         return 0 == result.returncode

     def subprocess_run_without_input(self, host: typing.Optional[str], user: typing.Optional[str], cmd: str, **kwargs):
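The effect of this change on callers, as a small usage sketch (assuming a configured cluster shell; "hanode1" is a placeholder host):

from crmsh import sh

shell = sh.cluster_shell()
# Previously this probe leaked user_of_host.UserNotFoundError when no user
# mapping existed for the host; now it simply reports False, which is what
# the new geo_join/arbitrator code paths branch on.
if not shell.can_run_as('hanode1', 'root'):
    pass  # fall back to selecting a user pair and setting up ssh first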
2 changes: 2 additions & 0 deletions crmsh/ui_cluster.py
@@ -735,6 +735,8 @@ def do_geo_init_arbitrator(self, context, *args):
parser.add_argument("-q", "--quiet", help="Be quiet (don't describe what's happening, just do it)", action="store_true", dest="quiet")
parser.add_argument("-y", "--yes", help='Answer "yes" to all prompts (use with caution)', action="store_true", dest="yes_to_all")
parser.add_argument("-c", "--cluster-node", metavar="[USER@]HOST", help="An already-configured geo cluster", dest="cluster_node")
parser.add_argument('--use-ssh-agent', action='store_true',
help="Use an existing key from ssh-agent instead of creating new key pairs")
options, args = parse_options(parser, args)
if options is None or args is None:
return
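For reference, a self-contained argparse sketch of the new flag (standalone, not crmsh's actual option plumbing): the dashed flag surfaces as the use_ssh_agent attribute that the bootstrap code reads as context.use_ssh_agent.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--use-ssh-agent', action='store_true',
                    help="Use an existing key from ssh-agent instead of creating new key pairs")
options = parser.parse_args(['--use-ssh-agent'])
assert options.use_ssh_agent  # argparse turns dashes into underscores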
29 changes: 28 additions & 1 deletion test/features/geo_setup.feature
@@ -5,7 +5,6 @@ Feature: geo cluster
   Tag @clean means need to stop cluster service if the service is available
   Need nodes: hanode1 hanode2 hanode3

-  @clean
   Scenario: GEO cluster setup
     Given Cluster service is "stopped" on "hanode1"
     And Cluster service is "stopped" on "hanode2"
@@ -27,3 +26,31 @@
     Then Show cluster status on "hanode1"
     When Run "crm resource start g-booth" on "hanode2"
     Then Show cluster status on "hanode2"
+
+  Scenario: GEO cluster setup with ssh-agent
+    When Run "crm cluster stop" on "hanode1"
+    And Run "crm cluster stop" on "hanode2"
+    And Run "systemctl disable --now booth@booth" on "hanode1,hanode2,hanode3"
+    Given Cluster service is "stopped" on "hanode1"
+    And Cluster service is "stopped" on "hanode2"
+    And Directory "/root/.ssh" is renamed to "/root/ssh_disabled" on nodes ["hanode1", "hanode2", "hanode3"]
+    And ssh-agent is started at "/root/ssh-auth-sock" on nodes ["hanode1", "hanode2", "hanode3"]
+    When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock ssh-add /root/ssh_disabled/id_rsa" on "hanode1,hanode2,hanode3"
+    And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init -y -n cluster1 --use-ssh-agent" on "hanode1"
+    Then Cluster service is "started" on "hanode1"
+    When Run "crm configure primitive vip IPaddr2 params ip=@vip.0" on "hanode1"
+
+    When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init -y -n cluster2 --use-ssh-agent" on "hanode2"
+    Then Cluster service is "started" on "hanode2"
+    When Run "crm configure primitive vip IPaddr2 params ip=@vip.1" on "hanode2"
+
+    When Run "crm cluster geo_init -y --clusters "cluster1=@vip.0 cluster2=@vip.1" --tickets tickets-geo --arbitrator hanode3" on "hanode1"
+    When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster geo_join -y --use-ssh-agent --cluster-node hanode1 --clusters "cluster1=@vip.0 cluster2=@vip.1"" on "hanode2"
+    Given Service "booth@booth" is "stopped" on "hanode3"
+    When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster geo_init_arbitrator -y --use-ssh-agent --cluster-node hanode1" on "hanode3"
+    Then Service "booth@booth" is "started" on "hanode3"
+    When Run "crm resource start g-booth" on "hanode1"
+    Then Show cluster status on "hanode1"
+    When Run "crm resource start g-booth" on "hanode2"
+    Then Show cluster status on "hanode2"
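The scenario's agent priming, condensed into a Python sketch for clarity (a hypothetical local rendition of the steps above; the real test drives these commands over ssh on each node, and the @vip.N tokens are placeholders resolved by the test harness):

import os
import subprocess

env = dict(os.environ, SSH_AUTH_SOCK='/root/ssh-auth-sock')
# Load the pre-existing key into the agent, then run the join through it,
# so crmsh never needs to generate or copy key files itself.
subprocess.run(['ssh-add', '/root/ssh_disabled/id_rsa'], env=env, check=True)
subprocess.run(
    ['crm', 'cluster', 'geo_join', '-y', '--use-ssh-agent',
     '--cluster-node', 'hanode1', '--clusters', 'cluster1=@vip.0 cluster2=@vip.1'],
    env=env, check=True)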
2 changes: 1 addition & 1 deletion test/features/steps/utils.py
@@ -55,7 +55,7 @@ def _wrap_cmd_non_root(cmd):
     else:
         return cmd
     if re.search('cluster (:?join|geo_join|geo_init_arbitrator)', cmd) and "@" not in cmd:
-        cmd = re.sub(r'''((?:-c|-N|--qnetd-hostname|--cluster-node)(?:\s+|=)['"]?)(\S{2,}['"]?)''', f'\\1{user}@\\2', cmd)
+        cmd = re.sub(r'''((?:-c|-N|--qnetd-hostname|--cluster-node|--arbitrator)(?:\s+|=)['"]?)(\S{2,}['"]?)''', f'\\1{user}@\\2', cmd)
     elif "cluster init" in cmd and ("-N" in cmd or "--qnetd-hostname" in cmd) and "@" not in cmd:
         cmd = re.sub(r'''((?:-c|-N|--qnetd-hostname|--cluster-node)(?:\s+|=)['"]?)(\S{2,}['"]?)''', f'\\1{user}@\\2', cmd)
     elif "cluster init" in cmd and "--node" in cmd and "@" not in cmd:
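What the widened pattern does, in isolation ("alice" is a hypothetical non-root test user):

import re

user = 'alice'  # hypothetical non-root test user
pattern = r'''((?:-c|-N|--qnetd-hostname|--cluster-node|--arbitrator)(?:\s+|=)['"]?)(\S{2,}['"]?)'''
cmd = 'crm cluster geo_init -y --tickets tickets-geo --arbitrator hanode3'
# --arbitrator is now in the alternation, so hostnames passed to it also
# get the USER@ prefix when the tests run as a non-root user.
print(re.sub(pattern, f'\\1{user}@\\2', cmd))
# -> crm cluster geo_init -y --tickets tickets-geo --arbitrator alice@hanode3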
