Skip to content

Commit ed121e4

Browse files
committed
Enhance galera to interact over multiple clusters
This change adds a new resource agent "stretch_galera" which builds off of the existing "galera" agent. To accommodate this, the "galera" agent's shell script structure is modified slightly so that it can be sourced for its functions. The new resource agent adds a new parameter "remote_node_map" to the Galera resource agent which allows it to consider galera node names that are in other clusters as part of its Galera quorum. To achieve this, it launches read-only pcs commands to the remote clusters in order to view and modify remote state variables. Additionally, the stretch agent honors an optional pcs attribute <node>-initial-bootstrap which when applied to the local pcs nodes, will allow Galera to be bootstrapped with only that subset of nodes, without the additional remote nodes being available yet. An installer can set these attributes to allow the first pcs cluster to come online before subsequent clusters, and then remove the attributes.
1 parent 6b25525 commit ed121e4

File tree

2 files changed

+367
-72
lines changed

2 files changed

+367
-72
lines changed

heartbeat/galera

Lines changed: 88 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
##
2727
# README.
28-
#
28+
#
2929
# This agent only supports being configured as a multistate Master
3030
# resource.
3131
#
@@ -49,15 +49,15 @@
4949
# pcs resource create db galera enable_creation=true \
5050
# wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master
5151
#
52-
# By setting the 'enable_creation' option, the database will be automatically
52+
# By setting the 'enable_creation' option, the database will be automatically
5353
# generated at startup. The meta attribute 'master-max=3' means that all 3
5454
# nodes listed in the wsrep_cluster_address list will be allowed to connect
5555
# to the galera cluster and perform replication.
5656
#
5757
# NOTE: If you have more nodes in the pacemaker cluster than you wish
5858
# to have in the galera cluster, make sure to use location constraints to prevent
5959
# pacemaker from attempting to place a galera instance on a node that is
60-
# not in the 'wsrep_cluster_address" list.
60+
# not in the 'wsrep_cluster_address' list.
6161
#
6262
##
6363

@@ -101,7 +101,9 @@ UEND
101101
}
102102

103103
meta_data() {
104-
cat <<END
104+
extra_parameters="$1"
105+
106+
cat <<END
105107
<?xml version="1.0"?>
106108
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
107109
<resource-agent name="galera">
@@ -249,6 +251,8 @@ Cluster check user password
249251
<content type="string" default="" />
250252
</parameter>
251253
254+
${extra_parameters}
255+
252256
</parameters>
253257
254258
<actions>
@@ -331,7 +335,7 @@ get_last_commit()
331335

332336
if [ -z "$node" ]; then
333337
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
334-
else
338+
else
335339
${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
336340
fi
337341
}
@@ -411,7 +415,7 @@ master_exists()
411415
return 1
412416
fi
413417
# determine if a master instance is already up and is healthy
414-
${HA_SBIN_DIR}/crm_mon --as-xml | grep "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
418+
crm_mon --as-xml | grep "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
415419
return $?
416420
}
417421

@@ -420,7 +424,7 @@ clear_master_score()
420424
local node=$(ocf_attribute_target $1)
421425
if [ -z "$node" ]; then
422426
$CRM_MASTER -D
423-
else
427+
else
424428
$CRM_MASTER -D -N $node
425429
fi
426430
}
@@ -431,7 +435,7 @@ set_master_score()
431435

432436
if [ -z "$node" ]; then
433437
$CRM_MASTER -v 100
434-
else
438+
else
435439
$CRM_MASTER -N $node -v 100
436440
fi
437441
}
@@ -480,6 +484,23 @@ pcmk_to_galera_name()
480484
}
481485

482486

487+
# Print, on stdout, the pacemaker node names of every galera node listed in
# OCF_RESKEY_wsrep_cluster_address, as a single space-separated line
# (each name is preceded by one space, so the line starts with a space).
#
# Globals:  OCF_RESKEY_wsrep_cluster_address (read)
# Outputs:  the list of pacemaker node names; nothing if any galera name
#           cannot be mapped by galera_to_pcmk_name
# Returns:  0 on success, 1 if a galera name could not be mapped
all_bootstrap_candidates()
{
    local all_nodes
    local node
    local pcmk_node
    local pcmk_nodes=""

    # Drop the gcomm:// scheme and any blanks, then turn the comma-separated
    # address list into whitespace-separated words.
    all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')

    # $all_nodes is intentionally unquoted: it is split into one word per node.
    for node in $all_nodes; do
        pcmk_node=$(galera_to_pcmk_name "$node")
        if [ -z "$pcmk_node" ]; then
            ocf_log err "Could not determine pacemaker node from galera name <${node}>."
            # Explicit failure status; a bare 'return' would hand back the
            # (successful) status of ocf_log and hide the error.
            return 1
        fi

        pcmk_nodes="$pcmk_nodes $pcmk_node"
    done
    echo "$pcmk_nodes"
}
503+
483504
detect_first_master()
484505
{
485506
local best_commit=0
@@ -492,24 +513,11 @@ detect_first_master()
492513
local best_node
493514
local safe_to_bootstrap
494515

495-
all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
496-
best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/')
497-
best_node=$(galera_to_pcmk_name $best_node_gcomm)
498-
if [ -z "$best_node" ]; then
499-
ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>."
500-
return
501-
fi
516+
all_nodes=$(all_bootstrap_candidates)
517+
best_node=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/')
502518

503519
# avoid selecting a recovered node as bootstrap if possible
504520
for node in $all_nodes; do
505-
local pcmk_node=$(galera_to_pcmk_name $node)
506-
if [ -z "$pcmk_node" ]; then
507-
ocf_log err "Could not determine pacemaker node from galera name <${node}>."
508-
return
509-
else
510-
node=$pcmk_node
511-
fi
512-
513521
if is_no_grastate $node; then
514522
nodes_recovered="$nodes_recovered $node"
515523
else
@@ -529,6 +537,8 @@ detect_first_master()
529537
# We don't need to wait for the other nodes to report state in this case
530538
missing_nodes=0
531539
break
540+
else
541+
ocf_log info "Node <${node}> is not marked as safe to bootstrap, continuing to look."
532542
fi
533543

534544
last_commit=$(get_last_commit $node)
@@ -914,64 +924,70 @@ galera_validate()
914924
mysql_common_validate
915925
}
916926

917-
case "$1" in
918-
meta-data) meta_data
919-
exit $OCF_SUCCESS;;
920-
usage|help) usage
921-
exit $OCF_SUCCESS;;
922-
esac
923-
924-
galera_validate
925-
rc=$?
926-
LSB_STATUS_STOPPED=3
927-
if [ $rc -ne 0 ]; then
927+
cmd_main() {
928928
case "$1" in
929-
stop) exit $OCF_SUCCESS;;
930-
monitor) exit $OCF_NOT_RUNNING;;
931-
status) exit $LSB_STATUS_STOPPED;;
932-
*) exit $rc;;
929+
meta-data) meta_data
930+
exit $OCF_SUCCESS;;
931+
usage|help) usage
932+
exit $OCF_SUCCESS;;
933933
esac
934-
fi
935934

936-
if [ -z "${OCF_RESKEY_check_passwd}" ]; then
937-
# This value is automatically sourced from /etc/sysconfig/checkcluster if available
938-
OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
939-
fi
940-
if [ -z "${OCF_RESKEY_check_user}" ]; then
941-
# This value is automatically sourced from /etc/sysconfig/checkcluster if available
942-
OCF_RESKEY_check_user=${MYSQL_USERNAME}
943-
fi
944-
: ${OCF_RESKEY_check_user="root"}
935+
galera_validate
936+
rc=$?
937+
LSB_STATUS_STOPPED=3
938+
if [ $rc -ne 0 ]; then
939+
case "$1" in
940+
stop) exit $OCF_SUCCESS;;
941+
monitor) exit $OCF_NOT_RUNNING;;
942+
status) exit $LSB_STATUS_STOPPED;;
943+
*) exit $rc;;
944+
esac
945+
fi
945946

946-
MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
947-
if [ -n "${OCF_RESKEY_check_passwd}" ]; then
948-
MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
949-
fi
947+
if [ -z "${OCF_RESKEY_check_passwd}" ]; then
948+
# This value is automatically sourced from /etc/sysconfig/checkcluster if available
949+
OCF_RESKEY_check_passwd=${MYSQL_PASSWORD}
950+
fi
951+
if [ -z "${OCF_RESKEY_check_user}" ]; then
952+
# This value is automatically sourced from /etc/sysconfig/checkcluster if available
953+
OCF_RESKEY_check_user=${MYSQL_USERNAME}
954+
fi
955+
: ${OCF_RESKEY_check_user="root"}
950956

951-
# This value is automatically sourced from /etc/sysconfig/checkcluster if available
952-
if [ -n "${MYSQL_HOST}" ]; then
953-
MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
954-
fi
957+
MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
958+
if [ -n "${OCF_RESKEY_check_passwd}" ]; then
959+
MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
960+
fi
955961

956-
# This value is automatically sourced from /etc/sysconfig/checkcluster if available
957-
if [ -n "${MYSQL_PORT}" ]; then
958-
MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
959-
fi
962+
# This value is automatically sourced from /etc/sysconfig/checkcluster if available
963+
if [ -n "${MYSQL_HOST}" ]; then
964+
MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
965+
fi
960966

967+
# This value is automatically sourced from /etc/sysconfig/checkcluster if available
968+
if [ -n "${MYSQL_PORT}" ]; then
969+
MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
970+
fi
961971

972+
# What kind of method was invoked?
973+
case "$1" in
974+
start) galera_start;;
975+
stop) galera_stop;;
976+
status) mysql_common_status err;;
977+
monitor) galera_monitor;;
978+
promote) galera_promote;;
979+
demote) galera_demote;;
980+
validate-all) exit $OCF_SUCCESS;;
981+
982+
*) usage
983+
exit $OCF_ERR_UNIMPLEMENTED;;
984+
esac
985+
}
962986

963-
# What kind of method was invoked?
964-
case "$1" in
965-
start) galera_start;;
966-
stop) galera_stop;;
967-
status) mysql_common_status err;;
968-
monitor) galera_monitor;;
969-
promote) galera_promote;;
970-
demote) galera_demote;;
971-
validate-all) exit $OCF_SUCCESS;;
987+
# Run 'cmd_main' unless the first argument is "sourceonly", which lets another
# script source this file purely for its function definitions.
# "$1" is quoted so that an invocation with no arguments still reaches
# cmd_main (an unquoted empty $1 turns the test into a syntax error and the
# script would silently do nothing); "$@" forwards every argument unmodified.
if [ "$1" != "sourceonly" ]; then
    cmd_main "$@"
fi
972991

973-
*) usage
974-
exit $OCF_ERR_UNIMPLEMENTED;;
975-
esac
976992

977993
# vi:sw=4:ts=4:et:

0 commit comments

Comments
 (0)