Skip to content

Some fencer device registration refactors #3863

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Apr 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b26591d
Refactor: fencer: Best practices in update_cib_stonith_devices()
nrwahl2 Mar 30, 2025
242908a
Refactor: fencer: Match primitive XPath only with ID
nrwahl2 Mar 30, 2025
746d319
Refactor: fencer: Take patchset as arg in update_cib_stonith_devices()
nrwahl2 Mar 30, 2025
8a12fef
Refactor: fencer: Some best practices in stonith_device_register()
nrwahl2 Apr 5, 2025
d626bb1
Low: fencer: Don't remove device if child is deleted
nrwahl2 Mar 30, 2025
ae101e3
Refactor: fencer: Slight improvements to stonith_device_remove()
nrwahl2 Apr 5, 2025
39de562
Refactor: fencer: Drop stonith_device_t:has_attr_map
nrwahl2 Apr 7, 2025
41b2c59
Test: cts-fencing: Drop test for nodeid parameter inclusion
nrwahl2 Apr 8, 2025
42eb446
API: fencer: Don't automatically pass nodeid as parameter
nrwahl2 Apr 8, 2025
7770e92
Refactor: libstonithd: Drop stonith__action_create() target_nodeid arg
nrwahl2 Apr 8, 2025
5906b6c
Refactor: fencer: Drop stonith_device_t:priority
nrwahl2 Apr 8, 2025
f9110a3
Refactor: fencer: Rename stonith_device_t to fenced_device_t
nrwahl2 Apr 8, 2025
8c2581f
Refactor: fencer: Drop async_command_t:pid
nrwahl2 Apr 8, 2025
e5d1979
Refactor: fencer: Drop async_command_t:fd_stdout
nrwahl2 Apr 8, 2025
f12f5a5
Refactor: fencer: Drop async_command_t:last_timeout_signo
nrwahl2 Apr 8, 2025
e4b8d7e
Refactor: fencer: Drop async_command_t:timer_sigterm and timer_sigkill
nrwahl2 Apr 8, 2025
dcd26d2
Doc: fencer: Clarify async_command_t:device_list
nrwahl2 Apr 8, 2025
2f31105
Refactor: fencer: Functionize some of cib_devices_update()
nrwahl2 Apr 8, 2025
4666ca9
Refactor: fencer: New fenced_foreach_device()
nrwahl2 Apr 8, 2025
fe37a5f
Refactor: fencer: New fenced_foreach_device_remove()
nrwahl2 Apr 8, 2025
43303ca
Refactor: fencer: New fenced_has_watchdog_device()
nrwahl2 Apr 8, 2025
042aec7
Refactor: fencer: Make device_list file-scope
nrwahl2 Apr 8, 2025
bcfe5d5
Refactor: fencer: Rename free_device_list to fenced_free_device_table
nrwahl2 Apr 8, 2025
fd4669a
Refactor: fencer: Rename init_device_list to fenced_init_device_table
nrwahl2 Apr 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 0 additions & 29 deletions cts/cts-fencing.in
Original file line number Diff line number Diff line change
Expand Up @@ -570,34 +570,6 @@ class FenceTests(Tests):
test.add_log_pattern("Delaying 'off' action targeting node3 using true3",
negative=True)

def build_nodeid_tests(self):
"""Register tests that use a corosync node id."""
our_uname = localname()

# verify nodeid is supplied when nodeid is in the metadata parameters
test = self.new_test("supply_nodeid",
"Verify nodeid is given when fence agent has nodeid as parameter")

test.add_cmd("stonith_admin",
args=f'--output-as=xml -R true1 -a fence_dummy -o mode=pass -o "pcmk_host_list={our_uname}"')
test.add_cmd("stonith_admin", args=f"--output-as=xml -F {our_uname} -t 3")
test.add_log_pattern(f"as nodeid with fence action 'off' targeting {our_uname}")

# verify nodeid is _NOT_ supplied when nodeid is not in the metadata parameters
test = self.new_test("do_not_supply_nodeid",
"Verify nodeid is _NOT_ given when fence agent does not have nodeid as parameter")

# use a host name that won't be in corosync.conf
test.add_cmd("stonith_admin",
args='--output-as=xml -R true1 -a fence_dummy_no_nodeid '
f'-o mode=pass -o pcmk_host_list="regr-test {our_uname}"')
test.add_cmd("stonith_admin", args="--output-as=xml -F regr-test -t 3")
test.add_log_pattern("as nodeid with fence action 'off' targeting regr-test",
negative=True)
test.add_cmd("stonith_admin", args=f"--output-as=xml -F {our_uname} -t 3")
test.add_log_pattern("as nodeid with fence action 'off' targeting {our_uname}",
negative=True)

def build_unfence_tests(self):
"""Register tests that verify unfencing."""
our_uname = localname()
Expand Down Expand Up @@ -917,7 +889,6 @@ def main():
tests.build_fence_no_merge_tests()
tests.build_unfence_tests()
tests.build_unfence_on_target_tests()
tests.build_nodeid_tests()
tests.build_remap_tests()
tests.build_query_tests()
tests.build_metadata_tests()
Expand Down
12 changes: 1 addition & 11 deletions cts/support/fence_dummy.in
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!@PYTHON@
"""Dummy fence agent for testing."""

__copyright__ = "Copyright 2012-2024 the Pacemaker project contributors"
__copyright__ = "Copyright 2012-2025 the Pacemaker project contributors"
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"

import io
Expand Down Expand Up @@ -159,14 +159,6 @@ ALL_OPT = {
"shortdesc": "Ignored",
"order": 4
},
"nodeid": {
"getopt": "i:",
"longopt": "nodeid",
"help": "-i, --nodeid Corosync id of fence target (ignored)",
"required": "0",
"shortdesc": "Ignored",
"order": 4
},
"uuid": {
"getopt": "U:",
"longopt": "uuid",
Expand Down Expand Up @@ -446,8 +438,6 @@ def main():
no_reboot = True
elif sys.argv[0].endswith("_no_on"):
no_on = True
elif sys.argv[0].endswith("_no_nodeid"):
del ALL_OPT["nodeid"]

device_opt = ALL_OPT.keys()

Expand Down
163 changes: 100 additions & 63 deletions daemons/fenced/fenced_cib.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,27 +194,60 @@ update_stonith_watchdog_timeout_ms(xmlNode *cib)
stonith_watchdog_timeout_ms = timeout_ms;
}

/*!
* \internal
* \brief Mark a fence device dirty if its \c cib_registered flag is \c TRUE
*
* \param[in] key Ignored
* \param[in,out] value Fence device (<tt>fenced_device_t *</tt>)
* \param[in] user_data Ignored
*
* \note This function is suitable for use with \c g_hash_table_foreach().
*/
static void
mark_dirty_if_cib_registered(gpointer key, gpointer value, gpointer user_data)
{
fenced_device_t *device = value;

if (device->cib_registered) {
device->dirty = TRUE;
}
}

/*!
* \internal
* \brief Return the value of a fence device's \c dirty flag
*
* \param[in] key Ignored
* \param[in] value Fence device (<tt>fenced_device_t *</tt>)
* \param[in] user_data Ignored
*
* \return \c dirty flag of \p value
*
* \note This function is suitable for use with
* \c g_hash_table_foreach_remove().
*/
static gboolean
device_is_dirty(gpointer key, gpointer value, gpointer user_data)
{
fenced_device_t *device = value;

return device->dirty;
}

/*!
* \internal
* \brief Update all STONITH device definitions based on current CIB
*/
static void
cib_devices_update(void)
{
GHashTableIter iter;
stonith_device_t *device = NULL;

crm_info("Updating devices to version %s.%s.%s",
crm_element_value(local_cib, PCMK_XA_ADMIN_EPOCH),
crm_element_value(local_cib, PCMK_XA_EPOCH),
crm_element_value(local_cib, PCMK_XA_NUM_UPDATES));

g_hash_table_iter_init(&iter, device_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
if (device->cib_registered) {
device->dirty = TRUE;
}
}
fenced_foreach_device(mark_dirty_if_cib_registered, NULL);

/* have list repopulated if cib has a watchdog-fencing-resource
TODO: keep a cached list for queries happening while we are refreshing
Expand All @@ -224,78 +257,78 @@ cib_devices_update(void)

fenced_scheduler_run(local_cib);

g_hash_table_iter_init(&iter, device_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) {
if (device->dirty) {
g_hash_table_iter_remove(&iter);
}
}
fenced_foreach_device_remove(device_is_dirty);
}

#define PRIMITIVE_ID_XP_FRAGMENT "/" PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "='"

static void
update_cib_stonith_devices(const char *event, xmlNode * msg)
update_cib_stonith_devices(const xmlNode *patchset)
{
int format = 1;
xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT,
NULL, NULL);
xmlNode *patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
char *reason = NULL;

CRM_CHECK(patchset != NULL, return);
crm_element_value_int(patchset, PCMK_XA_FORMAT, &format);

if (format != 2) {
crm_warn("Unknown patch format: %d", format);
return;
}

for (xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL, NULL);
for (const xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL,
NULL);
change != NULL; change = pcmk__xe_next(change, NULL)) {

const char *op = crm_element_value(change, PCMK_XA_OPERATION);
const char *xpath = crm_element_value(change, PCMK_XA_PATH);
const char *shortpath = NULL;
const char *primitive_xpath = NULL;

if (pcmk__str_eq(op, PCMK_VALUE_MOVE, pcmk__str_null_matches)
|| (strstr(xpath, "/" PCMK_XE_STATUS) != NULL)) {
continue;
}

if (pcmk__str_eq(op, PCMK_VALUE_DELETE, pcmk__str_none)
&& (strstr(xpath, "/" PCMK_XE_PRIMITIVE) != NULL)) {
primitive_xpath = strstr(xpath, PRIMITIVE_ID_XP_FRAGMENT);
if ((primitive_xpath != NULL)
&& pcmk__str_eq(op, PCMK_VALUE_DELETE, pcmk__str_none)) {

const char *rsc_id = NULL;
char *search = NULL;
char *mutable = NULL;
const char *end_quote = NULL;

if ((strstr(xpath, PCMK_XE_INSTANCE_ATTRIBUTES) != NULL)
|| (strstr(xpath, PCMK_XE_META_ATTRIBUTES) != NULL)) {
if ((strstr(primitive_xpath, PCMK_XE_INSTANCE_ATTRIBUTES) != NULL)
|| (strstr(primitive_xpath, PCMK_XE_META_ATTRIBUTES) != NULL)) {

reason = pcmk__str_copy("(meta) attribute deleted from "
"resource");
break;
}
mutable = pcmk__str_copy(xpath);
rsc_id = strstr(mutable, PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "=\'");
if (rsc_id != NULL) {
rsc_id += strlen(PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "=\'");
search = strchr(rsc_id, '\'');

rsc_id = primitive_xpath + sizeof(PRIMITIVE_ID_XP_FRAGMENT) - 1;
end_quote = strchr(rsc_id, '\'');

CRM_LOG_ASSERT(end_quote != NULL);
if (end_quote == NULL) {
crm_err("Bug: Malformed item in Pacemaker-generated patchset");
continue;
}
if (search != NULL) {
*search = 0;
stonith_device_remove(rsc_id, true);

if (strchr(end_quote, '/') == NULL) {
/* The primitive element itself was deleted. If this was a
* fencing resource, it's faster to remove it directly than to
* run the scheduler and update all device registrations.
*/
char *copy = strndup(rsc_id, end_quote - rsc_id);

pcmk__assert(copy != NULL);
stonith_device_remove(copy, true);

/* watchdog_device_update called afterwards
to fall back to implicit definition if needed */
} else {
crm_warn("Ignoring malformed CIB update (resource deletion)");

free(copy);
continue;
}
free(mutable);

} else if (strstr(xpath, "/" PCMK_XE_RESOURCES)
|| strstr(xpath, "/" PCMK_XE_CONSTRAINTS)
|| strstr(xpath, "/" PCMK_XE_RSC_DEFAULTS)) {
shortpath = strrchr(xpath, '/');
pcmk__assert(shortpath != NULL);
reason = crm_strdup_printf("%s %s", op, shortpath+1);
}

if (strstr(xpath, "/" PCMK_XE_RESOURCES)
|| strstr(xpath, "/" PCMK_XE_CONSTRAINTS)
|| strstr(xpath, "/" PCMK_XE_RSC_DEFAULTS)) {

const char *shortpath = strrchr(xpath, '/');

reason = crm_strdup_printf("%s %s", op, shortpath + 1);
break;
}
}
Expand All @@ -313,8 +346,8 @@ static void
watchdog_device_update(void)
{
if (stonith_watchdog_timeout_ms > 0) {
if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
!stonith_watchdog_targets) {
if (!fenced_has_watchdog_device()
&& (stonith_watchdog_targets == NULL)) {
/* getting here watchdog-fencing enabled, no device there yet
and reason isn't stonith_watchdog_targets preventing that
*/
Expand All @@ -325,23 +358,22 @@ watchdog_device_update(void)
STONITH_WATCHDOG_ID,
st_namespace_internal,
STONITH_WATCHDOG_AGENT,
NULL, /* stonith_device_register will add our
NULL, /* fenced_device_register() will add our
own name as PCMK_STONITH_HOST_LIST param
so we can skip that here
*/
NULL);
rc = stonith_device_register(xml, TRUE);
rc = fenced_device_register(xml, true);
pcmk__xml_free(xml);
if (rc != pcmk_ok) {
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
crm_crit("Cannot register watchdog pseudo fence agent: %s",
pcmk_rc_str(rc));
stonith_shutdown(0);
}
}

} else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) {
} else if (fenced_has_watchdog_device()) {
/* be silent if no device - todo parameter to stonith_device_remove */
stonith_device_remove(STONITH_WATCHDOG_ID, true);
}
Expand Down Expand Up @@ -465,6 +497,7 @@ update_fencing_topology(const char *event, xmlNode *msg)
static void
update_cib_cache_cb(const char *event, xmlNode * msg)
{
xmlNode *patchset = NULL;
long long timeout_ms_saved = stonith_watchdog_timeout_ms;
bool need_full_refresh = false;

Expand All @@ -483,7 +516,6 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
if (local_cib != NULL) {
int rc = pcmk_ok;
xmlNode *wrapper = NULL;
xmlNode *patchset = NULL;

crm_element_value_int(msg, PCMK__XA_CIB_RC, &rc);
if (rc != pcmk_ok) {
Expand All @@ -498,6 +530,11 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
/* @TODO Full refresh (with or without query) in case of
* -pcmk_err_old_data? It seems wrong to call
* stonith_device_remove() based on primitive deletion in an
* old diff.
*/
break;
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
Expand Down Expand Up @@ -532,7 +569,7 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
} else {
// Partial refresh
update_fencing_topology(event, msg);
update_cib_stonith_devices(event, msg);
update_cib_stonith_devices(patchset);
}

watchdog_device_update();
Expand Down
Loading