From 7694880c99329e73d8fdf049c551bbb3d141cc65 Mon Sep 17 00:00:00 2001 From: Pekka Lampio Date: Fri, 13 Jun 2025 14:02:25 +0300 Subject: [PATCH 1/2] MDEV-36554 Assertion `is_wsrep() == wsrep_on(mysql_thd)' failed in void trx_t::commit_in_memory(const mtr_t*) This bug fix prevents debug assertion failure when Galera feature retry applying is enabled. This patch introduces also a general mechanism for temporarily disabling some debug assertions in InnoDB code. --- include/mysql/service_wsrep.h | 5 +++ mysql-test/suite/galera/r/mdev-36554.result | 25 ++++++++++++ mysql-test/suite/galera/t/mdev-36554.test | 45 +++++++++++++++++++++ sql/sql_class.cc | 18 +++++++++ sql/sql_class.h | 8 +++- sql/wsrep_applier.cc | 3 ++ storage/innobase/trx/trx0trx.cc | 8 +++- 7 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 mysql-test/suite/galera/r/mdev-36554.result create mode 100644 mysql-test/suite/galera/t/mdev-36554.test diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h index cf94364c1cf82..83ce48a1dbe13 100644 --- a/include/mysql/service_wsrep.h +++ b/include/mysql/service_wsrep.h @@ -8,6 +8,11 @@ enum Wsrep_service_key_type WSREP_SERVICE_KEY_UPDATE, WSREP_SERVICE_KEY_EXCLUSIVE }; + + +/* the bits in the bitmask for disabling temporarily some asserts */ +#define WSREP_ASSERT_INNODB_TRX 1 + #if (defined (MYSQL_DYNAMIC_PLUGIN) && defined(MYSQL_SERVICE_WSREP_DYNAMIC_INCLUDED)) || (!defined(MYSQL_DYNAMIC_PLUGIN) && defined(MYSQL_SERVICE_WSREP_STATIC_INCLUDED)) diff --git a/mysql-test/suite/galera/r/mdev-36554.result b/mysql-test/suite/galera/r/mdev-36554.result new file mode 100644 index 0000000000000..c4271093c1f89 --- /dev/null +++ b/mysql-test/suite/galera/r/mdev-36554.result @@ -0,0 +1,25 @@ +connection node_2; +connection node_1; +CALL mtr.add_suppression("Event .* Update_rows.* apply failed"); +CALL mtr.add_suppression("mariadbd: Can't find record in 't1'"); +CALL mtr.add_suppression("Failed to apply write set.*"); +CALL mtr.add_suppression("Inconsistency detected"); +connection node_1; +CREATE TABLE t1 (f1 INTEGER); +INSERT INTO t1 VALUES (1); +connection node_2; +SET SESSION wsrep_on = OFF; +DELETE FROM t1; +SET SESSION wsrep_on = ON; +SET GLOBAL wsrep_applier_retry_count=1; +connection node_1; +UPDATE t1 SET f1 = f1 + 1; +connection node_2; +Shutting down server ... +SET wsrep_on=OFF; +Restarting server ... +connection node_1; +SET wsrep_sync_wait=0; +connection node_1; +SET GLOBAL wsrep_applier_retry_count = 0; +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/mdev-36554.test b/mysql-test/suite/galera/t/mdev-36554.test new file mode 100644 index 0000000000000..f2a57c6e242c8 --- /dev/null +++ b/mysql-test/suite/galera/t/mdev-36554.test @@ -0,0 +1,45 @@ +# +# Test for MDEV-36554 +# + +--source include/galera_cluster.inc + +CALL mtr.add_suppression("Event .* Update_rows.* apply failed"); +CALL mtr.add_suppression("mariadbd: Can't find record in 't1'"); +CALL mtr.add_suppression("Failed to apply write set.*"); +CALL mtr.add_suppression("Inconsistency detected"); + +--connection node_1 +CREATE TABLE t1 (f1 INTEGER); +INSERT INTO t1 VALUES (1); + +--connection node_2 +SET SESSION wsrep_on = OFF; +DELETE FROM t1; +SET SESSION wsrep_on = ON; +SET GLOBAL wsrep_applier_retry_count=1; + +--connection node_1 +UPDATE t1 SET f1 = f1 + 1; + +# restart node 2 +--connection node_2 +--echo Shutting down server ... +SET wsrep_on=OFF; +--source include/shutdown_mysqld.inc +--source include/wait_until_disconnected.inc +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat +--echo Restarting server ... +--source include/start_mysqld.inc + +# wait till node 2 is back in the cluster +--connection node_1 +SET wsrep_sync_wait=0; +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--source include/wait_condition.inc + +# cleanup +--connection node_1 +SET GLOBAL wsrep_applier_retry_count = 0; +DROP TABLE t1; + diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 08354e0fa2db3..f5c2006486000 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -762,6 +762,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) wsrep_affected_rows(0), wsrep_has_ignored_error(false), wsrep_was_on(false), + wsrep_assert_bitmask(0), wsrep_ignore_table(false), wsrep_aborter(0), wsrep_delayed_BF_abort(false), @@ -8659,3 +8660,20 @@ LEX_CSTRING make_string(THD *thd, const char *start_ptr, size_t length= end_ptr - start_ptr; return {strmake_root(thd->mem_root, start_ptr, length), length}; } + + +#ifdef WITH_WSREP +void wsrep_set_assert(THD *thd, uint64 assert, bool is_disabled) +{ + if (is_disabled) { + thd->wsrep_assert_bitmask |= assert; + } else { + thd->wsrep_assert_bitmask &= ~assert; + } +} + +bool wsrep_get_assert(THD *thd, uint64 assert) +{ + return (thd->wsrep_assert_bitmask & assert ? true: false); +} +#endif /* WITH_WSREP */ diff --git a/sql/sql_class.h b/sql/sql_class.h index c9e6d8d5f2d0e..6949e1ae0214f 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -5682,6 +5682,7 @@ class THD: public THD_count, /* this must be first */ bool wsrep_has_ignored_error; /* true if wsrep_on was ON in last wsrep_on_update */ bool wsrep_was_on; + uint64 wsrep_assert_bitmask; /* When enabled, do not replicate/binlog updates from the current table that's @@ -5720,7 +5721,6 @@ class THD: public THD_count, /* this must be first */ return m_wsrep_client_state.transaction().id().get(); } - /* Set next trx id */ @@ -8522,5 +8522,11 @@ LEX_CSTRING make_string(THD *thd, const char *start_ptr, #include "deprecation.h" + +#ifdef WITH_WSREP +extern void wsrep_set_assert(THD* thd, uint64 assert, bool is_disabled); +extern bool wsrep_get_assert(THD* thd, uint64 assert); +#endif + #endif /* MYSQL_SERVER */ #endif /* SQL_CLASS_INCLUDED */ diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc index 31a28ac4d4164..d75a5e208a417 100644 --- a/sql/wsrep_applier.cc +++ b/sql/wsrep_applier.cc @@ -292,11 +292,14 @@ int wsrep_apply_events(THD* thd, /* rollback to savepoint without telling Wsrep-lib */ thd->variables.wsrep_on = false; + wsrep_set_assert(thd, true, WSREP_ASSERT_INNODB_TRX); if (FALSE != trans_rollback_to_savepoint(thd, savepoint)) { thd->variables.wsrep_on = true; + wsrep_set_assert(thd, false, WSREP_ASSERT_INNODB_TRX); break; } thd->variables.wsrep_on = true; + wsrep_set_assert(thd, false, WSREP_ASSERT_INNODB_TRX); /* reset THD object for retry */ thd->clear_error(); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 55c283859cbfc..3022d6dc37778 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1382,6 +1382,10 @@ ATTRIBUTE_NOINLINE static void trx_commit_cleanup(trx_undo_t *&undo) undo= nullptr; } + +extern bool wsrep_get_assert(THD *thd, uint64 assert); + + TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr) { /* We already detached from rseg in write_serialisation_history() */ @@ -1501,7 +1505,9 @@ TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr) trx_finalize_for_fts(this, undo_no != 0); #ifdef WITH_WSREP - ut_ad(is_wsrep() == wsrep_on(mysql_thd)); + if (!mysql_thd || !wsrep_get_assert(mysql_thd, WSREP_ASSERT_INNODB_TRX)) { + ut_ad(is_wsrep() == wsrep_on(mysql_thd)); + } /* Serialization history has been written and the transaction is committed in memory, which makes this commit ordered. Release commit From 9cdb3d272dcd5d32d14641dd80ed778cbea42ce7 Mon Sep 17 00:00:00 2001 From: Pekka Lampio Date: Fri, 4 Jul 2025 14:39:04 +0300 Subject: [PATCH 2/2] Modified the bug fix for MDEV-36554 by replacing the general mechanishm for disabling assertions in InnoDB code with solution specific to this rollback issue. The appliers sets a flag in the applier thread for the duration of the local rollback. This is used in InnoDB code to detect a local rollback. --- include/mysql/service_wsrep.h | 4 ++++ sql/service_wsrep.cc | 16 ++++++++++++++++ sql/sql_class.cc | 19 +------------------ sql/sql_class.h | 11 ++++------- sql/sql_plugin_services.inl | 1 + sql/wsrep_applier.cc | 6 +++--- sql/wsrep_dummy.cc | 3 +++ storage/innobase/trx/trx0trx.cc | 12 +++++------- 8 files changed, 37 insertions(+), 35 deletions(-) diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h index 83ce48a1dbe13..4168b133a4853 100644 --- a/include/mysql/service_wsrep.h +++ b/include/mysql/service_wsrep.h @@ -69,6 +69,7 @@ extern struct wsrep_service_st { bool (*wsrep_thd_ignore_table_func)(MYSQL_THD thd); long long (*wsrep_thd_trx_seqno_func)(const MYSQL_THD thd); my_bool (*wsrep_thd_is_aborting_func)(const MYSQL_THD thd); + my_bool (*wsrep_thd_in_rollback_func)(const THD *thd); void (*wsrep_set_data_home_dir_func)(const char *data_dir); my_bool (*wsrep_thd_is_BF_func)(const MYSQL_THD thd, my_bool sync); my_bool (*wsrep_thd_is_local_func)(const MYSQL_THD thd); @@ -128,6 +129,7 @@ extern struct wsrep_service_st { #define wsrep_set_data_home_dir(A) wsrep_service->wsrep_set_data_home_dir_func(A) #define wsrep_thd_is_BF(T,S) wsrep_service->wsrep_thd_is_BF_func(T,S) #define wsrep_thd_is_aborting(T) wsrep_service->wsrep_thd_is_aborting_func(T) +#define wsrep_thd_in_rollback_func(T) wsrep_service->wsrep_thd_in_rollback_func(T) #define wsrep_thd_is_local(T) wsrep_service->wsrep_thd_is_local_func(T) #define wsrep_thd_self_abort(T) wsrep_service->wsrep_thd_self_abort_func(T) #define wsrep_thd_append_key(T,W,N,K) wsrep_service->wsrep_thd_append_key_func(T,W,N,K) @@ -230,6 +232,8 @@ extern "C" my_bool wsrep_thd_order_before(const MYSQL_THD left, const MYSQL_THD extern "C" my_bool wsrep_thd_skip_locking(const MYSQL_THD thd); /* Return true if thd is aborting */ extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd); +/* Return true if thd is a WSREP applier rolling back a transaction locally */ +extern "C" my_bool wsrep_thd_in_rollback(const MYSQL_THD thd); struct wsrep_key; struct wsrep_key_array; diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index d58a05b3eb54e..d5646fa3cb06f 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -295,6 +295,22 @@ extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd) return false; } +/** Check if a wsrep applier is rolling back a transaction locally. + +This function is used for notifying InnoDB routines that this thread +is rolling back a wsrep transaction locally. + +@param thd thread handle + +@return true if wsrep applier is rolling back a transaction locally +@return false otherwise + +*/ +extern "C" my_bool wsrep_thd_in_rollback(const THD *thd) +{ + return thd->wsrep_applier_is_in_rollback(); +} + static inline enum wsrep::key::type map_key_type(enum Wsrep_service_key_type type) { diff --git a/sql/sql_class.cc b/sql/sql_class.cc index f5c2006486000..685dbd4e9c7ad 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -742,6 +742,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) , wsrep_applier(is_wsrep_applier), wsrep_applier_closing(false), + wsrep_applier_in_rollback(false), wsrep_client_thread(false), wsrep_retry_counter(0), wsrep_PA_safe(true), @@ -762,7 +763,6 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) wsrep_affected_rows(0), wsrep_has_ignored_error(false), wsrep_was_on(false), - wsrep_assert_bitmask(0), wsrep_ignore_table(false), wsrep_aborter(0), wsrep_delayed_BF_abort(false), @@ -8660,20 +8660,3 @@ LEX_CSTRING make_string(THD *thd, const char *start_ptr, size_t length= end_ptr - start_ptr; return {strmake_root(thd->mem_root, start_ptr, length), length}; } - - -#ifdef WITH_WSREP -void wsrep_set_assert(THD *thd, uint64 assert, bool is_disabled) -{ - if (is_disabled) { - thd->wsrep_assert_bitmask |= assert; - } else { - thd->wsrep_assert_bitmask &= ~assert; - } -} - -bool wsrep_get_assert(THD *thd, uint64 assert) -{ - return (thd->wsrep_assert_bitmask & assert ? true: false); -} -#endif /* WITH_WSREP */ diff --git a/sql/sql_class.h b/sql/sql_class.h index 6949e1ae0214f..17fc9fe90cbd3 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -5641,6 +5641,8 @@ class THD: public THD_count, /* this must be first */ #ifdef WITH_WSREP bool wsrep_applier; /* dedicated slave applier thread */ bool wsrep_applier_closing; /* applier marked to close */ + bool wsrep_applier_in_rollback; /* applier is rolling + back a transaction */ bool wsrep_client_thread; /* to identify client threads*/ query_id_t wsrep_last_query_id; XID wsrep_xid; @@ -5682,7 +5684,6 @@ class THD: public THD_count, /* this must be first */ bool wsrep_has_ignored_error; /* true if wsrep_on was ON in last wsrep_on_update */ bool wsrep_was_on; - uint64 wsrep_assert_bitmask; /* When enabled, do not replicate/binlog updates from the current table that's @@ -5760,6 +5761,8 @@ class THD: public THD_count, /* this must be first */ Wsrep_applier_service* wsrep_applier_service; /* wait_for_commit struct for binlog group commit */ wait_for_commit wsrep_wfc; + bool wsrep_applier_is_in_rollback() const + { return wsrep_applier_in_rollback; } #endif /* WITH_WSREP */ /* Handling of timeouts for commands */ @@ -8522,11 +8525,5 @@ LEX_CSTRING make_string(THD *thd, const char *start_ptr, #include "deprecation.h" - -#ifdef WITH_WSREP -extern void wsrep_set_assert(THD* thd, uint64 assert, bool is_disabled); -extern bool wsrep_get_assert(THD* thd, uint64 assert); -#endif - #endif /* MYSQL_SERVER */ #endif /* SQL_CLASS_INCLUDED */ diff --git a/sql/sql_plugin_services.inl b/sql/sql_plugin_services.inl index 2114f3560a7ec..6cf8514216edb 100644 --- a/sql/sql_plugin_services.inl +++ b/sql/sql_plugin_services.inl @@ -159,6 +159,7 @@ static struct wsrep_service_st wsrep_handler = { wsrep_thd_ignore_table, wsrep_thd_trx_seqno, wsrep_thd_is_aborting, + wsrep_thd_in_rollback, wsrep_set_data_home_dir, wsrep_thd_is_BF, wsrep_thd_is_local, diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc index d75a5e208a417..580630a1147dd 100644 --- a/sql/wsrep_applier.cc +++ b/sql/wsrep_applier.cc @@ -292,14 +292,14 @@ int wsrep_apply_events(THD* thd, /* rollback to savepoint without telling Wsrep-lib */ thd->variables.wsrep_on = false; - wsrep_set_assert(thd, true, WSREP_ASSERT_INNODB_TRX); + thd->wsrep_applier_in_rollback= true; if (FALSE != trans_rollback_to_savepoint(thd, savepoint)) { thd->variables.wsrep_on = true; - wsrep_set_assert(thd, false, WSREP_ASSERT_INNODB_TRX); + thd->wsrep_applier_in_rollback= false; break; } thd->variables.wsrep_on = true; - wsrep_set_assert(thd, false, WSREP_ASSERT_INNODB_TRX); + thd->wsrep_applier_in_rollback= false; /* reset THD object for retry */ thd->clear_error(); diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc index 8762dd9907e19..95949b9adffdf 100644 --- a/sql/wsrep_dummy.cc +++ b/sql/wsrep_dummy.cc @@ -94,6 +94,9 @@ long long wsrep_thd_trx_seqno(const THD *) my_bool wsrep_thd_is_aborting(const THD *) { return 0; } +my_bool wsrep_thd_in_rollback(const THD *) +{ return 0; } + void wsrep_set_data_home_dir(const char *) { } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 3022d6dc37778..f4b9446c633f6 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1382,10 +1382,6 @@ ATTRIBUTE_NOINLINE static void trx_commit_cleanup(trx_undo_t *&undo) undo= nullptr; } - -extern bool wsrep_get_assert(THD *thd, uint64 assert); - - TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr) { /* We already detached from rseg in write_serialisation_history() */ @@ -1505,9 +1501,11 @@ TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr) trx_finalize_for_fts(this, undo_no != 0); #ifdef WITH_WSREP - if (!mysql_thd || !wsrep_get_assert(mysql_thd, WSREP_ASSERT_INNODB_TRX)) { - ut_ad(is_wsrep() == wsrep_on(mysql_thd)); - } + /* Assert that any transaction which is started as a wsrep + transaction, also commits or rolls back as a wsrep + transaction. Wsrep applier may turn wsrep off temporarily for + rollback when the applying of wsrep transactions is retried. */ + ut_ad(is_wsrep() == wsrep_on(mysql_thd) || wsrep_thd_in_rollback(mysql_thd)); /* Serialization history has been written and the transaction is committed in memory, which makes this commit ordered. Release commit