-
Notifications
You must be signed in to change notification settings - Fork 2
Galera feature: retry applying of write sets at slave nodes #387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 11.4
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
connection node_2; | ||
connection node_1; | ||
CALL mtr.add_suppression("Event .* Update_rows.* apply failed"); | ||
CALL mtr.add_suppression("Inconsistency detected"); | ||
CALL mtr.add_suppression("Failed to apply write set:.*"); | ||
CALL mtr.add_suppression("Event .* Write_rows.* apply failed"); | ||
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY DEFAULT 0, f2 char(12)); | ||
CREATE TABLE t3 (f1 INTEGER PRIMARY KEY DEFAULT 0, f2 char(12)); | ||
START TRANSACTION; | ||
INSERT INTO t3 (f1, f2) VALUES (1, 'a'); | ||
INSERT INTO t3 (f1, f2) VALUES (2, 'b'); | ||
INSERT INTO t3 (f1, f2) VALUES (3, 'c'), (4, 'd'), (5, 'e'); | ||
COMMIT; | ||
connection node_2; | ||
SET GLOBAL wsrep_applier_retry_count = 2; | ||
SET GLOBAL debug_dbug = "d,apply_event_fail_once:o,/dev/null"; | ||
connection node_1; | ||
START TRANSACTION; | ||
UPDATE t3 SET f2 = 'ax' WHERE f1 = 1; | ||
UPDATE t3 SET f2 = 'bx' WHERE f1 = 2; | ||
INSERT INTO t1 (f1, f2) VALUES (3, 'c'), (4, 'd'), (5, 'e'); | ||
UPDATE t3 SET f2 = 'cx' WHERE f1 = 3; | ||
UPDATE t3 SET f2 = 'dx' WHERE f1 = 4; | ||
DELETE FROM t3 WHERE f1 = 5; | ||
COMMIT; | ||
connection node_2; | ||
connection node_1; | ||
SELECT COUNT(*) AS expect_3 FROM t1; | ||
expect_3 | ||
3 | ||
SELECT COUNT(*) AS expect_4 FROM t3; | ||
expect_4 | ||
4 | ||
connection node_2; | ||
SELECT COUNT(*) AS expect_3 FROM t1; | ||
expect_3 | ||
3 | ||
SELECT COUNT(*) AS expect_4 FROM t3; | ||
expect_4 | ||
4 | ||
connection node_1; | ||
DROP TABLE t1; | ||
DROP TABLE t3; | ||
connection node_2; | ||
Shutting down server ... | ||
SET wsrep_on=OFF; | ||
Restarting server ... | ||
connection node_1; | ||
SET wsrep_sync_wait=0; | ||
CREATE TABLE t2 (f1 INTEGER PRIMARY KEY DEFAULT 0, f2 char(12)); | ||
connection node_2; | ||
SET GLOBAL wsrep_applier_retry_count = 2; | ||
SET GLOBAL debug_dbug = ''; | ||
SET GLOBAL debug_dbug = "d,apply_event_fail_always:o,/dev/null"; | ||
connection node_1; | ||
START TRANSACTION; | ||
INSERT INTO t2 (f1, f2) VALUES (1, 'a'), (2, 'b'); | ||
COMMIT; | ||
connection node_2; | ||
Shutting down server ... | ||
SET wsrep_on=OFF; | ||
Restarting server ... | ||
connection node_1; | ||
SET wsrep_sync_wait=0; | ||
connection node_2; | ||
SELECT COUNT(*) AS expect_2 FROM t2; | ||
expect_2 | ||
2 | ||
SET GLOBAL debug_dbug = DEFAULT; | ||
connection node_1; | ||
SET GLOBAL wsrep_applier_retry_count = 0; | ||
SET DEBUG_SYNC = 'RESET'; | ||
DROP TABLE t2; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
# | ||
# Test retrying applying of a transaction | ||
# | ||
|
||
# Presumably provides the node_1 / node_2 connections used throughout this | ||
# test -- confirm against include/galera_cluster.inc. | ||
--source include/galera_cluster.inc | ||
# Failures are injected with SET GLOBAL debug_dbug keywords | ||
# (apply_event_fail_once / apply_event_fail_always), which need a server | ||
# built with DBUG support, so require a debug build explicitly. | ||
--source include/have_debug.inc | ||
# Required for the SET DEBUG_SYNC = 'RESET' issued in the final cleanup. | ||
--source include/have_debug_sync.inc | ||
|
||
# Register suppressions for the error-log messages that the injected apply | ||
# failures in Case 1 and Case 2 are expected to produce. | ||
CALL mtr.add_suppression("Event .* Update_rows.* apply failed"); | ||
CALL mtr.add_suppression("Inconsistency detected"); | ||
CALL mtr.add_suppression("Failed to apply write set:.*"); | ||
CALL mtr.add_suppression("Event .* Write_rows.* apply failed"); | ||
|
||
# | ||
# Case 1: Retrying succeeds after one retry event, no error is raised. | ||
# | ||
# Two tables with identical layout; t1 starts empty, t3 is seeded below. | ||
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY DEFAULT 0, f2 char(12)); | ||
CREATE TABLE t3 (f1 INTEGER PRIMARY KEY DEFAULT 0, f2 char(12)); | ||
|
||
# Seed t3 with five rows (f1 = 1..5) in a single transaction. | ||
START TRANSACTION; | ||
INSERT INTO t3 (f1, f2) VALUES (1, 'a'); | ||
INSERT INTO t3 (f1, f2) VALUES (2, 'b'); | ||
INSERT INTO t3 (f1, f2) VALUES (3, 'c'), (4, 'd'), (5, 'e'); | ||
COMMIT; | ||
|
||
# wait till the insert transaction has been replicated and committed in node_2 | ||
# (first that table t3 exists there, then that it contains rows) | ||
--connection node_2 | ||
--let $wait_condition = SELECT COUNT(*) > 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't3'; | ||
--source include/wait_condition.inc | ||
--let $wait_condition = SELECT COUNT(*) > 0 FROM t3; | ||
--source include/wait_condition.inc | ||
|
||
# On node_2 only: set the applier retry count to 2 and arm the | ||
# apply_event_fail_once debug keyword (debug trace output goes to /dev/null). | ||
# NOTE(review): presumably the keyword fails the first apply attempt only, | ||
# so that a retry succeeds -- confirm in the server code. | ||
SET GLOBAL wsrep_applier_retry_count = 2; | ||
SET GLOBAL debug_dbug = "d,apply_event_fail_once:o,/dev/null"; | ||
|
||
# One transaction on node_1 touching both tables: four updates and one | ||
# delete on t3 (leaving 4 rows) plus three inserts into t1. | ||
--connection node_1 | ||
START TRANSACTION; | ||
UPDATE t3 SET f2 = 'ax' WHERE f1 = 1; | ||
UPDATE t3 SET f2 = 'bx' WHERE f1 = 2; | ||
INSERT INTO t1 (f1, f2) VALUES (3, 'c'), (4, 'd'), (5, 'e'); | ||
UPDATE t3 SET f2 = 'cx' WHERE f1 = 3; | ||
UPDATE t3 SET f2 = 'dx' WHERE f1 = 4; | ||
DELETE FROM t3 WHERE f1 = 5; | ||
COMMIT; | ||
|
||
# wait till the transaction has been replicated and committed in node_2 | ||
# (t3 drops from 5 rows to 4 once the DELETE has been applied) | ||
--connection node_2 | ||
--let $wait_condition = SELECT COUNT(*) = 4 FROM t3; | ||
--source include/wait_condition.inc | ||
|
||
# Both nodes must report the same row counts: 3 in t1 and 4 in t3. | ||
--connection node_1 | ||
SELECT COUNT(*) AS expect_3 FROM t1; | ||
SELECT COUNT(*) AS expect_4 FROM t3; | ||
|
||
--connection node_2 | ||
SELECT COUNT(*) AS expect_3 FROM t1; | ||
SELECT COUNT(*) AS expect_4 FROM t3; | ||
|
||
# | ||
# Cleanup after Case 1. | ||
# | ||
|
||
# Drop the Case 1 tables from node_1. | ||
--connection node_1 | ||
DROP TABLE t1; | ||
DROP TABLE t3; | ||
|
||
# shutdown node 2 and restart it | ||
# Wait first until t3 is no longer visible on node_2, then disable wsrep for | ||
# this session before the server is stopped. | ||
--connection node_2 | ||
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't3'; | ||
--source include/wait_condition.inc | ||
--echo Shutting down server ... | ||
SET wsrep_on=OFF; | ||
temeo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# Stop node_2, delete its grastate.dat from the data directory while it is | ||
# down, and start it again. | ||
# NOTE(review): removing grastate.dat presumably forces a full state | ||
# transfer when the node rejoins -- confirm. | ||
--source include/shutdown_mysqld.inc | ||
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat | ||
--echo Restarting server ... | ||
--source include/start_mysqld.inc | ||
|
||
# wait till node 2 is back in the cluster | ||
# (polled from node_1 with wsrep_sync_wait set to 0 for this session) | ||
--connection node_1 | ||
SET wsrep_sync_wait=0; | ||
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_size' | ||
--source include/wait_condition.inc | ||
|
||
|
||
# | ||
# Case 2: Slave retries applying of a transaction multiple times. All | ||
# retry attempts fail, and the applying will fail with the expected | ||
# error. | ||
# | ||
|
||
# Still on connection node_1 here: create the table used by Case 2. | ||
# NOTE(review): there is no wait for t2 to exist on node_2 before the | ||
# failure injection is armed below -- confirm this ordering is safe. | ||
CREATE TABLE t2 (f1 INTEGER PRIMARY KEY DEFAULT 0, f2 char(12)); | ||
|
||
--connection node_2 | ||
|
||
# Set the retry count to 2 again, clear any previous debug_dbug setting, | ||
# then arm apply_event_fail_always (debug trace output goes to /dev/null). | ||
SET GLOBAL wsrep_applier_retry_count = 2; | ||
SET GLOBAL debug_dbug = ''; | ||
SET GLOBAL debug_dbug = "d,apply_event_fail_always:o,/dev/null"; | ||
|
||
# Transaction whose write set node_2 is expected to fail to apply. | ||
--connection node_1 | ||
START TRANSACTION; | ||
INSERT INTO t2 (f1, f2) VALUES (1, 'a'), (2, 'b'); | ||
COMMIT; | ||
|
||
# node 2 should crash now, wait for the crash | ||
# NOTE(review): this wait only observes node_1's wsrep_cluster_size dropping | ||
# to 1; statements are still sent to node_2 and it is shut down explicitly | ||
# below, so "crash" may really mean "left the cluster" -- confirm. | ||
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_size' | ||
--source include/wait_condition.inc | ||
|
||
# restart node 2 | ||
--connection node_2 | ||
--echo Shutting down server ... | ||
SET wsrep_on=OFF; | ||
temeo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# Stop node_2, wait until the client connection is gone, then delete its | ||
# grastate.dat from the data directory before the restart. | ||
--source include/shutdown_mysqld.inc | ||
--source include/wait_until_disconnected.inc | ||
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is a race condition here: a transaction was previously committed on node_1, and at this point node_2 is shut down. However, nothing checks the fate of the INSERT replicated from node_1: it may still be replicating, may currently be applying, or may already have been committed on node_2. For deterministic test behavior, the state of the INSERT transaction should be synced here, or, if it does not matter for the test result and can safely be ignored, that should be documented. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, as this test phase is supposed to cause a certain applier failure, the node should crash, so there is no need to shut it down anymore. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed table names in row0ins.c, but Synchronization.
The test does not work without shutting down the server on node 2 after applier failure. |
||
# Bring node_2 back up. | ||
--echo Restarting server ... | ||
--source include/start_mysqld.inc | ||
|
||
# wait till node 2 is back in the cluster | ||
--connection node_1 | ||
SET wsrep_sync_wait=0; | ||
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_size' | ||
--source include/wait_condition.inc | ||
|
||
# After rejoining, node_2 must end up with both rows inserted on node_1. | ||
--connection node_2 | ||
--let $wait_condition = SELECT COUNT(*) = 2 FROM t2; | ||
--source include/wait_condition.inc | ||
SELECT COUNT(*) AS expect_2 FROM t2; | ||
SET GLOBAL debug_dbug = DEFAULT; | ||
|
||
# | ||
# Cleanup | ||
# | ||
|
||
# NOTE(review): wsrep_applier_retry_count was only ever changed on node_2 | ||
# in this test, yet it is reset here on node_1 -- confirm this is intended. | ||
--connection node_1 | ||
SET GLOBAL wsrep_applier_retry_count = 0; | ||
SET DEBUG_SYNC = 'RESET'; | ||
DROP TABLE t2; |
Uh oh!
There was an error while loading. Please reload this page.