Skip to content

Commit

Permalink
Retry CCR shard follow task when no seed node left (#63225)
Browse files Browse the repository at this point in the history
If the connection between the clusters is lost or the leader cluster
is offline, CCR shard-follow tasks can stop with an IllegalStateException
whose message is "no seed node left". CCR should retry on this error.
  • Loading branch information
dnhatn committed Oct 6, 2020
1 parent cd38a51 commit 2f119bc
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,7 @@ static boolean shouldRetry(final Exception e) {
actual instanceof NodeClosedException ||
actual instanceof NoSuchRemoteClusterException ||
(actual.getMessage() != null && actual.getMessage().contains("TransportService is closed")) ||
(actual instanceof IllegalStateException && "no seed node left".equals(actual.getMessage())) ||
actual instanceof EsRejectedExecutionException ||
actual instanceof CircuitBreakingException;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,12 +263,11 @@ public void testReceiveRetryableError() {
int max = randomIntBetween(1, 30);
final Exception[] exceptions = new Exception[max];
for (int i = 0; i < max; i++) {
final Exception exception;
if (randomBoolean()) {
exception = new ShardNotFoundException(new ShardId("leader_index", "", 0));
} else {
exception = new EsRejectedExecutionException("leader_index rejected");
}
final Exception exception = randomFrom(
new ShardNotFoundException(new ShardId("leader_index", "", 0)),
new EsRejectedExecutionException("leader_index rejected"),
new IllegalStateException("no seed node left")
);
exceptions[i] = exception;
readFailures.add(exception);
}
Expand All @@ -286,16 +285,21 @@ public void testReceiveRetryableError() {
final Map.Entry<Long, Tuple<Integer, ElasticsearchException>> entry = status.readExceptions().entrySet().iterator().next();
assertThat(entry.getValue().v1(), equalTo(Math.toIntExact(retryCounter.get())));
assertThat(entry.getKey(), equalTo(0L));
if (exceptions[Math.toIntExact(retryCounter.get()) - 1] instanceof ShardNotFoundException) {
final Exception error = exceptions[Math.toIntExact(retryCounter.get()) - 1];
if (error instanceof ShardNotFoundException) {
assertThat(entry.getValue().v2(), instanceOf(ShardNotFoundException.class));
final ShardNotFoundException shardNotFoundException = (ShardNotFoundException) entry.getValue().v2();
assertThat(shardNotFoundException.getShardId().getIndexName(), equalTo("leader_index"));
assertThat(shardNotFoundException.getShardId().getId(), equalTo(0));
} else {
} else if (error instanceof EsRejectedExecutionException) {
assertThat(entry.getValue().v2().getCause(), instanceOf(EsRejectedExecutionException.class));
final EsRejectedExecutionException rejectedExecutionException =
(EsRejectedExecutionException) entry.getValue().v2().getCause();
assertThat(rejectedExecutionException.getMessage(), equalTo("leader_index rejected"));
} else {
assertThat(entry.getValue().v2().getCause(), instanceOf(IllegalStateException.class));
final IllegalStateException noSeedError = (IllegalStateException) entry.getValue().v2().getCause();
assertThat(noSeedError.getMessage(), equalTo("no seed node left"));
}
}
retryCounter.incrementAndGet();
Expand Down

0 comments on commit 2f119bc

Please sign in to comment.