From 8836a107692c3181b3c393b052077005ed0cdf4a Mon Sep 17 00:00:00 2001 From: ggivo Date: Wed, 11 Jun 2025 14:48:36 +0300 Subject: [PATCH 1/6] fix: Change evaluation order of circuit breaker and retry mechanisms This commit modifies the order in which circuit breaker and retry mechanisms are evaluated during failover scenarios. Previously, the circuit breaker was applied after all retries were exhausted, which could delay failover in unstable network conditions. Now, the circuit breaker counts each connection error separately, even during retry attempts. --- .../mcf/CircuitBreakerCommandExecutor.java | 2 +- .../failover/FailoverIntegrationTest.java | 54 ++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/main/java/redis/clients/jedis/mcf/CircuitBreakerCommandExecutor.java b/src/main/java/redis/clients/jedis/mcf/CircuitBreakerCommandExecutor.java index 16b8fd7efb..56be63d93d 100644 --- a/src/main/java/redis/clients/jedis/mcf/CircuitBreakerCommandExecutor.java +++ b/src/main/java/redis/clients/jedis/mcf/CircuitBreakerCommandExecutor.java @@ -35,8 +35,8 @@ public T executeCommand(CommandObject commandObject) { DecorateSupplier supplier = Decorators .ofSupplier(() -> this.handleExecuteCommand(commandObject, cluster)); - supplier.withRetry(cluster.getRetry()); supplier.withCircuitBreaker(cluster.getCircuitBreaker()); + supplier.withRetry(cluster.getRetry()); supplier.withFallback(provider.getFallbackExceptionList(), e -> this.handleClusterFailover(commandObject, cluster.getCircuitBreaker())); diff --git a/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java b/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java index e24ab0f764..0dbff85638 100644 --- a/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java +++ b/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java @@ -272,4 +272,56 @@ public void testManualFailoverInflightCommandsWithErrorsPropagateError() throws assertThat(getNodeId(failoverClient.info("server")), equalTo(JEDIS2_ID)); } -} \ No newline at end of file + /** + * Tests that the CircuitBreaker counts each command error separately, and not just after + * all retries are exhausted. This ensures that the circuit breaker opens based on the + * actual number of send commands with failures, and not based on the number of logical operations. + */ + @Test + public void testCircuitBreakerCountsEachConnectionErrorSeparately() throws IOException { + MultiClusterClientConfig failoverConfig = new MultiClusterClientConfig.Builder( + getClusterConfigs(DefaultJedisClientConfig.builder() + .socketTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS) + .connectionTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS).build(), + endpoint1, endpoint2)) + .retryMaxAttempts(2) + .retryWaitDuration(1) + .circuitBreakerSlidingWindowType(COUNT_BASED) + .circuitBreakerSlidingWindowSize(3) + .circuitBreakerFailureRateThreshold(50) // 50% failure rate threshold + .circuitBreakerSlidingWindowMinCalls(3) + .build(); + + MultiClusterPooledConnectionProvider provider = new MultiClusterPooledConnectionProvider(failoverConfig); + try ( UnifiedJedis client = new UnifiedJedis(provider)){ + // Verify initial connection to first endpoint + assertThat(getNodeId(client.info("server")), equalTo(JEDIS1_ID)); + + // Disable first endpoint + redisProxy1.disable(); + + // First command should fail and OPEN the circuit breaker immediately + // If CB is applied after retries, it would take : + // 2 commands to OPEN CB (error is propagated for both commands) + // Failover to the next Endpoint happens on the 3rd command + // If CB is applied before retries, it should open after just 1 command + // CB is OPEN after the 2nd retry of the first command, and error is propagated + // Failover to the next Endpoint happens on the 2nd command + // Command 1 + assertThrows(JedisConnectionException.class, () -> client.info("server")); + + // Circuit breaker should be open after just one command with retries + assertThat(provider.getCluster(1).getCircuitBreaker().getState(), + equalTo(CircuitBreaker.State.OPEN)); + + // Next command should be routed to the second endpoint + //Command 2 + assertThat(getNodeId(client.info("server")), equalTo(JEDIS2_ID)); + + //Command 3 + assertThat(getNodeId(client.info("server")), equalTo(JEDIS2_ID)); + + } + } + +} From 2f60c0e6691d679374eaec4c22aabbd95d7f1a8e Mon Sep 17 00:00:00 2001 From: ggivo Date: Wed, 11 Jun 2025 17:49:19 +0300 Subject: [PATCH 2/6] format --- .../failover/FailoverIntegrationTest.java | 57 ++++++++++--------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java b/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java index 0dbff85638..50270cca20 100644 --- a/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java +++ b/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java @@ -273,27 +273,25 @@ public void testManualFailoverInflightCommandsWithErrorsPropagateError() throws } /** - * Tests that the CircuitBreaker counts each command error separately, and not just after - * all retries are exhausted. This ensures that the circuit breaker opens based on the - * actual number of send commands with failures, and not based on the number of logical operations. + * Tests that the CircuitBreaker counts each command error separately, and not just after all + * retries are exhausted. This ensures that the circuit breaker opens based on the actual number + * of send commands with failures, and not based on the number of logical operations. */ @Test public void testCircuitBreakerCountsEachConnectionErrorSeparately() throws IOException { MultiClusterClientConfig failoverConfig = new MultiClusterClientConfig.Builder( - getClusterConfigs(DefaultJedisClientConfig.builder() - .socketTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS) - .connectionTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS).build(), - endpoint1, endpoint2)) - .retryMaxAttempts(2) - .retryWaitDuration(1) - .circuitBreakerSlidingWindowType(COUNT_BASED) - .circuitBreakerSlidingWindowSize(3) - .circuitBreakerFailureRateThreshold(50) // 50% failure rate threshold - .circuitBreakerSlidingWindowMinCalls(3) - .build(); - - MultiClusterPooledConnectionProvider provider = new MultiClusterPooledConnectionProvider(failoverConfig); - try ( UnifiedJedis client = new UnifiedJedis(provider)){ + getClusterConfigs( + DefaultJedisClientConfig.builder() + .socketTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS) + .connectionTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS).build(), + endpoint1, endpoint2)).retryMaxAttempts(2).retryWaitDuration(1) + .circuitBreakerSlidingWindowType(COUNT_BASED).circuitBreakerSlidingWindowSize(3) + .circuitBreakerFailureRateThreshold(50) // 50% failure rate threshold + .circuitBreakerSlidingWindowMinCalls(3).build(); + + MultiClusterPooledConnectionProvider provider = new MultiClusterPooledConnectionProvider( + failoverConfig); + try (UnifiedJedis client = new UnifiedJedis(provider)) { // Verify initial connection to first endpoint assertThat(getNodeId(client.info("server")), equalTo(JEDIS1_ID)); @@ -301,24 +299,31 @@ public void testCircuitBreakerCountsEachConnectionErrorSeparately() throws IOExc redisProxy1.disable(); // First command should fail and OPEN the circuit breaker immediately - // If CB is applied after retries, it would take : - // 2 commands to OPEN CB (error is propagated for both commands) - // Failover to the next Endpoint happens on the 3rd command - // If CB is applied before retries, it should open after just 1 command - // CB is OPEN after the 2nd retry of the first command, and error is propagated - // Failover to the next Endpoint happens on the 2nd command + // + // If CB is applied after retries: + // - It would take 2 commands to OPEN CB (error is propagated for both commands) + // - Failover to the next Endpoint happens on the 3rd command + // + // If CB is applied before retries: + // - It should open after just 1 command with retries + // - CB is OPEN after the 2nd retry of the first command + // - Failover to the next Endpoint happens on the 2nd command + // + // This test verifies the second case by checking that: + // 1. CB opens after the first command (with retries) + // 2. The second command is routed to the second endpoint // Command 1 assertThrows(JedisConnectionException.class, () -> client.info("server")); // Circuit breaker should be open after just one command with retries assertThat(provider.getCluster(1).getCircuitBreaker().getState(), - equalTo(CircuitBreaker.State.OPEN)); + equalTo(CircuitBreaker.State.OPEN)); // Next command should be routed to the second endpoint - //Command 2 + // Command 2 assertThat(getNodeId(client.info("server")), equalTo(JEDIS2_ID)); - //Command 3 + // Command 3 assertThat(getNodeId(client.info("server")), equalTo(JEDIS2_ID)); } From 9d3cfc49ecbb57d96e5bd33158e9bee976bd2861 Mon Sep 17 00:00:00 2001 From: ggivo Date: Wed, 11 Jun 2025 17:54:00 +0300 Subject: [PATCH 3/6] format --- pom.xml | 2 +- .../jedis/failover/FailoverIntegrationTest.java | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index 5dc119ef0d..35e82c0214 100644 --- a/pom.xml +++ b/pom.xml @@ -329,7 +329,7 @@ net.revelc.code.formatter formatter-maven-plugin - 2.16.0 + 2.23.0 ${project.basedir}/hbase-formatter.xml diff --git a/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java b/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java index 50270cca20..fa832ff44a 100644 --- a/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java +++ b/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java @@ -139,9 +139,9 @@ public void setup() throws IOException { MultiClusterClientConfig failoverConfig = new MultiClusterClientConfig.Builder( getClusterConfigs(clientConfig, endpoint1, endpoint2)).retryMaxAttempts(1) - .retryWaitDuration(1).circuitBreakerSlidingWindowType(COUNT_BASED) - .circuitBreakerSlidingWindowSize(1).circuitBreakerFailureRateThreshold(100) - .circuitBreakerSlidingWindowMinCalls(1).build(); + .retryWaitDuration(1).circuitBreakerSlidingWindowType(COUNT_BASED) + .circuitBreakerSlidingWindowSize(1).circuitBreakerFailureRateThreshold(100) + .circuitBreakerSlidingWindowMinCalls(1).build(); provider = new MultiClusterPooledConnectionProvider(failoverConfig); failoverClient = new UnifiedJedis(provider); @@ -284,10 +284,11 @@ public void testCircuitBreakerCountsEachConnectionErrorSeparately() throws IOExc DefaultJedisClientConfig.builder() .socketTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS) .connectionTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS).build(), - endpoint1, endpoint2)).retryMaxAttempts(2).retryWaitDuration(1) - .circuitBreakerSlidingWindowType(COUNT_BASED).circuitBreakerSlidingWindowSize(3) - .circuitBreakerFailureRateThreshold(50) // 50% failure rate threshold - .circuitBreakerSlidingWindowMinCalls(3).build(); + endpoint1, endpoint2)) + .retryMaxAttempts(2).retryWaitDuration(1).circuitBreakerSlidingWindowType(COUNT_BASED) + .circuitBreakerSlidingWindowSize(3).circuitBreakerFailureRateThreshold(50) // 50% failure + // rate threshold + .circuitBreakerSlidingWindowMinCalls(3).build(); MultiClusterPooledConnectionProvider provider = new MultiClusterPooledConnectionProvider( failoverConfig); From 52f21779e1f5b66dc4343610bfa47c6d76817675 Mon Sep 17 00:00:00 2001 From: ggivo Date: Wed, 11 Jun 2025 17:57:36 +0300 Subject: [PATCH 4/6] use java-8 compatible formater version --- formatter-pom.xml | 2 +- pom.xml | 2 +- .../jedis/failover/FailoverIntegrationTest.java | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/formatter-pom.xml b/formatter-pom.xml index afeccec764..5f9f3a2567 100644 --- a/formatter-pom.xml +++ b/formatter-pom.xml @@ -20,7 +20,7 @@ net.revelc.code.formatter formatter-maven-plugin - 2.23.0 + 2.16.0 ${project.basedir}/hbase-formatter.xml diff --git a/pom.xml b/pom.xml index 35e82c0214..5dc119ef0d 100644 --- a/pom.xml +++ b/pom.xml @@ -329,7 +329,7 @@ net.revelc.code.formatter formatter-maven-plugin - 2.23.0 + 2.16.0 ${project.basedir}/hbase-formatter.xml diff --git a/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java b/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java index fa832ff44a..9ac4ac0907 100644 --- a/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java +++ b/src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java @@ -139,9 +139,9 @@ public void setup() throws IOException { MultiClusterClientConfig failoverConfig = new MultiClusterClientConfig.Builder( getClusterConfigs(clientConfig, endpoint1, endpoint2)).retryMaxAttempts(1) - .retryWaitDuration(1).circuitBreakerSlidingWindowType(COUNT_BASED) - .circuitBreakerSlidingWindowSize(1).circuitBreakerFailureRateThreshold(100) - .circuitBreakerSlidingWindowMinCalls(1).build(); + .retryWaitDuration(1).circuitBreakerSlidingWindowType(COUNT_BASED) + .circuitBreakerSlidingWindowSize(1).circuitBreakerFailureRateThreshold(100) + .circuitBreakerSlidingWindowMinCalls(1).build(); provider = new MultiClusterPooledConnectionProvider(failoverConfig); failoverClient = new UnifiedJedis(provider); @@ -284,11 +284,11 @@ public void testCircuitBreakerCountsEachConnectionErrorSeparately() throws IOExc DefaultJedisClientConfig.builder() .socketTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS) .connectionTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS).build(), - endpoint1, endpoint2)) - .retryMaxAttempts(2).retryWaitDuration(1).circuitBreakerSlidingWindowType(COUNT_BASED) - .circuitBreakerSlidingWindowSize(3).circuitBreakerFailureRateThreshold(50) // 50% failure - // rate threshold - .circuitBreakerSlidingWindowMinCalls(3).build(); + endpoint1, endpoint2)).retryMaxAttempts(2).retryWaitDuration(1) + .circuitBreakerSlidingWindowType(COUNT_BASED).circuitBreakerSlidingWindowSize(3) + .circuitBreakerFailureRateThreshold(50) // 50% failure + // rate threshold + .circuitBreakerSlidingWindowMinCalls(3).build(); MultiClusterPooledConnectionProvider provider = new MultiClusterPooledConnectionProvider( failoverConfig); From 07aef87154b31fa487c1ea007ad7cdcb2813203b Mon Sep 17 00:00:00 2001 From: ggivo Date: Wed, 11 Jun 2025 18:41:20 +0300 Subject: [PATCH 5/6] fix AutomaticFailoverTest --- .../jedis/misc/AutomaticFailoverTest.java | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/test/java/redis/clients/jedis/misc/AutomaticFailoverTest.java b/src/test/java/redis/clients/jedis/misc/AutomaticFailoverTest.java index 21fb2535fe..081029a3a4 100644 --- a/src/test/java/redis/clients/jedis/misc/AutomaticFailoverTest.java +++ b/src/test/java/redis/clients/jedis/misc/AutomaticFailoverTest.java @@ -21,6 +21,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @Tag("failover") @@ -91,37 +92,38 @@ public void transactionWithSwitch() { @Test public void commandFailover() { - int slidingWindowMinCalls = 10; - int slidingWindowSize = 10; + int slidingWindowMinCalls = 6; + int slidingWindowSize = 6; MultiClusterClientConfig.Builder builder = new MultiClusterClientConfig.Builder( getClusterConfigs(clientConfig, hostPortWithFailure, workingEndpoint.getHostAndPort())) + .retryMaxAttempts(3) // Default is 3 .circuitBreakerSlidingWindowMinCalls(slidingWindowMinCalls) .circuitBreakerSlidingWindowSize(slidingWindowSize); RedisFailoverReporter failoverReporter = new RedisFailoverReporter(); - MultiClusterPooledConnectionProvider cacheProvider = new MultiClusterPooledConnectionProvider(builder.build()); - cacheProvider.setClusterFailoverPostProcessor(failoverReporter); + MultiClusterPooledConnectionProvider connectionProvider = new MultiClusterPooledConnectionProvider(builder.build()); + connectionProvider.setClusterFailoverPostProcessor(failoverReporter); - UnifiedJedis jedis = new UnifiedJedis(cacheProvider); + UnifiedJedis jedis = new UnifiedJedis(connectionProvider); String key = "hash-" + System.nanoTime(); log.info("Starting calls to Redis"); assertFalse(failoverReporter.failedOver); - for (int attempt = 0; attempt < 10; attempt++) { - try { - jedis.hset(key, "f1", "v1"); - } catch (JedisConnectionException jce) { - // - } - assertFalse(failoverReporter.failedOver); - } + // First call fails - will be retried 3 times + // this will increase the CircuitBreaker failure count to 3 + assertThrows(JedisConnectionException.class, () -> jedis.hset(key, "c1", "v1")); + // Second call fails - will be retried 3 times + // this will increase the CircuitBreaker failure count to 6 // should failover now - jedis.hset(key, "f1", "v1"); + assertThrows(JedisConnectionException.class, () ->jedis.hset(key, "c2", "v1")); + + // CB is in OPEN state now, next call should cause failover + assertEquals(1L, jedis.hset(key, "c3", "v1")); assertTrue(failoverReporter.failedOver); - assertEquals(Collections.singletonMap("f1", "v1"), jedis.hgetAll(key)); + assertEquals(Collections.singletonMap("c3", "v1"), jedis.hgetAll(key)); jedis.flushAll(); jedis.close(); From c32ecbf35a4d05212e751af0fc1ff6cef5c911d1 Mon Sep 17 00:00:00 2001 From: ggivo Date: Wed, 11 Jun 2025 18:45:11 +0300 Subject: [PATCH 6/6] formating --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 5dc119ef0d..7ff9ac54f9 100644 --- a/pom.xml +++ b/pom.xml @@ -335,6 +335,7 @@ ${project.basedir}/src/main/java/redis/clients/jedis/annots ${project.basedir}/src/main/java/redis/clients/jedis/mcf + ${project.basedir}/src/test/java/redis/clients/jedis/misc/AutomaticFailoverTest.java ${project.basedir}/src/test/java/redis/clients/jedis/failover