Skip to content

Commit 2b05f07

Browse files
Update Java SDK retry options for poll operations to match Core SDK. (#1989)
= What was changed * Update default initial interval for non-poll retries to 100ms (from 50ms) * Update default maximum interval for non-poll retries to 5s (was 6s) * Update default backoff factor for non-poll retries to 1.7 (was 2.0) * Update default jitter factor for non-poll retries to 20% (was 10%) * Update default initial interval for poll retries to 200ms (was 50ms) * Update default maximum interval for poll retries to 10s (was 60s) * Update default backoff factor for poll retries to 2.0 (was 1.2) * Update default jitter factor for poll retries to 20% (was 10%) * Code cleanups in argument validation code * Default maximum interval is now computed from initial interval as documented (was hardcoded at 6s) = Why? These changes bring the Java SDK's RPC retry options in line with those of the Core SDK, as the latter have been more carefully reasoned through... with one exception. The Java poller's existing backoff factor of 1.2 was wildly inappropriate, causing the first 10 retries to hammer the server within less than 1.5 seconds. Even more confusingly, Java's maximum interval was far, far too long, as a 60s delay between the server becoming available and the worker noticing that fact is unreasonable and unnecessary. The poller now uses a 2.0 backoff and 10s cap, exactly in line with Core SDK. Regarding Java's non-poll RPCs, OTOH, 2.0 is not as aggressive as Core SDK's choice of 1.5, and yet Core is actually in the wrong here: all 10 permitted retries can complete within the 10-second window expected for namespace migrations in Temporal Cloud. 1.7 seems to be the ideal number here, as it leads to a cumulative delay window of 11.8 to 17.8 seconds. I've used 1.7 here, and Core SDK should be updated to use it as well.
1 parent 78e37a6 commit 2b05f07

File tree

4 files changed

+98
-109
lines changed

4 files changed

+98
-109
lines changed

temporal-serviceclient/src/main/java/io/temporal/serviceclient/RpcRetryOptions.java

Lines changed: 69 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@
2020

2121
package io.temporal.serviceclient;
2222

23+
import static io.temporal.serviceclient.rpcretry.DefaultStubServiceOperationRpcRetryOptions.*;
24+
2325
import com.google.common.base.MoreObjects;
2426
import com.google.common.base.Preconditions;
2527
import com.google.protobuf.GeneratedMessageV3;
2628
import io.grpc.Status;
2729
import io.temporal.internal.common.OptionsUtils;
28-
import io.temporal.serviceclient.rpcretry.DefaultStubServiceOperationRpcRetryOptions;
2930
import java.time.Duration;
3031
import java.util.ArrayList;
3132
import java.util.Collections;
@@ -119,20 +120,16 @@ private Builder(RpcRetryOptions options) {
119120

120121
/**
121122
* Interval of the first retry, on regular failures. If coefficient is 1.0 then it is used for
122-
* all retries. Defaults to 50ms.
123+
* all retries. Defaults to 100ms.
123124
*
124125
* @param initialInterval Interval to wait on first retry. Default will be used if set to {@code
125126
* null}.
126127
*/
127128
public Builder setInitialInterval(Duration initialInterval) {
128-
if (initialInterval != null) {
129-
if (initialInterval.isNegative() || initialInterval.isZero()) {
130-
throw new IllegalArgumentException("Invalid interval: " + initialInterval);
131-
}
132-
this.initialInterval = initialInterval;
133-
} else {
134-
this.initialInterval = null;
129+
if (isInvalidDuration(initialInterval)) {
130+
throw new IllegalArgumentException("invalid interval: " + initialInterval);
135131
}
132+
this.initialInterval = initialInterval;
136133
return this;
137134
}
138135

@@ -144,14 +141,10 @@ public Builder setInitialInterval(Duration initialInterval) {
144141
* Defaults to 1000ms, which is used if set to {@code null}.
145142
*/
146143
public Builder setCongestionInitialInterval(Duration congestionInitialInterval) {
147-
if (initialInterval != null) {
148-
if (congestionInitialInterval.isNegative() || congestionInitialInterval.isZero()) {
149-
throw new IllegalArgumentException("Invalid interval: " + congestionInitialInterval);
150-
}
151-
this.congestionInitialInterval = congestionInitialInterval;
152-
} else {
153-
this.congestionInitialInterval = null;
144+
if (isInvalidDuration(congestionInitialInterval)) {
145+
throw new IllegalArgumentException("invalid interval: " + congestionInitialInterval);
154146
}
147+
this.congestionInitialInterval = congestionInitialInterval;
155148
return this;
156149
}
157150

@@ -165,33 +158,26 @@ public Builder setCongestionInitialInterval(Duration congestionInitialInterval)
165158
* null}.
166159
*/
167160
public Builder setExpiration(Duration expiration) {
168-
if (expiration != null) {
169-
if (expiration.isNegative() || expiration.isZero()) {
170-
throw new IllegalArgumentException("Invalid interval: " + expiration);
171-
}
172-
this.expiration = expiration;
173-
} else {
174-
this.expiration = null;
161+
if (isInvalidDuration(expiration)) {
162+
throw new IllegalArgumentException("invalid interval: " + expiration);
175163
}
164+
this.expiration = expiration;
176165
return this;
177166
}
178167

179168
/**
180169
* Coefficient used to calculate the next retry interval. The next retry interval is previous
181-
* interval multiplied by this coefficient. Must be 1 or larger. Default is 2.0.
170+
* interval multiplied by this coefficient. Must be 1 or larger. Default is 1.5.
182171
*
183172
* @param backoffCoefficient Coefficient used to calculate the next retry interval. Defaults to
184173
* 2.0, which is used if set to {@code 0}.
185174
*/
186175
public Builder setBackoffCoefficient(double backoffCoefficient) {
187-
if (backoffCoefficient != 0.0) {
188-
if (!Double.isFinite(backoffCoefficient) || (backoffCoefficient < 1.0)) {
189-
throw new IllegalArgumentException("coefficient has to be >= 1.0: " + backoffCoefficient);
190-
}
191-
this.backoffCoefficient = backoffCoefficient;
192-
} else {
193-
this.backoffCoefficient = 0.0;
176+
if (isInvalidBackoffCoefficient(backoffCoefficient)) {
177+
throw new IllegalArgumentException(
178+
"coefficient must be >= 1.0 and finite: " + backoffCoefficient);
194179
}
180+
this.backoffCoefficient = backoffCoefficient;
195181
return this;
196182
}
197183

@@ -206,7 +192,7 @@ public Builder setBackoffCoefficient(double backoffCoefficient) {
206192
* set to {@code 0}.
207193
*/
208194
public Builder setMaximumAttempts(int maximumAttempts) {
209-
if (maximumAttempts < 0) {
195+
if (isInvalidMaxAttempts(maximumAttempts)) {
210196
throw new IllegalArgumentException("Invalid maximumAttempts: " + maximumAttempts);
211197
}
212198
this.maximumAttempts = maximumAttempts;
@@ -216,41 +202,31 @@ public Builder setMaximumAttempts(int maximumAttempts) {
216202
/**
217203
* Maximum interval between retries. Exponential backoff leads to interval increase. This value
218204
* is the cap of the increase. <br>
219-
* Default is 100x of initial interval. Can't be less than {@link #setInitialInterval(Duration)}
205+
* Default is 50x of initial interval. Can't be less than {@link #setInitialInterval(Duration)}
220206
*
221-
* @param maximumInterval the maximum interval value. Defaults to 100x initial interval, which
222-
* is used if set to {@code null}.
207+
* @param maximumInterval the maximum interval value. Defaults to 50x initial interval, which is
208+
* used if set to {@code null}.
223209
*/
224210
public Builder setMaximumInterval(Duration maximumInterval) {
225-
if (maximumInterval != null) {
226-
if ((maximumInterval.isNegative() || maximumInterval.isZero())) {
227-
throw new IllegalArgumentException("Invalid interval: " + maximumInterval);
228-
}
229-
this.maximumInterval = maximumInterval;
230-
} else {
231-
this.maximumInterval = null;
211+
if (isInvalidDuration(maximumInterval)) {
212+
throw new IllegalArgumentException("invalid interval: " + maximumInterval);
232213
}
214+
this.maximumInterval = maximumInterval;
233215
return this;
234216
}
235217

236218
/**
237219
* Maximum amount of jitter to apply. 0.2 means that actual retry time can be +/- 20% of the
238-
* calculated time. Set to 0 to disable jitter. Must be lower than 1. Default is 0.1.
220+
* calculated time. Set to 0 to disable jitter. Must be lower than 1. Default is 0.2.
239221
*
240222
* @param maximumJitterCoefficient Maximum amount of jitter. Default will be used if set to -1.
241223
*/
242224
public Builder setMaximumJitterCoefficient(double maximumJitterCoefficient) {
243-
if (maximumJitterCoefficient != -1.0) {
244-
if (!Double.isFinite(maximumJitterCoefficient)
245-
|| maximumJitterCoefficient < 0
246-
|| maximumJitterCoefficient >= 1.0) {
247-
throw new IllegalArgumentException(
248-
"maximumJitterCoefficient has to be >= 0 and < 1.0: " + maximumJitterCoefficient);
249-
}
250-
this.maximumJitterCoefficient = maximumJitterCoefficient;
251-
} else {
252-
this.maximumJitterCoefficient = -1.0;
225+
if (isInvalidJitterCoefficient(maximumJitterCoefficient)) {
226+
throw new IllegalArgumentException(
227+
"coefficient must be >= 0 and < 1.0: " + maximumJitterCoefficient);
253228
}
229+
this.maximumJitterCoefficient = maximumJitterCoefficient;
254230
return this;
255231
}
256232

@@ -334,7 +310,7 @@ private List<DoNotRetryItem> merge(List<DoNotRetryItem> o1, List<DoNotRetryItem>
334310
if (o2 != null) {
335311
return new ArrayList<>(o2);
336312
}
337-
if (o1.size() > 0) {
313+
if (o1 != null && !o1.isEmpty()) {
338314
return new ArrayList<>(o1);
339315
}
340316
return null;
@@ -364,51 +340,63 @@ public RpcRetryOptions buildWithDefaultsFrom(RpcRetryOptions rpcRetryOptions) {
364340

365341
/** Validates property values and builds RetryOptions with default values. */
366342
public RpcRetryOptions validateBuildWithDefaults() {
367-
double backoff = backoffCoefficient;
368-
if (backoff == 0d) {
369-
backoff = DefaultStubServiceOperationRpcRetryOptions.BACKOFF;
343+
double backoffCoefficient = this.backoffCoefficient;
344+
if (backoffCoefficient < 1) {
345+
backoffCoefficient = BACKOFF;
370346
}
371-
if (initialInterval == null || initialInterval.isZero() || initialInterval.isNegative()) {
372-
initialInterval = DefaultStubServiceOperationRpcRetryOptions.INITIAL_INTERVAL;
347+
Duration initialInterval = this.initialInterval;
348+
if (initialInterval == null) {
349+
initialInterval = INITIAL_INTERVAL;
373350
}
374-
if (congestionInitialInterval == null
375-
|| congestionInitialInterval.isZero()
376-
|| congestionInitialInterval.isNegative()) {
377-
congestionInitialInterval =
378-
DefaultStubServiceOperationRpcRetryOptions.CONGESTION_INITIAL_INTERVAL;
351+
Duration congestionInitialInterval = this.congestionInitialInterval;
352+
if (congestionInitialInterval == null) {
353+
congestionInitialInterval = CONGESTION_INITIAL_INTERVAL;
379354
}
380-
if (expiration == null || expiration.isZero() || expiration.isNegative()) {
381-
expiration = DefaultStubServiceOperationRpcRetryOptions.EXPIRATION_INTERVAL;
355+
Duration expiration = this.expiration;
356+
if (expiration == null) {
357+
expiration = EXPIRATION_INTERVAL;
382358
}
383-
384-
Duration maxInterval = this.maximumInterval;
385-
386-
if (maxInterval == null || maxInterval.isZero() || maxInterval.isNegative()) {
387-
if (maximumAttempts == 0) {
388-
maxInterval = DefaultStubServiceOperationRpcRetryOptions.MAXIMUM_INTERVAL;
389-
} else {
390-
maxInterval = null;
391-
}
359+
Duration maximumInterval = this.maximumInterval;
360+
if (maximumInterval == null && maximumAttempts == 0) {
361+
maximumInterval = initialInterval.multipliedBy(MAXIMUM_INTERVAL_MULTIPLIER);
392362
}
393-
394-
if (maximumJitterCoefficient == -1.0) {
395-
maximumJitterCoefficient =
396-
DefaultStubServiceOperationRpcRetryOptions.MAXIMUM_JITTER_COEFFICIENT;
363+
double maximumJitterCoefficient = this.maximumJitterCoefficient;
364+
if (maximumJitterCoefficient < 0) {
365+
maximumJitterCoefficient = MAXIMUM_JITTER_COEFFICIENT;
397366
}
398367

399368
RpcRetryOptions result =
400369
new RpcRetryOptions(
401370
initialInterval,
402371
congestionInitialInterval,
403-
backoff,
372+
backoffCoefficient,
404373
expiration,
405374
maximumAttempts,
406-
maxInterval,
375+
maximumInterval,
407376
maximumJitterCoefficient,
408377
MoreObjects.firstNonNull(doNotRetry, Collections.emptyList()));
409378
result.validate();
410379
return result;
411380
}
381+
382+
private static boolean isInvalidDuration(Duration d) {
383+
if (d == null) {
384+
return false;
385+
}
386+
return d.isNegative() || d.isZero();
387+
}
388+
389+
private static boolean isInvalidMaxAttempts(int i) {
390+
return i < 0;
391+
}
392+
393+
private static boolean isInvalidBackoffCoefficient(double v) {
394+
return !Double.isFinite(v) || (v != 0.0 && v < 1.0);
395+
}
396+
397+
private static boolean isInvalidJitterCoefficient(double v) {
398+
return !Double.isFinite(v) || (v != -1.0 && (v < 0.0 || v >= 1.0));
399+
}
412400
}
413401

414402
private final Duration initialInterval;

temporal-serviceclient/src/main/java/io/temporal/serviceclient/rpcretry/DefaultStubLongPollRpcRetryOptions.java

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,12 @@
2525

2626
/** Default rpc retry options for long polls like waiting for the workflow finishing and result. */
2727
public class DefaultStubLongPollRpcRetryOptions {
28-
public static final Duration INITIAL_INTERVAL = Duration.ofMillis(50);
28+
29+
public static final Duration INITIAL_INTERVAL = Duration.ofMillis(200);
2930
public static final Duration CONGESTION_INITIAL_INTERVAL = Duration.ofMillis(1000);
30-
public static final Duration MAXIMUM_INTERVAL = Duration.ofMinutes(1);
31-
public static final double BACKOFF = 1.2;
32-
public static final double MAXIMUM_JITTER_COEFFICIENT = 0.1;
31+
public static final Duration MAXIMUM_INTERVAL = Duration.ofSeconds(10);
32+
public static final double BACKOFF = 2.0;
33+
public static final double MAXIMUM_JITTER_COEFFICIENT = 0.2;
3334

3435
// partial build because expiration is not set, long polls work with absolute deadlines instead
3536
public static final RpcRetryOptions INSTANCE = getBuilder().build();
@@ -41,14 +42,11 @@ public class DefaultStubLongPollRpcRetryOptions {
4142
}
4243

4344
private static RpcRetryOptions.Builder getBuilder() {
44-
RpcRetryOptions.Builder roBuilder =
45-
RpcRetryOptions.newBuilder()
46-
.setInitialInterval(INITIAL_INTERVAL)
47-
.setCongestionInitialInterval(CONGESTION_INITIAL_INTERVAL)
48-
.setBackoffCoefficient(BACKOFF)
49-
.setMaximumInterval(MAXIMUM_INTERVAL)
50-
.setMaximumJitterCoefficient(MAXIMUM_JITTER_COEFFICIENT);
51-
52-
return roBuilder;
45+
return RpcRetryOptions.newBuilder()
46+
.setInitialInterval(INITIAL_INTERVAL)
47+
.setCongestionInitialInterval(CONGESTION_INITIAL_INTERVAL)
48+
.setBackoffCoefficient(BACKOFF)
49+
.setMaximumInterval(MAXIMUM_INTERVAL)
50+
.setMaximumJitterCoefficient(MAXIMUM_JITTER_COEFFICIENT);
5351
}
5452
}

temporal-serviceclient/src/main/java/io/temporal/serviceclient/rpcretry/DefaultStubServiceOperationRpcRetryOptions.java

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,22 +29,19 @@
2929
* finishing).
3030
*/
3131
public class DefaultStubServiceOperationRpcRetryOptions {
32-
public static final Duration INITIAL_INTERVAL = Duration.ofMillis(50);
32+
33+
public static final Duration INITIAL_INTERVAL = Duration.ofMillis(100);
3334
public static final Duration CONGESTION_INITIAL_INTERVAL = Duration.ofMillis(1000);
3435
public static final Duration EXPIRATION_INTERVAL = Duration.ofMinutes(1);
35-
public static final Duration MAXIMUM_INTERVAL;
36-
public static final double BACKOFF = 2;
37-
public static final double MAXIMUM_JITTER_COEFFICIENT = 0.1;
36+
public static final int MAXIMUM_INTERVAL_MULTIPLIER = 50;
37+
public static final Duration MAXIMUM_INTERVAL =
38+
INITIAL_INTERVAL.multipliedBy(MAXIMUM_INTERVAL_MULTIPLIER);
39+
public static final double BACKOFF = 1.7;
40+
public static final double MAXIMUM_JITTER_COEFFICIENT = 0.2;
3841

3942
public static final RpcRetryOptions INSTANCE;
4043

4144
static {
42-
Duration maxInterval = EXPIRATION_INTERVAL.dividedBy(10);
43-
if (maxInterval.compareTo(INITIAL_INTERVAL) < 0) {
44-
maxInterval = INITIAL_INTERVAL;
45-
}
46-
MAXIMUM_INTERVAL = maxInterval;
47-
4845
INSTANCE = getBuilder().validateBuildWithDefaults();
4946
}
5047

temporal-serviceclient/src/test/java/io/temporal/internal/retryer/GrpcSyncRetryerTest.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
package io.temporal.internal.retryer;
2222

23+
import static io.temporal.serviceclient.rpcretry.DefaultStubServiceOperationRpcRetryOptions.CONGESTION_INITIAL_INTERVAL;
24+
import static io.temporal.serviceclient.rpcretry.DefaultStubServiceOperationRpcRetryOptions.MAXIMUM_JITTER_COEFFICIENT;
2325
import static org.junit.Assert.*;
2426

2527
import io.grpc.Context;
@@ -101,8 +103,9 @@ public void testDoNotRetry() {
101103
try {
102104
DEFAULT_SYNC_RETRYER.retry(
103105
() -> {
104-
if (attempts.incrementAndGet() > 1)
106+
if (attempts.incrementAndGet() > 1) {
105107
fail("We should not retry on exception that we specified to don't retry");
108+
}
106109
throw new StatusRuntimeException(Status.fromCode(STATUS_CODE));
107110
},
108111
new GrpcRetryer.GrpcRetryerOptions(options, null),
@@ -130,7 +133,9 @@ public void testInterruptedException() {
130133
try {
131134
DEFAULT_SYNC_RETRYER.retry(
132135
() -> {
133-
if (attempts.incrementAndGet() > 1) fail("We should not retry on InterruptedException");
136+
if (attempts.incrementAndGet() > 1) {
137+
fail("We should not retry on InterruptedException");
138+
}
134139
throw new InterruptedException();
135140
},
136141
new GrpcRetryer.GrpcRetryerOptions(options, null),
@@ -156,8 +161,9 @@ public void testNotStatusRuntimeException() {
156161
try {
157162
DEFAULT_SYNC_RETRYER.retry(
158163
() -> {
159-
if (attempts.incrementAndGet() > 1)
164+
if (attempts.incrementAndGet() > 1) {
160165
fail("We should not retry if the exception is not StatusRuntimeException");
166+
}
161167
throw new IllegalArgumentException("simulated");
162168
},
163169
new GrpcRetryer.GrpcRetryerOptions(options, null),
@@ -335,7 +341,7 @@ public void testCongestionAndJitterAreNotMandatory() {
335341
assertEquals(3, options.getMaximumAttempts());
336342

337343
// The following were added latter; they must silently use default values if unspecified
338-
assertEquals(Duration.ofMillis(1000), options.getCongestionInitialInterval());
339-
assertEquals(0.1, options.getMaximumJitterCoefficient(), 0.01);
344+
assertEquals(CONGESTION_INITIAL_INTERVAL, options.getCongestionInitialInterval());
345+
assertEquals(MAXIMUM_JITTER_COEFFICIENT, options.getMaximumJitterCoefficient(), 0.01);
340346
}
341347
}

0 commit comments

Comments
 (0)