Skip to content

Commit 69bde8d

Browse files
committed
Refactored timeoutExecutor to be a member variable. 🫩
1 parent 7c83e8e commit 69bde8d

File tree

2 files changed

+27
-36
lines changed

2 files changed

+27
-36
lines changed

clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@
164164
import java.util.Properties;
165165
import java.util.Set;
166166
import java.util.concurrent.Executors;
167-
import java.util.concurrent.Future;
168167
import java.util.concurrent.ScheduledExecutorService;
169168
import java.util.concurrent.TimeUnit;
170169
import java.util.stream.Collectors;
@@ -247,6 +246,7 @@ public class VenicePushJob implements AutoCloseable {
247246
private final PushJobHeartbeatSenderFactory pushJobHeartbeatSenderFactory;
248247
private PushJobHeartbeatSender pushJobHeartbeatSender = null;
249248
private boolean pushJobStatusUploadDisabledHasBeenLogged = false;
249+
private final ScheduledExecutorService timeoutExecutor;
250250

251251
/**
252252
* @param jobId id of the job
@@ -255,6 +255,7 @@ public class VenicePushJob implements AutoCloseable {
255255
public VenicePushJob(String jobId, Properties vanillaProps) {
256256
this.jobId = jobId;
257257
this.props = getVenicePropsFromVanillaProps(Objects.requireNonNull(vanillaProps, "VPJ props cannot be null"));
258+
this.timeoutExecutor = Executors.newSingleThreadScheduledExecutor();
258259
LOGGER.info("Constructing {}: {}", VenicePushJob.class.getSimpleName(), props.toString(true));
259260
this.sslProperties = Lazy.of(() -> {
260261
try {
@@ -655,36 +656,23 @@ DataWriterComputeJob getDataWriterComputeJob() {
655656
* @throws VeniceException
656657
*/
657658
public void run() {
658-
Optional<SSLFactory> sslFactory = VPJSSLUtils.createSSLFactory(
659-
pushJobSetting.enableSSL,
660-
props.getString(SSL_FACTORY_CLASS_NAME, DEFAULT_SSL_FACTORY_CLASS_NAME),
661-
this.sslProperties);
662-
initControllerClient(pushJobSetting.storeName, sslFactory);
663-
664-
ScheduledExecutorService timeoutExecutor = Executors.newSingleThreadScheduledExecutor();
665-
long bootstrapToOnlineTimeoutInHours =
666-
getStoreResponse(pushJobSetting.storeName).getStore().getBootstrapToOnlineTimeoutInHours();
667-
Future<?> timeoutFuture = timeoutExecutor.schedule(() -> {
668-
LOGGER.error("Timeout reached. Stopping the job.");
669-
cancel();
670-
}, bootstrapToOnlineTimeoutInHours, TimeUnit.HOURS);
671-
672-
try {
673-
runPushJob();
674-
} finally {
675-
timeoutFuture.cancel(true);
676-
timeoutExecutor.shutdown();
677-
}
678-
}
679-
680-
private void runPushJob() {
681659
try {
660+
initControllerClient(pushJobSetting.storeName);
682661
pushJobSetting.clusterName = controllerClient.getClusterName();
683662
LOGGER.info(
684663
"The store {} is discovered in Venice cluster {}",
685664
pushJobSetting.storeName,
686665
pushJobSetting.clusterName);
687666

667+
long bootstrapToOnlineTimeoutInHours =
668+
getStoreResponse(pushJobSetting.storeName).getStore().getBootstrapToOnlineTimeoutInHours();
669+
timeoutExecutor.schedule(() -> {
670+
cancel();
671+
throw new VeniceException(
672+
"Failing push-job for store " + pushJobSetting.storeName + " which is still running after "
673+
+ bootstrapToOnlineTimeoutInHours + " hours.");
674+
}, bootstrapToOnlineTimeoutInHours, TimeUnit.HOURS);
675+
688676
if (pushJobSetting.isSourceKafka) {
689677
initKIFRepushDetails();
690678
}
@@ -1207,9 +1195,12 @@ protected InputDataInfoProvider getInputDataInfoProvider() {
12071195
* 2. A mock controller client is provided
12081196
*
12091197
* @param storeName
1210-
* @param sslFactory
12111198
*/
1212-
private void initControllerClient(String storeName, Optional<SSLFactory> sslFactory) {
1199+
private void initControllerClient(String storeName) {
1200+
Optional<SSLFactory> sslFactory = VPJSSLUtils.createSSLFactory(
1201+
pushJobSetting.enableSSL,
1202+
props.getString(SSL_FACTORY_CLASS_NAME, DEFAULT_SSL_FACTORY_CLASS_NAME),
1203+
this.sslProperties);
12131204
final String controllerD2ZkHost;
12141205
if (pushJobSetting.multiRegion) {
12151206
// In multi region mode, push jobs will communicate with parent controller
@@ -2520,8 +2511,6 @@ private String pushJobPropertiesToString(
25202511
/**
25212512
* A cancel method for graceful cancellation of the running Job to be invoked as a result of user actions or due to
25222513
* the job exceeding bootstrapToOnlineTimeoutInHours.
2523-
*
2524-
* @throws Exception
25252514
*/
25262515
public void cancel() {
25272516
killJob(pushJobSetting, controllerClient);
@@ -2633,6 +2622,7 @@ private static Path getLatestPath(Path path, FileSystem fs) throws IOException {
26332622

26342623
@Override
26352624
public void close() {
2625+
timeoutExecutor.shutdownNow();
26362626
closeVeniceWriter();
26372627
Utils.closeQuietlyWithErrorLogged(dataWriterComputeJob);
26382628
Utils.closeQuietlyWithErrorLogged(controllerClient);

clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/VenicePushJobTest.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -278,15 +278,21 @@ public void testPushJobSettingWithLivenessHeartbeat() {
278278
}
279279
}
280280

281+
@DataProvider(name = "DataWriterJobClasses")
282+
public Object[][] getDataWriterJobClasses() {
283+
return new Object[][] { { DataWriterMRJob.class }, { DataWriterSparkJob.class } };
284+
}
285+
281286
/**
282287
* Test that VenicePushJob.cancel() is called after bootstrapToOnlineTimeoutInHours is reached.
283288
* UNKNOWN status is returned for pollStatusUntilComplete() to stall the job until cancel() can be called.
284289
*/
285-
@Test
286-
public void testPushJobTimeout() throws Exception {
290+
@Test(dataProvider = "DataWriterJobClasses")
291+
public void testPushJobTimeout(Class<? extends DataWriterComputeJob> dataWriterJobClass) throws Exception {
287292
Properties props = getVpjRequiredProperties();
288293
props.put(KEY_FIELD_PROP, "id");
289294
props.put(VALUE_FIELD_PROP, "name");
295+
props.put(DATA_WRITER_COMPUTE_JOB_CLASS, dataWriterJobClass.getName());
290296
ControllerClient client = getClient();
291297
JobStatusQueryResponse response = mock(JobStatusQueryResponse.class);
292298
doReturn("UNKNOWN").when(response).getStatus();
@@ -312,16 +318,11 @@ public void testPushJobTimeout() throws Exception {
312318
}
313319
}
314320

315-
@DataProvider(name = "DataWriterJobClasses")
316-
public Object[][] getDataWriterJobClasses() {
317-
return new Object[][] { { DataWriterMRJob.class }, { DataWriterSparkJob.class } };
318-
}
319-
320321
/**
321322
* Ensures that the data writer job is killed if the job times out. Uses an Answer to stall the data writer job
322323
* while it's running in order for it to get killed properly.
323324
*/
324-
@Test(timeOut = 5 * Time.MS_PER_SECOND, dataProvider = "DataWriterJobClasses")
325+
@Test(dataProvider = "DataWriterJobClasses")
325326
public void testDataWriterComputeJobTimeout(Class<? extends DataWriterComputeJob> dataWriterJobClass)
326327
throws Exception {
327328
Properties props = getVpjRequiredProperties();

0 commit comments

Comments
 (0)