Commit 4053da5

Merge branch 'main' into client_add_key_count_metric
2 parents: f662248 + 50fdca4

File tree: 24 files changed (+802, -132 lines)

.github/rawWorkflows/gh-ci-parameterized-flow.txt

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@
           echo "Repository name: ${{ github.repository }}"
           echo "event name: ${{ github.event_name }}"
           find . -path "**/build/reports/*" -or -path "**/build/test-results/*" > artifacts.list
+          find . -type f \( -name 'core.*' -o -name 'hs_err_pid*.log' \) -exec xz -9 {} \; -exec echo "{}.xz" \; >> artifacts.list
           rsync -R --files-from=artifacts.list . ${{ github.job }}-artifacts
           tar -zcvf ${{ github.job }}-jdk${{ matrix.jdk }}-logs.tar.gz ${{ github.job }}-artifacts
       - name: Generate Fork Repo Test Reports

.github/workflows/VeniceCI-E2ETests.yml

Lines changed: 31 additions & 0 deletions
Large diffs are not rendered by default.

clients/da-vinci-client/src/main/java/com/linkedin/davinci/config/VeniceServerConfig.java

Lines changed: 8 additions & 0 deletions
@@ -73,6 +73,7 @@
 import static com.linkedin.venice.ConfigKeys.SERVER_COMPUTE_FAST_AVRO_ENABLED;
 import static com.linkedin.venice.ConfigKeys.SERVER_COMPUTE_QUEUE_CAPACITY;
 import static com.linkedin.venice.ConfigKeys.SERVER_COMPUTE_THREAD_NUM;
+import static com.linkedin.venice.ConfigKeys.SERVER_CONSUMER_POLL_TRACKER_STALE_THRESHOLD_IN_SECONDS;
 import static com.linkedin.venice.ConfigKeys.SERVER_CONSUMER_POOL_ALLOCATION_STRATEGY;
 import static com.linkedin.venice.ConfigKeys.SERVER_CONSUMER_POOL_SIZE_FOR_CURRENT_VERSION_AA_WC_LEADER;
 import static com.linkedin.venice.ConfigKeys.SERVER_CONSUMER_POOL_SIZE_FOR_CURRENT_VERSION_NON_AA_WC_LEADER;
@@ -637,6 +638,7 @@ public class VeniceServerConfig extends VeniceClusterConfig {
       Arrays.asList(0.4D, 0.6D, 0.8D, 1.0D, 1.2D, 1.4D, 1.6D);
 
   private final boolean isParticipantMessageStoreEnabled;
+  private final long consumerPollTrackerStaleThresholdInSeconds;
 
   public VeniceServerConfig(VeniceProperties serverProperties) throws ConfigurationException {
     this(serverProperties, Collections.emptyMap());
@@ -1087,6 +1089,8 @@ public VeniceServerConfig(VeniceProperties serverProperties, Map<String, Map<Str
         serverProperties.getInt(SERVER_LOAD_CONTROLLER_MULTI_GET_LATENCY_ACCEPT_THRESHOLD_IN_MS, 100);
     loadControllerComputeLatencyAcceptThresholdMs =
         serverProperties.getInt(SERVER_LOAD_CONTROLLER_COMPUTE_LATENCY_ACCEPT_THRESHOLD_IN_MS, 100);
+    consumerPollTrackerStaleThresholdInSeconds = serverProperties
+        .getLong(SERVER_CONSUMER_POLL_TRACKER_STALE_THRESHOLD_IN_SECONDS, TimeUnit.MINUTES.toSeconds(15));
   }
 
   List<Double> extractThrottleLimitFactorsFor(VeniceProperties serverProperties, String configKey) {
@@ -2012,4 +2016,8 @@ public int getLoadControllerMultiGetLatencyAcceptThresholdMs() {
   public int getLoadControllerComputeLatencyAcceptThresholdMs() {
     return loadControllerComputeLatencyAcceptThresholdMs;
   }
+
+  public long getConsumerPollTrackerStaleThresholdSeconds() {
+    return consumerPollTrackerStaleThresholdInSeconds;
+  }
 }
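
For context, the new knob falls back to 15 minutes (900 seconds) when the key is absent. A minimal, self-contained sketch of that lookup semantics; the key string below is a hypothetical stand-in for the value behind ConfigKeys.SERVER_CONSUMER_POLL_TRACKER_STALE_THRESHOLD_IN_SECONDS, which this excerpt does not show:

import java.util.Properties;
import java.util.concurrent.TimeUnit;

public class PollTrackerConfigSketch {
  // Hypothetical stand-in for the real config key's string value.
  static final String KEY = "server.consumer.poll.tracker.stale.threshold.in.seconds";

  // Mirrors serverProperties.getLong(key, TimeUnit.MINUTES.toSeconds(15)):
  // fall back to 15 minutes (900 seconds) when the key is absent.
  static long staleThresholdSeconds(Properties props) {
    String value = props.getProperty(KEY);
    return value == null ? TimeUnit.MINUTES.toSeconds(15) : Long.parseLong(value);
  }

  public static void main(String[] args) {
    Properties props = new Properties();
    System.out.println(staleThresholdSeconds(props)); // 900 (the default)
    props.setProperty(KEY, "300");
    System.out.println(staleThresholdSeconds(props)); // 300
  }
}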

clients/da-vinci-client/src/main/java/com/linkedin/davinci/kafka/consumer/AbstractKafkaConsumerService.java

Lines changed: 1 addition & 0 deletions
@@ -46,4 +46,5 @@ public abstract Map<PubSubTopicPartition, TopicPartitionIngestionInfo> getIngest
       PubSubTopic versionTopic,
       PubSubTopicPartition pubSubTopicPartition);
 
+  public abstract Map<PubSubTopicPartition, Long> getStaleTopicPartitions(long thresholdTimestamp);
 }
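
The contract here: thresholdTimestamp is an absolute epoch-millisecond cutoff (callers pass now - staleThresholdMs), and the result maps each stale partition to its last recorded activity timestamp. One plausible shape for a concrete subclass that merges results from several per-task trackers; this is a hypothetical sketch with String keys standing in for PubSubTopicPartition, not the real implementation from this commit:

import java.util.HashMap;
import java.util.Map;

class StaleAggregationSketch {
  interface TrackerView {
    Map<String, Long> getStaleTopicPartitions(long thresholdTimestamp);
  }

  static Map<String, Long> getStaleTopicPartitions(Iterable<TrackerView> trackers, long thresholdTimestamp) {
    Map<String, Long> stale = new HashMap<>();
    for (TrackerView tracker: trackers) {
      // thresholdTimestamp is an absolute epoch-ms cutoff; the caller
      // computes it as now - staleThresholdMs.
      stale.putAll(tracker.getStaleTopicPartitions(thresholdTimestamp));
    }
    return stale;
  }
}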

clients/da-vinci-client/src/main/java/com/linkedin/davinci/kafka/consumer/AggKafkaConsumerService.java

Lines changed: 103 additions & 63 deletions
@@ -21,6 +21,7 @@
 import com.linkedin.venice.utils.DaemonThreadFactory;
 import com.linkedin.venice.utils.RedundantExceptionFilter;
 import com.linkedin.venice.utils.SystemTime;
+import com.linkedin.venice.utils.Time;
 import com.linkedin.venice.utils.Utils;
 import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap;
 import io.tehuti.metrics.MetricsRepository;
@@ -78,6 +79,8 @@ public class AggKafkaConsumerService extends AbstractVeniceService {
 
   private final static String STUCK_CONSUMER_MSG =
       "Didn't find any suspicious ingestion task, and please contact developers to investigate it further";
+  private static final String CONSUMER_POLL_WARNING_MESSAGE_PREFIX =
+      "Consumer poll tracker found stale topic partitions for consumer service: ";
 
   private final VeniceJsonSerializer<Map<String, Map<String, TopicPartitionIngestionInfo>>> topicPartitionIngestionContextJsonSerializer =
       new VeniceJsonSerializer<>(new TypeReference<Map<String, Map<String, TopicPartitionIngestionInfo>>>() {
@@ -117,11 +120,14 @@ public AggKafkaConsumerService(
     int intervalInSeconds = serverConfig.getStuckConsumerRepairIntervalSecond();
     this.stuckConsumerRepairExecutorService.scheduleAtFixedRate(
         getStuckConsumerDetectionAndRepairRunnable(
+            LOGGER,
+            SystemTime.INSTANCE,
             kafkaServerToConsumerServiceMap,
             versionTopicStoreIngestionTaskMapping,
             TimeUnit.SECONDS.toMillis(serverConfig.getStuckConsumerDetectionRepairThresholdSecond()),
             TimeUnit.SECONDS.toMillis(serverConfig.getNonExistingTopicIngestionTaskKillThresholdSecond()),
             TimeUnit.SECONDS.toMillis(serverConfig.getNonExistingTopicCheckRetryIntervalSecond()),
+            TimeUnit.SECONDS.toMillis(serverConfig.getConsumerPollTrackerStaleThresholdSeconds()),
             new StuckConsumerRepairStats(metricsRepository),
             killIngestionTaskRunnable),
         intervalInSeconds,
@@ -154,11 +160,14 @@ public void stopInner() throws Exception {
   }
 
   protected static Runnable getStuckConsumerDetectionAndRepairRunnable(
+      Logger logger,
+      Time time,
       Map<String, AbstractKafkaConsumerService> kafkaServerToConsumerServiceMap,
       Map<String, StoreIngestionTask> versionTopicStoreIngestionTaskMapping,
       long stuckConsumerRepairThresholdMs,
      long nonExistingTopicIngestionTaskKillThresholdMs,
       long nonExistingTopicRetryIntervalMs,
+      long consumerPollTrackerStaleThresholdMs,
       StuckConsumerRepairStats stuckConsumerRepairStats,
       Consumer<String> killIngestionTaskRunnable) {
     return () -> {
@@ -177,82 +186,113 @@ protected static Runnable getStuckConsumerDetectionAndRepairRunnable(
         long maxDelayMs = consumerService.getMaxElapsedTimeMSSinceLastPollInConsumerPool();
         if (maxDelayMs >= stuckConsumerRepairThresholdMs) {
           scanStoreIngestionTaskToFixStuckConsumer = true;
-          LOGGER.warn("Found some consumer has stuck for {} ms, will start the repairing procedure", maxDelayMs);
+          logger.warn("Found some consumer has stuck for {} ms, will start the repairing procedure", maxDelayMs);
           break;
         }
       }
-      if (!scanStoreIngestionTaskToFixStuckConsumer) {
-        return;
-      }
-      stuckConsumerRepairStats.recordStuckConsumerFound();
-
-      /**
-       * Collect a list of SITs, whose version topic doesn't exist by checking {@link StoreIngestionTask#isProducingVersionTopicHealthy()},
-       * and this function will continue to check the version topic healthiness for a period of {@link nonExistingTopicIngestionTaskKillThresholdMs}
-       * to tolerate transient topic discovery issue.
-       */
-      Map<String, StoreIngestionTask> versionTopicIngestionTaskMappingForNonExistingTopic = new HashMap<>();
-      versionTopicStoreIngestionTaskMapping.forEach((vt, sit) -> {
-        try {
-          if (!sit.isProducingVersionTopicHealthy()) {
-            versionTopicIngestionTaskMappingForNonExistingTopic.put(vt, sit);
-            LOGGER.warn("The producing version topic:{} is not healthy", vt);
+      if (scanStoreIngestionTaskToFixStuckConsumer) {
+        stuckConsumerRepairStats.recordStuckConsumerFound();
+
+        /**
+         * Collect a list of SITs, whose version topic doesn't exist by checking {@link StoreIngestionTask#isProducingVersionTopicHealthy()},
+         * and this function will continue to check the version topic healthiness for a period of {@link nonExistingTopicIngestionTaskKillThresholdMs}
+         * to tolerate transient topic discovery issue.
+         */
+        Map<String, StoreIngestionTask> versionTopicIngestionTaskMappingForNonExistingTopic = new HashMap<>();
+        versionTopicStoreIngestionTaskMapping.forEach((vt, sit) -> {
+          try {
+            if (!sit.isProducingVersionTopicHealthy()) {
+              versionTopicIngestionTaskMappingForNonExistingTopic.put(vt, sit);
+              logger.warn("The producing version topic:{} is not healthy", vt);
+            }
+          } catch (Exception e) {
+            logger.error("Got exception while checking topic existence for version topic: {}", vt, e);
+          }
+        });
+        int maxAttempts =
+            (int) Math.ceil((double) nonExistingTopicIngestionTaskKillThresholdMs / nonExistingTopicRetryIntervalMs);
+        for (int cnt = 0; cnt < maxAttempts; ++cnt) {
+          Iterator<Map.Entry<String, StoreIngestionTask>> iterator =
+              versionTopicIngestionTaskMappingForNonExistingTopic.entrySet().iterator();
+          while (iterator.hasNext()) {
+            Map.Entry<String, StoreIngestionTask> entry = iterator.next();
+            String versionTopic = entry.getKey();
+            StoreIngestionTask sit = entry.getValue();
+            try {
+              if (sit.isProducingVersionTopicHealthy()) {
+                /**
+                 * If the version topic becomes available after retries, remove it from the tracking map.
+                 */
+                iterator.remove();
+                logger.info("The producing version topic:{} becomes healthy", versionTopic);
+              }
+            } catch (Exception e) {
+              logger.error("Got exception while checking topic existence for version topic: {}", versionTopic, e);
+            } finally {
+              Utils.sleep(nonExistingTopicRetryIntervalMs);
+            }
           }
-        } catch (Exception e) {
-          LOGGER.error("Got exception while checking topic existence for version topic: {}", vt, e);
         }
-      });
-      int maxAttempts =
-          (int) Math.ceil((double) nonExistingTopicIngestionTaskKillThresholdMs / nonExistingTopicRetryIntervalMs);
-      for (int cnt = 0; cnt < maxAttempts; ++cnt) {
-        Iterator<Map.Entry<String, StoreIngestionTask>> iterator =
-            versionTopicIngestionTaskMappingForNonExistingTopic.entrySet().iterator();
-        while (iterator.hasNext()) {
-          Map.Entry<String, StoreIngestionTask> entry = iterator.next();
-          String versionTopic = entry.getKey();
-          StoreIngestionTask sit = entry.getValue();
+
+        AtomicBoolean hasRepairedSomeIngestionTask = new AtomicBoolean(false);
+        versionTopicIngestionTaskMappingForNonExistingTopic.forEach((vt, sit) -> {
           try {
-            if (sit.isProducingVersionTopicHealthy()) {
-              /**
-               * If the version topic becomes available after retries, remove it from the tracking map.
-               */
-              iterator.remove();
-              LOGGER.info("The producing version topic:{} becomes healthy", versionTopic);
-            }
+            logger.warn(
+                "The ingestion topics (version topic) are not healthy for "
+                    + "store version: {}, will kill the ingestion task to try to unblock shared consumer",
+                vt);
+            /**
+             * The following function call will interrupt all the stuck {@link org.apache.kafka.clients.producer.KafkaProducer#send} call
+             * to non-existing topics.
+             */
+            sit.closeVeniceWriters(false);
+            killIngestionTaskRunnable.accept(vt);
+            hasRepairedSomeIngestionTask.set(true);
+            stuckConsumerRepairStats.recordIngestionTaskRepair();
           } catch (Exception e) {
-            LOGGER.error("Got exception while checking topic existence for version topic: {}", versionTopic, e);
-          } finally {
-            Utils.sleep(nonExistingTopicRetryIntervalMs);
+            logger.error("Hit exception while killing the stuck ingestion task for store version: {}", vt, e);
+          }
+        });
+        if (!hasRepairedSomeIngestionTask.get()) {
+          if (!REDUNDANT_LOGGING_FILTER.isRedundantException(STUCK_CONSUMER_MSG)) {
+            logger.warn(STUCK_CONSUMER_MSG);
           }
+          stuckConsumerRepairStats.recordRepairFailure();
         }
       }
 
-      AtomicBoolean hasRepairedSomeIngestionTask = new AtomicBoolean(false);
-      versionTopicIngestionTaskMappingForNonExistingTopic.forEach((vt, sit) -> {
-        try {
-          LOGGER.warn(
-              "The ingestion topics (version topic) are not healthy for "
-                  + "store version: {}, will kill the ingestion task to try to unblock shared consumer",
-              vt);
-          /**
-           * The following function call will interrupt all the stuck {@link org.apache.kafka.clients.producer.KafkaProducer#send} call
-           * to non-existing topics.
-           */
-          sit.closeVeniceWriters(false);
-          killIngestionTaskRunnable.accept(vt);
-          hasRepairedSomeIngestionTask.set(true);
-          stuckConsumerRepairStats.recordIngestionTaskRepair();
-        } catch (Exception e) {
-          LOGGER.error("Hit exception while killing the stuck ingestion task for store version: {}", vt, e);
-        }
-      });
-      if (!hasRepairedSomeIngestionTask.get()) {
-        if (!REDUNDANT_LOGGING_FILTER.isRedundantException(STUCK_CONSUMER_MSG)) {
-          LOGGER.warn(STUCK_CONSUMER_MSG);
+      reportStaleTopicPartitions(logger, time, kafkaServerToConsumerServiceMap, consumerPollTrackerStaleThresholdMs);
+    };
+  }
+
+  private static void reportStaleTopicPartitions(
+      Logger logger,
+      Time time,
+      Map<String, AbstractKafkaConsumerService> kafkaServerToConsumerServiceMap,
+      long consumerPollTrackerStaleThresholdMs) {
+    StringBuilder stringBuilder = new StringBuilder();
+    long now = time.getMilliseconds();
+    // Detect and log any subscribed topic partitions that are not polling records
+    for (Map.Entry<String, AbstractKafkaConsumerService> consumerService: kafkaServerToConsumerServiceMap.entrySet()) {
+      Map<PubSubTopicPartition, Long> staleTopicPartitions =
+          consumerService.getValue().getStaleTopicPartitions(now - consumerPollTrackerStaleThresholdMs);
+      if (!staleTopicPartitions.isEmpty()) {
+        stringBuilder.append(CONSUMER_POLL_WARNING_MESSAGE_PREFIX);
+        stringBuilder.append(consumerService.getKey());
+        for (Map.Entry<PubSubTopicPartition, Long> staleTopicPartition: staleTopicPartitions.entrySet()) {
+          stringBuilder.append("\n topic: ");
+          stringBuilder.append(staleTopicPartition.getKey().getTopicName());
+          stringBuilder.append(" partition: ");
+          stringBuilder.append(staleTopicPartition.getKey().getPartitionNumber());
+          stringBuilder.append(" stale for: ");
+          stringBuilder.append(now - staleTopicPartition.getValue());
+          stringBuilder.append("ms");
         }
-        stuckConsumerRepairStats.recordRepairFailure();
+        logger.warn(stringBuilder.toString());
+        // clear the StringBuilder to be reused for next reporting cycle
+        stringBuilder.setLength(0);
       }
-    };
+    }
   }
 
   /**
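
Two bits of arithmetic in this runnable are worth making concrete. The retry budget for unhealthy version topics is ceil(killThresholdMs / retryIntervalMs), and the staleness check passes an absolute cutoff of now - consumerPollTrackerStaleThresholdMs, so a reported partition's "stale for" value is now minus its recorded timestamp. A small sketch with sample values (the durations below are illustrative, not taken from the patch):

import java.util.concurrent.TimeUnit;

public class RepairMathSketch {
  public static void main(String[] args) {
    // Retry budget: how many health re-checks fit inside the kill threshold.
    long killThresholdMs = TimeUnit.MINUTES.toMillis(5);   // sample value
    long retryIntervalMs = TimeUnit.SECONDS.toMillis(30);  // sample value
    int maxAttempts = (int) Math.ceil((double) killThresholdMs / retryIntervalMs);
    System.out.println(maxAttempts); // 10

    // Staleness cutoff: partitions whose recorded timestamp is older than
    // (now - threshold) are reported, so "stale for" = now - timestamp.
    long now = System.currentTimeMillis();
    long staleThresholdMs = TimeUnit.MINUTES.toMillis(15); // default from VeniceServerConfig
    long cutoff = now - staleThresholdMs;
    long subscribedAt = now - TimeUnit.MINUTES.toMillis(20); // subscribed 20 min ago
    System.out.println(subscribedAt < cutoff); // true -> reported, stale for ~20 min
  }
}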
clients/da-vinci-client/src/main/java/com/linkedin/davinci/kafka/consumer/ConsumerPollTracker.java

Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+package com.linkedin.davinci.kafka.consumer;
+
+import com.linkedin.venice.annotation.Threadsafe;
+import com.linkedin.venice.pubsub.api.PubSubTopicPartition;
+import com.linkedin.venice.utils.Time;
+import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+
+/**
+ * This class maintains a map of all the subscribed topic partitions and the timestamp when it was subscribed. The
+ * intention is to detect topic partitions that are subscribed but received no messages for an extended period of time
+ * due to any of the following reasons:
+ * 1. Starvation due to shared consumer
+ * 2. Pub-sub broker or client issues
+ * 3. Code bug
+ * All subscribed topic partitions are expected to receive messages from polls because:
+ * 1. RT topics have heartbeat messages produced to them periodically. VT for hybrid stores will receive them too.
+ * 2. VT for batch only stores should unsubscribe after completion (EOP received).
+ * Once message(s) are received the corresponding topic partition is removed from the tracking map.
+ * TODO: Currently the tracker is unable to differentiate RT topic subscriptions across different versions. e.g. current
+ * version and future version leader for a given partition is on the same host and subscribed to different offset of the
+ * RT topic. Current version is able to poll successfully and future version cannot. The current implementation is
+ * able to detect this but unable to report exactly which subscription is having trouble polling.
+ */
+@Threadsafe
+public class ConsumerPollTracker {
+  private final VeniceConcurrentHashMap<PubSubTopicPartition, Long> trackingMap = new VeniceConcurrentHashMap<>();
+  private final Time time;
+
+  public ConsumerPollTracker(Time time) {
+    this.time = time;
+  }
+
+  /**
+   * Record the subscribe timestamp for a given topic partition.
+   * @param pubSubTopicPartition to record the activity for.
+   */
+  public void recordSubscribed(PubSubTopicPartition pubSubTopicPartition) {
+    trackingMap.put(pubSubTopicPartition, time.getMilliseconds());
+  }
+
+  public void recordMessageReceived(PubSubTopicPartition pubSubTopicPartition) {
+    trackingMap.computeIfPresent(pubSubTopicPartition, (topicPartition, timestamp) -> null);
+  }
+
+  public void removeTopicPartition(PubSubTopicPartition pubSubTopicPartition) {
+    trackingMap.remove(pubSubTopicPartition);
+  }
+
+  /**
+   * @param thresholdTimestamp to get topic partitions with older last activity timestamp than the threshold timestamp.
+   * @return a map of topic partitions with last successful activity timestamp older than the provided timestamp.
+   */
+  public Map<PubSubTopicPartition, Long> getStaleTopicPartitions(long thresholdTimestamp) {
+    return trackingMap.entrySet()
+        .stream()
+        .filter(entry -> entry.getValue() < thresholdTimestamp)
+        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+  }
+}
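
To see the tracker's lifecycle end to end, here is a minimal, self-contained model of the same bookkeeping. String keys and a manually advanced clock stand in for PubSubTopicPartition and Venice's Time abstraction; this is an illustration, not the production class:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

public class PollTrackerModel {
  private final Map<String, Long> trackingMap = new ConcurrentHashMap<>();
  private long clockMs = 0; // fake clock for the demo

  void recordSubscribed(String partition) {
    trackingMap.put(partition, clockMs);
  }

  void recordMessageReceived(String partition) {
    // A null-returning remapper removes the entry atomically with the
    // membership check; never-subscribed or already-cleared keys are no-ops.
    trackingMap.computeIfPresent(partition, (p, ts) -> null);
  }

  Map<String, Long> getStaleTopicPartitions(long thresholdTimestamp) {
    return trackingMap.entrySet()
        .stream()
        .filter(e -> e.getValue() < thresholdTimestamp)
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  }

  public static void main(String[] args) {
    PollTrackerModel tracker = new PollTrackerModel();
    tracker.recordSubscribed("store_v1-0"); // subscribed at t=0
    tracker.clockMs = 16 * 60 * 1000;       // 16 minutes later
    // Threshold = now - 15 min: anything subscribed before t=1min is stale.
    System.out.println(tracker.getStaleTopicPartitions(tracker.clockMs - 15 * 60 * 1000));
    // -> {store_v1-0=0}: stale, no message since subscribe
    tracker.recordMessageReceived("store_v1-0"); // a poll delivered data
    System.out.println(tracker.getStaleTopicPartitions(tracker.clockMs - 15 * 60 * 1000));
    // -> {}: entry cleared once messages arrive
  }
}

One design note visible in the model: recordMessageReceived uses computeIfPresent with a null-returning remapper rather than remove, so the first delivered batch permanently clears the entry and later calls cost nothing.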

clients/da-vinci-client/src/main/java/com/linkedin/davinci/kafka/consumer/ConsumptionTask.java

Lines changed: 6 additions & 1 deletion
@@ -54,6 +54,7 @@ class ConsumptionTask implements Runnable {
   private final IntConsumer recordsThrottler;
   private final AggKafkaConsumerServiceStats aggStats;
   private final ConsumerSubscriptionCleaner cleaner;
+  private final ConsumerPollTracker consumerPollTracker;
 
   /**
    * Maintain rate counter with default window size to calculate the message and bytes rate at topic partition level.
@@ -83,13 +84,15 @@ public ConsumptionTask(
       final IntConsumer bandwidthThrottler,
       final IntConsumer recordsThrottler,
       final AggKafkaConsumerServiceStats aggStats,
-      final ConsumerSubscriptionCleaner cleaner) {
+      final ConsumerSubscriptionCleaner cleaner,
+      final ConsumerPollTracker consumerPollTracker) {
     this.readCycleDelayMs = readCycleDelayMs;
     this.pollFunction = pollFunction;
     this.bandwidthThrottler = bandwidthThrottler;
     this.recordsThrottler = recordsThrottler;
     this.aggStats = aggStats;
     this.cleaner = cleaner;
+    this.consumerPollTracker = consumerPollTracker;
     this.taskId = taskId;
     this.consumptionTaskIdStr = Utils.getSanitizedStringForLogger(consumerNamePrefix) + " - " + taskId;
     this.LOGGER = LogManager.getLogger(getClass().getSimpleName() + "[ " + consumptionTaskIdStr + " ]");
@@ -134,6 +137,7 @@ public void run() {
         if (dataReceiver != null) {
           dataReceiver.notifyOfTopicDeletion(topicPartitionToUnSub.getPubSubTopic().getName());
         }
+        consumerPollTracker.removeTopicPartition(topicPartitionToUnSub);
       }
       topicPartitionsToUnsub.clear();
 
@@ -165,6 +169,7 @@ public void run() {
           topicPartitionMessages = entry.getValue();
 
           // Per-poll bookkeeping
+          consumerPollTracker.recordMessageReceived(pubSubTopicPartition);
           msgCount = topicPartitionMessages.size();
           polledPubSubMessagesCount += msgCount;
           payloadSizePerTopicPartition = 0;