From f662248ca704d0e04ae2c8bbabafce68c822156a Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Mon, 12 May 2025 10:45:48 -0700 Subject: [PATCH 01/11] [client] Add OTel key_count metrics Add key_count metrics in OTel. --- .../client/StatsAvroGenericDaVinciClient.java | 10 +- .../StatsAvroGenericDaVinciClientTest.java | 4 +- .../StatsAvroGenericStoreClient.java | 16 ++- ...DispatchingAvroGenericStoreClientTest.java | 8 +- .../partition/VeniceSparkPartitionerTest.java | 4 +- .../task/SparkDataWriterTaskTrackerTest.java | 4 +- .../SparkEngineTaskConfigProviderTest.java | 8 +- .../venice/client/stats/BasicClientStats.java | 101 +++++++++++++++++- .../client/store/StatTrackingStoreClient.java | 15 ++- .../client/stats/BasicClientStatsTest.java | 68 +++++++++--- .../com/linkedin/venice/stats/ClientType.java | 4 + .../linkedin/venice/stats/ClientTypeTest.java | 25 +++++ 12 files changed, 236 insertions(+), 31 deletions(-) create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/ClientTypeTest.java diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java index 489204c050a..d516d5b1682 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java @@ -83,6 +83,10 @@ public CompletableFuture get(K key, V reusableValue) { return trackRequest(clientStatsForSingleGet, () -> super.get(key, reusableValue)).whenComplete((v, throwable) -> { if (throwable == null && v != null) { clientStatsForSingleGet.recordSuccessRequestKeyCount(1); + clientStatsForSingleGet.recordFailedRequestKeyCount(0); + } else { + clientStatsForSingleGet.recordSuccessRequestKeyCount(0); + clientStatsForSingleGet.recordFailedRequestKeyCount(1); } }); } @@ -91,8 +95,12 @@ public CompletableFuture get(K key, V reusableValue) { public CompletableFuture> batchGet(Set keys) { clientStatsForBatchGet.recordRequestKeyCount(keys.size()); return trackRequest(clientStatsForBatchGet, () -> super.batchGet(keys)).whenComplete((v, throwable) -> { - if (throwable == null && v != null) { + if (v != null) { clientStatsForBatchGet.recordSuccessRequestKeyCount(v.size()); + clientStatsForBatchGet.recordFailedRequestKeyCount(keys.size() - v.size()); + } else { + clientStatsForBatchGet.recordSuccessRequestKeyCount(0); + clientStatsForBatchGet.recordFailedRequestKeyCount(keys.size()); } }); } diff --git a/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java b/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java index 2e0591fe90f..d6ddeac24b7 100644 --- a/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java +++ b/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java @@ -65,7 +65,7 @@ public void testGet(boolean reuseApi) throws ExecutionException, InterruptedExce assertTrue(metrics.get(".test_store--healthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--unhealthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--healthy_request_latency.Avg").value() > 0); - assertEquals(metrics.get(".test_store--success_request_key_count.Avg").value(), 1.0); + assertEquals(metrics.get(".test_store--success_request_key_count.Avg").value(), 1.0 / 2); assertEquals(metrics.get(".test_store--success_request_key_count.Max").value(), 1.0); assertTrue(metrics.get(".test_store--success_request_ratio.SimpleRatioStat").value() < 1.0); assertTrue(metrics.get(".test_store--success_request_key_ratio.SimpleRatioStat").value() < 1.0); @@ -105,7 +105,7 @@ public void testBatchGet() throws ExecutionException, InterruptedException { assertTrue(metrics.get(".test_store--multiget_healthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--multiget_unhealthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--multiget_healthy_request_latency.Avg").value() > 0); - assertEquals(metrics.get(".test_store--multiget_success_request_key_count.Avg").value(), 2.0); + assertEquals(metrics.get(".test_store--multiget_success_request_key_count.Avg").value(), 1.0); // round (2.0 / 3) assertEquals(metrics.get(".test_store--multiget_success_request_key_count.Max").value(), 2.0); assertTrue(metrics.get(".test_store--multiget_success_request_ratio.SimpleRatioStat").value() < 1.0); assertTrue(metrics.get(".test_store--multiget_success_request_key_ratio.SimpleRatioStat").value() < 1.0); diff --git a/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java b/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java index d287b2aa1e7..c5524450b15 100644 --- a/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java +++ b/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java @@ -158,6 +158,7 @@ private CompletableFuture recordRequestMetrics( } if (exceptionReceived) { + // We still want to record the partial success key count number even if the request is unhealthy. clientStats.emitUnhealthyRequestMetrics(latency, throwable); } else { clientStats.emitHealthyRequestMetrics(latency, requestContext.successRequestKeyCount.get()); @@ -176,13 +177,15 @@ private CompletableFuture recordRequestMetrics( if (requestContext.responseDeserializationTime > 0) { clientStats.recordResponseDeserializationTime(requestContext.responseDeserializationTime); } - clientStats.recordSuccessRequestKeyCount(requestContext.successRequestKeyCount.get()); } + clientStats.recordSuccessRequestKeyCount(requestContext.successRequestKeyCount.get()); + if (requestContext.noAvailableReplica) { clientStats.recordNoAvailableReplicaRequest(); } + int retrySuccessKeyCount = 0; if (requestContext instanceof GetRequestContext) { GetRequestContext getRequestContext = (GetRequestContext) requestContext; @@ -198,7 +201,8 @@ private CompletableFuture recordRequestMetrics( if (!exceptionReceived) { if (getRequestContext.retryContext.retryWin) { clientStats.recordRetryRequestWin(); - clientStats.recordRetryRequestSuccessKeyCount(1); + retrySuccessKeyCount = 1; + clientStats.recordRetryRequestSuccessKeyCount(retrySuccessKeyCount); } } } @@ -213,7 +217,8 @@ private CompletableFuture recordRequestMetrics( clientStats.recordRetryRequestKeyCount(retryRequestContext.numKeysInRequest); clientStats.recordRetryFanoutSize(retryRequestContext.getFanoutSize()); if (!exceptionReceived) { - clientStats.recordRetryRequestSuccessKeyCount(retryRequestContext.numKeysCompleted.get()); + retrySuccessKeyCount = retryRequestContext.numKeysCompleted.get(); + clientStats.recordRetryRequestSuccessKeyCount(retrySuccessKeyCount); if (retryRequestContext.numKeysCompleted.get() > 0) { clientStats.recordRetryRequestWin(); } @@ -221,6 +226,11 @@ private CompletableFuture recordRequestMetrics( } } + // numberOfKeys = successKeyCount + retrySuccessKeyCount + failedKeyCount. + clientStats.recordFailedRequestKeyCount( + numberOfKeys - requestContext.successRequestKeyCount.get() - retrySuccessKeyCount, + throwable); + if (exceptionReceived) { // throw an exception after incrementing some error related metrics if (throwable instanceof VeniceClientException) { diff --git a/clients/venice-client/src/test/java/com/linkedin/venice/fastclient/DispatchingAvroGenericStoreClientTest.java b/clients/venice-client/src/test/java/com/linkedin/venice/fastclient/DispatchingAvroGenericStoreClientTest.java index 1b6ba563448..04a06689f51 100644 --- a/clients/venice-client/src/test/java/com/linkedin/venice/fastclient/DispatchingAvroGenericStoreClientTest.java +++ b/clients/venice-client/src/test/java/com/linkedin/venice/fastclient/DispatchingAvroGenericStoreClientTest.java @@ -525,8 +525,8 @@ private void validateMetrics( assertFalse(metrics.get(metricPrefix + "healthy_request_latency.Avg").value() > 0); assertTrue(metrics.get(metricPrefix + "unhealthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(metricPrefix + "unhealthy_request_latency.Avg").value() > 0); - // as partial healthy request is still considered unhealthy, not incrementing the below metric - assertFalse(metrics.get(metricPrefix + "success_request_key_count.Max").value() > 0); + // as partial healthy request is should still report success key count. + assertTrue(metrics.get(metricPrefix + "success_request_key_count.Max").value() > 0); if (batchGet) { assertTrue(metrics.get(metricPrefix + "response_ttfr.Avg").value() > 0); assertEquals(batchGetRequestContext.successRequestKeyCount.get(), (int) successKeyCount); @@ -1294,7 +1294,7 @@ public void testComputeWithExceptionFromTransportLayerForOneRoute() throws IOExc fail(); } catch (Exception e) { assertTrue(e.getMessage().endsWith("At least one route did not complete"), e.getMessage()); - validateComputeRequestMetrics(false, false, RequestType.COMPUTE, false, 2, 1); + validateComputeRequestMetrics(false, true, RequestType.COMPUTE, false, 2, 1); } finally { tearDown(); } @@ -1525,7 +1525,7 @@ public void testStreamingComputeWithExceptionFromTransportLayerForOneRoute() assertEquals( response.get("test_key_1").get("name").toString(), COMPUTE_REQUEST_VALUE_RESPONSE.get("test_key_1").get("name")); - validateComputeRequestMetrics(false, false, RequestType.COMPUTE_STREAMING, false, 2, 1); + validateComputeRequestMetrics(false, true /*partialHealthyRequest*/, RequestType.COMPUTE_STREAMING, false, 2, 1); } finally { tearDown(); } diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitionerTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitionerTest.java index f209661ac47..8e8dcc60835 100644 --- a/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitionerTest.java +++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitionerTest.java @@ -41,7 +41,9 @@ public void setUp() { @AfterClass(alwaysRun = true) public void tearDown() { - spark.stop(); + if (spark != null) { + spark.stop(); + } } @Test diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java index b0f39d5537c..735f58a3efa 100644 --- a/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java +++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java @@ -18,7 +18,9 @@ public void setUp() { @AfterClass(alwaysRun = true) public void tearDown() { - spark.stop(); + if (spark != null) { + spark.stop(); + } } @Test diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProviderTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProviderTest.java index 0bbacb2862a..1a81cf63c03 100644 --- a/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProviderTest.java +++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProviderTest.java @@ -33,8 +33,12 @@ public void setUp() { @AfterClass(alwaysRun = true) public void tearDown() { - sparkContext.close(); - spark.stop(); + if (sparkContext != null) { + sparkContext.close(); + } + if (spark != null) { + spark.stop(); + } } @Test diff --git a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java index f06ca6c8d2f..bb60e0ca4ae 100644 --- a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java +++ b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java @@ -11,6 +11,7 @@ import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.SUCCESS; import static com.linkedin.venice.utils.CollectionUtils.setOf; import static org.apache.hc.core5.http.HttpStatus.SC_NOT_FOUND; +import static org.apache.hc.core5.http.HttpStatus.SC_NO_CONTENT; import static org.apache.hc.core5.http.HttpStatus.SC_OK; import com.linkedin.venice.client.exceptions.VeniceClientHttpException; @@ -74,6 +75,10 @@ public class BasicClientStats extends AbstractVeniceHttpStats { private final MetricEntityStateOneEnum healthyLatencyMetricForDavinciClient; private final MetricEntityStateOneEnum unhealthyLatencyMetricForDavinciClient; private final Sensor requestKeyCountSensor; + private final MetricEntityStateThreeEnums healthyKeyCountMetric; + private final MetricEntityStateThreeEnums unhealthyKeyCountMetric; + private final MetricEntityStateOneEnum healthyKeyCountMetricForDavinciClient; + private final MetricEntityStateOneEnum unhealthyKeyCountMetricForDavinciClient; private final Sensor successRequestKeyCountSensor; private final Sensor successRequestRatioSensor; private final Sensor successRequestKeyRatioSensor; @@ -172,10 +177,31 @@ protected BasicClientStats( getFullMetricName(BasicClientTehutiMetricName.UNHEALTHY_REQUEST_LATENCY.getMetricName()))), getBaseDimensionsMap(), VeniceResponseStatusCategory.class); + + healthyKeyCountMetricForDavinciClient = MetricEntityStateOneEnum.create( + BasicClientMetricEntity.KEY_COUNT_DVC.getMetricEntity(), + getOtelRepository(), + this::registerSensor, + BasicClientTehutiMetricName.SUCCESS_REQUEST_KEY_COUNT, + Arrays.asList(successRequestKeyCountRate, new Avg(), new Max()), + baseDimensionsMap, + VeniceResponseStatusCategory.class); + + unhealthyKeyCountMetricForDavinciClient = MetricEntityStateOneEnum.create( + BasicClientMetricEntity.KEY_COUNT_DVC.getMetricEntity(), + otelRepository, + this::registerSensor, + BasicClientTehutiMetricName.FAILED_REQUEST_KEY_COUNT, + Arrays.asList(new Rate(), new Avg(), new Max()), + baseDimensionsMap, + VeniceResponseStatusCategory.class); + healthyRequestMetric = null; unhealthyRequestMetric = null; healthyLatencyMetric = null; unhealthyLatencyMetric = null; + healthyKeyCountMetric = null; + unhealthyKeyCountMetric = null; } else { healthyRequestMetric = MetricEntityStateThreeEnums.create( BasicClientMetricEntity.CALL_COUNT.getMetricEntity(), @@ -226,10 +252,34 @@ protected BasicClientStats( HttpResponseStatusEnum.class, HttpResponseStatusCodeCategory.class, VeniceResponseStatusCategory.class); + // key count + healthyKeyCountMetric = MetricEntityStateThreeEnums.create( + BasicClientMetricEntity.KEY_COUNT.getMetricEntity(), + getOtelRepository(), + this::registerSensor, + BasicClientTehutiMetricName.SUCCESS_REQUEST_KEY_COUNT, + Arrays.asList(successRequestKeyCountRate, new Avg(), new Max()), + baseDimensionsMap, + HttpResponseStatusEnum.class, + HttpResponseStatusCodeCategory.class, + VeniceResponseStatusCategory.class); + unhealthyKeyCountMetric = MetricEntityStateThreeEnums.create( + BasicClientMetricEntity.KEY_COUNT.getMetricEntity(), + otelRepository, + this::registerSensor, + BasicClientTehutiMetricName.FAILED_REQUEST_KEY_COUNT, + Arrays.asList(new Rate(), new Avg(), new Max()), + baseDimensionsMap, + HttpResponseStatusEnum.class, + HttpResponseStatusCodeCategory.class, + VeniceResponseStatusCategory.class); + healthyRequestMetricForDavinciClient = null; unhealthyRequestMetricForDavinciClient = null; healthyLatencyMetricForDavinciClient = null; unhealthyLatencyMetricForDavinciClient = null; + healthyKeyCountMetricForDavinciClient = null; + unhealthyKeyCountMetricForDavinciClient = null; } // successRequestRatioSensor will be a derived metric in OTel @@ -301,6 +351,34 @@ public void recordRequestKeyCount(int keyCount) { public void recordSuccessRequestKeyCount(int successKeyCount) { successRequestKeyCountSensor.record(successKeyCount); + if (ClientType.isDavinciClient(this.clientType)) { + healthyKeyCountMetricForDavinciClient.record(successKeyCount, SUCCESS); + } else { + int httpStatus = getHealthyRequestHttpStatus(successKeyCount); + HttpResponseStatusEnum statusEnum = transformIntToHttpResponseStatusEnum(httpStatus); + HttpResponseStatusCodeCategory httpCategory = getVeniceHttpResponseStatusCodeCategory(httpStatus); + healthyKeyCountMetric.record(successKeyCount, statusEnum, httpCategory, SUCCESS); + } + } + + public void recordFailedRequestKeyCount(int failedKeyCount, Throwable throwable) { + if (ClientType.isDavinciClient(this.clientType)) { + unhealthyKeyCountMetricForDavinciClient.record(failedKeyCount, FAIL); + } else { + /** + * When throwable is null and the failed key count is 0, it means that the request was successful. However, + * we still need to record the failed key count as 0, and thus we use a default http status of SC_NO_CONTENT + * to indicate success. + */ + int httpStatus = throwable != null ? getUnhealthyRequestHttpStatus(throwable) : SC_NO_CONTENT; + HttpResponseStatusEnum statusEnum = transformIntToHttpResponseStatusEnum(httpStatus); + HttpResponseStatusCodeCategory httpCategory = getVeniceHttpResponseStatusCodeCategory(httpStatus); + unhealthyKeyCountMetric.record(failedKeyCount, statusEnum, httpCategory, FAIL); + } + } + + public void recordFailedRequestKeyCount(int failedKeyCount) { + recordFailedRequestKeyCount(failedKeyCount, null); } protected final Rate getRequestRate() { @@ -361,7 +439,8 @@ private Map getBaseDimensionsMap() { * Metric names for tehuti metrics used in this class. */ public enum BasicClientTehutiMetricName implements TehutiMetricNameEnum { - HEALTHY_REQUEST, UNHEALTHY_REQUEST, HEALTHY_REQUEST_LATENCY, UNHEALTHY_REQUEST_LATENCY; + HEALTHY_REQUEST, UNHEALTHY_REQUEST, HEALTHY_REQUEST_LATENCY, UNHEALTHY_REQUEST_LATENCY, SUCCESS_REQUEST_KEY_COUNT, + FAILED_REQUEST_KEY_COUNT; private final String metricName; @@ -401,6 +480,18 @@ public enum BasicClientMetricEntity { HTTP_RESPONSE_STATUS_CODE_CATEGORY, VENICE_RESPONSE_STATUS_CODE_CATEGORY) ), + /** + * Count of keys during response handling along with response codes + */ + KEY_COUNT( + MetricType.HISTOGRAM, MetricUnit.NUMBER, "Count of keys during response handling along with response codes", + setOf( + VENICE_STORE_NAME, + VENICE_REQUEST_METHOD, + HTTP_RESPONSE_STATUS_CODE, + HTTP_RESPONSE_STATUS_CODE_CATEGORY, + VENICE_RESPONSE_STATUS_CODE_CATEGORY) + ), /** * Count of all DaVinci requests: as DaVinci is local reads, we do not track HTTP response codes * But keeping the same name call_count across all clients for consistency @@ -416,6 +507,14 @@ public enum BasicClientMetricEntity { CALL_TIME.name().toLowerCase(), MetricType.HISTOGRAM, MetricUnit.MILLISECOND, "Latency for all DaVinci Client responses", setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_RESPONSE_STATUS_CODE_CATEGORY) + ), + /** + * Count of keys during response handling along with response codes + */ + KEY_COUNT_DVC( + KEY_COUNT.name().toLowerCase(), MetricType.HISTOGRAM, MetricUnit.NUMBER, + "Count of keys for all DaVinci Client responses", + setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_RESPONSE_STATUS_CODE_CATEGORY) ); private final MetricEntity metricEntity; diff --git a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/store/StatTrackingStoreClient.java b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/store/StatTrackingStoreClient.java index 50fed004d70..090c616a145 100644 --- a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/store/StatTrackingStoreClient.java +++ b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/store/StatTrackingStoreClient.java @@ -97,7 +97,7 @@ public CompletableFuture get(K key) { CompletableFuture innerFuture = super.get(key, Optional.of(singleGetStats), startTimeInNS); singleGetStats.recordRequestKeyCount(1); CompletableFuture statFuture = innerFuture - .handle((BiFunction) getStatCallback(singleGetStats, startTimeInNS)); + .handle((BiFunction) getStatCallback(singleGetStats, 1, startTimeInNS)); return AppTimeOutTrackingCompletableFuture.track(statFuture, singleGetStats); } @@ -107,7 +107,7 @@ public CompletableFuture getRaw(String requestPath) { CompletableFuture innerFuture = super.getRaw(requestPath, Optional.of(schemaReaderStats), startTimeInNS); schemaReaderStats.recordRequestKeyCount(1); CompletableFuture statFuture = innerFuture.handle( - (BiFunction) getStatCallback(schemaReaderStats, startTimeInNS)); + (BiFunction) getStatCallback(schemaReaderStats, 1, startTimeInNS)); return statFuture; } @@ -182,6 +182,7 @@ private static class StatTrackingStreamingCallback extends DelegatingTrack private final Optional statsOptional; private final long preRequestTimeInNS; private final StreamingResponseTracker streamingResponseTracker; + private final int keyCnt; public StatTrackingStreamingCallback( StreamingCallback callback, @@ -192,6 +193,7 @@ public StatTrackingStreamingCallback( this.stats = stats; this.statsOptional = Optional.of(stats); this.preRequestTimeInNS = preRequestTimeInNS; + this.keyCnt = keyCnt; streamingResponseTracker = new StreamingResponseTracker(stats, keyCnt, preRequestTimeInNS); } @@ -215,6 +217,7 @@ public void onDeserializationCompletion( preRequestTimeInNS, exception, successKeyCount, + keyCnt, duplicateEntryCount); } } @@ -257,6 +260,7 @@ private static void handleMetricTrackingForStreamingCallback( long startTimeInNS, Optional exception, int successKeyCnt, + int keyCnt, int duplicateEntryCnt) { double latency = LatencyUtils.getElapsedTimeFromNSToMS(startTimeInNS); if (exception.isPresent()) { @@ -265,6 +269,8 @@ private static void handleMetricTrackingForStreamingCallback( clientStats.emitHealthyRequestMetrics(latency, successKeyCnt); } clientStats.recordSuccessRequestKeyCount(successKeyCnt); + // keyCnt = successKeyCnt + failedKeyCnt + clientStats.recordFailedRequestKeyCount(keyCnt - successKeyCnt); clientStats.recordSuccessDuplicateRequestKeyCount(duplicateEntryCnt); } @@ -280,6 +286,7 @@ public ComputeRequestBuilder compute() throws VeniceClientException { public static BiFunction getStatCallback( ClientStats clientStats, + int keyCount, long startTimeInNS) { return (T value, Throwable throwable) -> { double latency = LatencyUtils.getElapsedTimeFromNSToMS(startTimeInNS); @@ -289,7 +296,9 @@ public ComputeRequestBuilder compute() throws VeniceClientException { } clientStats.emitHealthyRequestMetrics(latency, value); - clientStats.recordSuccessRequestKeyCount(getSuccessfulKeyCount(value)); + int successfulKeyCount = getSuccessfulKeyCount(value); + clientStats.recordSuccessRequestKeyCount(successfulKeyCount); + clientStats.recordFailedRequestKeyCount(keyCount - successfulKeyCount, throwable); return value; }; } diff --git a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java index d2ed06f7ae0..4a1b3855cba 100644 --- a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java +++ b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java @@ -12,6 +12,7 @@ import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; +import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.*; import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.getExponentialHistogramPointData; import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.getLongPointData; import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.validateExponentialHistogramPointData; @@ -28,6 +29,7 @@ import com.linkedin.venice.stats.metrics.MetricEntity; import com.linkedin.venice.stats.metrics.MetricType; import com.linkedin.venice.stats.metrics.MetricUnit; +import com.linkedin.venice.utils.DataProviderUtils; import com.linkedin.venice.utils.Utils; import io.opentelemetry.api.common.Attributes; import io.opentelemetry.api.common.AttributesBuilder; @@ -81,13 +83,7 @@ public void testEmitHealthyMetrics() { stats.emitHealthyRequestMetrics(90.0, 2); validateTehutiMetrics(stats.getMetricsRepository(), ".test_store", true, 90.0); - validateOtelMetrics( - inMemoryMetricReader, - "test_store", - SC_OK, - VeniceResponseStatusCategory.SUCCESS, - 90.0, - THIN_CLIENT.getMetricsPrefix()); + validateOtelMetrics(inMemoryMetricReader, "test_store", SC_OK, SUCCESS, 90.0, THIN_CLIENT.getMetricsPrefix()); } @Test @@ -97,12 +93,7 @@ public void testEmitHealthyRequestMetricsForDavinciClient() { stats.emitHealthyRequestMetricsForDavinciClient(90.0); validateTehutiMetrics(stats.getMetricsRepository(), ".test_store", true, 90.0); - validateOtelMetrics( - inMemoryMetricReader, - "test_store", - VeniceResponseStatusCategory.SUCCESS, - 90.0, - DAVINCI_CLIENT.getMetricsPrefix()); + validateOtelMetrics(inMemoryMetricReader, "test_store", SUCCESS, 90.0, DAVINCI_CLIENT.getMetricsPrefix()); } @Test @@ -154,6 +145,36 @@ public void testEmitUnhealthyRequestMetricsForDavinciClientWithWrongClientType() Assert.assertFalse(metrics.get(".test_store--request.OccurrenceRate").value() > 0.0); } + @Test(dataProviderClass = DataProviderUtils.class, dataProvider = "True-and-False") + public void testKeyCountMetricsForDaVinciClient(boolean isSuccess) { + InMemoryMetricReader inMemoryMetricReader = InMemoryMetricReader.create(); + BasicClientStats stats = createStats(inMemoryMetricReader, DAVINCI_CLIENT); + int keyCount = 10; + if (isSuccess) { + stats.recordSuccessRequestKeyCount(keyCount); + } else { + stats.recordFailedRequestKeyCount(keyCount); + } + + // Check Tehuti metrics + Map metrics = stats.getMetricsRepository().metrics(); + String storeName = "test_store"; + if (isSuccess) { + Assert.assertEquals( + (int) metrics.get(String.format(".%s--success_request_key_count.Max", storeName)).value(), + keyCount); + // We don't have failure key count metrics for key count in Tehuti. + } + + // Check OpenTelemetry metrics + Collection metricsData = inMemoryMetricReader.collectAllMetrics(); + Attributes expectedAttributes = getExpectedAttributes(storeName, -1, isSuccess ? SUCCESS : FAIL); + + ExponentialHistogramPointData data = + getExponentialHistogramPointData(metricsData, "key_count", DAVINCI_CLIENT.getMetricsPrefix()); + validateExponentialHistogramPointData(data, keyCount, keyCount, 1, keyCount, expectedAttributes); + } + private BasicClientStats createStats(InMemoryMetricReader inMemoryMetricReader, ClientType clientType) { String storeName = "test_store"; VeniceMetricsRepository metricsRepository = @@ -248,6 +269,27 @@ public void testClientMetricEntities() { MetricUnit.MILLISECOND, "Latency for all DaVinci Client responses", Utils.setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_RESPONSE_STATUS_CODE_CATEGORY))); + expectedMetrics.put( + BasicClientStats.BasicClientMetricEntity.KEY_COUNT, + new MetricEntity( + "key_count", + MetricType.HISTOGRAM, + MetricUnit.NUMBER, + "Count of keys during response handling along with response codes", + Utils.setOf( + VENICE_STORE_NAME, + VENICE_REQUEST_METHOD, + HTTP_RESPONSE_STATUS_CODE, + HTTP_RESPONSE_STATUS_CODE_CATEGORY, + VENICE_RESPONSE_STATUS_CODE_CATEGORY))); + expectedMetrics.put( + BasicClientStats.BasicClientMetricEntity.KEY_COUNT_DVC, + new MetricEntity( + "key_count", + MetricType.HISTOGRAM, + MetricUnit.NUMBER, + "Count of keys for all DaVinci Client responses", + Utils.setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_RESPONSE_STATUS_CODE_CATEGORY))); for (BasicClientStats.BasicClientMetricEntity metric: BasicClientStats.BasicClientMetricEntity.values()) { MetricEntity actual = metric.getMetricEntity(); diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/ClientType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/ClientType.java index dd0b32835c8..5f335159aea 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/ClientType.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/ClientType.java @@ -18,4 +18,8 @@ public String getName() { public String getMetricsPrefix() { return otelMetricsPrefix; } + + public static boolean isDavinciClient(ClientType clientType) { + return clientType == DAVINCI_CLIENT; + } } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/ClientTypeTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/ClientTypeTest.java new file mode 100644 index 00000000000..1eb45b81ac0 --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/ClientTypeTest.java @@ -0,0 +1,25 @@ +package com.linkedin.venice.stats; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import org.testng.annotations.Test; + + +public class ClientTypeTest { + @Test + public void testClientType() { + assertEquals(ClientType.THIN_CLIENT.getName(), "thin-client"); + assertEquals(ClientType.FAST_CLIENT.getName(), "fast-client"); + assertEquals(ClientType.DAVINCI_CLIENT.getName(), "davinci-client"); + + assertEquals(ClientType.THIN_CLIENT.getMetricsPrefix(), "thin_client"); + assertEquals(ClientType.FAST_CLIENT.getMetricsPrefix(), "fast_client"); + assertEquals(ClientType.DAVINCI_CLIENT.getMetricsPrefix(), "davinci_client"); + + assertTrue(ClientType.isDavinciClient(ClientType.DAVINCI_CLIENT)); + assertFalse(ClientType.isDavinciClient(ClientType.THIN_CLIENT)); + assertFalse(ClientType.isDavinciClient(ClientType.FAST_CLIENT)); + } +} From 13b778be83c89fdb666cfa5d7894cdacb47c6f6a Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Thu, 15 May 2025 15:37:26 -0700 Subject: [PATCH 02/11] addressed comments on using local variable and fixing typos --- .../venice/fastclient/StatsAvroGenericStoreClient.java | 10 ++++------ .../DispatchingAvroGenericStoreClientTest.java | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java b/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java index c5524450b15..af97e007252 100644 --- a/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java +++ b/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java @@ -158,7 +158,6 @@ private CompletableFuture recordRequestMetrics( } if (exceptionReceived) { - // We still want to record the partial success key count number even if the request is unhealthy. clientStats.emitUnhealthyRequestMetrics(latency, throwable); } else { clientStats.emitHealthyRequestMetrics(latency, requestContext.successRequestKeyCount.get()); @@ -178,8 +177,9 @@ private CompletableFuture recordRequestMetrics( clientStats.recordResponseDeserializationTime(requestContext.responseDeserializationTime); } } - - clientStats.recordSuccessRequestKeyCount(requestContext.successRequestKeyCount.get()); + // We want to record the partial success key count number, no matter the request is healthy or unhealthy. + int successKeyCount = requestContext.successRequestKeyCount.get(); + clientStats.recordSuccessRequestKeyCount(successKeyCount); if (requestContext.noAvailableReplica) { clientStats.recordNoAvailableReplicaRequest(); @@ -227,9 +227,7 @@ private CompletableFuture recordRequestMetrics( } // numberOfKeys = successKeyCount + retrySuccessKeyCount + failedKeyCount. - clientStats.recordFailedRequestKeyCount( - numberOfKeys - requestContext.successRequestKeyCount.get() - retrySuccessKeyCount, - throwable); + clientStats.recordFailedRequestKeyCount(numberOfKeys - successKeyCount - retrySuccessKeyCount, throwable); if (exceptionReceived) { // throw an exception after incrementing some error related metrics diff --git a/clients/venice-client/src/test/java/com/linkedin/venice/fastclient/DispatchingAvroGenericStoreClientTest.java b/clients/venice-client/src/test/java/com/linkedin/venice/fastclient/DispatchingAvroGenericStoreClientTest.java index 04a06689f51..db8e509c4a0 100644 --- a/clients/venice-client/src/test/java/com/linkedin/venice/fastclient/DispatchingAvroGenericStoreClientTest.java +++ b/clients/venice-client/src/test/java/com/linkedin/venice/fastclient/DispatchingAvroGenericStoreClientTest.java @@ -525,7 +525,7 @@ private void validateMetrics( assertFalse(metrics.get(metricPrefix + "healthy_request_latency.Avg").value() > 0); assertTrue(metrics.get(metricPrefix + "unhealthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(metricPrefix + "unhealthy_request_latency.Avg").value() > 0); - // as partial healthy request is should still report success key count. + // as partial healthy request should still report success key count number. assertTrue(metrics.get(metricPrefix + "success_request_key_count.Max").value() > 0); if (batchGet) { assertTrue(metrics.get(metricPrefix + "response_ttfr.Avg").value() > 0); From 43154b033c5e6759998f19d6dfec24ebdcdfa2d7 Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Mon, 19 May 2025 15:40:01 -0700 Subject: [PATCH 03/11] addressed comments on using request/response for key_count --- .../client/StatsAvroGenericDaVinciClient.java | 12 +- .../StatsAvroGenericDaVinciClientTest.java | 4 +- .../StatsAvroGenericStoreClient.java | 14 +- .../venice/client/stats/BasicClientStats.java | 133 ++++++------------ .../client/store/StatTrackingStoreClient.java | 17 +-- .../client/stats/BasicClientStatsTest.java | 41 +++--- .../venice/stats/dimensions/MessageType.java | 28 ++++ .../dimensions/VeniceMetricsDimensions.java | 2 + 8 files changed, 111 insertions(+), 140 deletions(-) create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/MessageType.java diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java index d516d5b1682..30df70a3204 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java @@ -82,11 +82,7 @@ public CompletableFuture get(K key, V reusableValue) { clientStatsForSingleGet.recordRequestKeyCount(1); return trackRequest(clientStatsForSingleGet, () -> super.get(key, reusableValue)).whenComplete((v, throwable) -> { if (throwable == null && v != null) { - clientStatsForSingleGet.recordSuccessRequestKeyCount(1); - clientStatsForSingleGet.recordFailedRequestKeyCount(0); - } else { - clientStatsForSingleGet.recordSuccessRequestKeyCount(0); - clientStatsForSingleGet.recordFailedRequestKeyCount(1); + clientStatsForSingleGet.recordResponseKeyCount(1); } }); } @@ -96,11 +92,7 @@ public CompletableFuture> batchGet(Set keys) { clientStatsForBatchGet.recordRequestKeyCount(keys.size()); return trackRequest(clientStatsForBatchGet, () -> super.batchGet(keys)).whenComplete((v, throwable) -> { if (v != null) { - clientStatsForBatchGet.recordSuccessRequestKeyCount(v.size()); - clientStatsForBatchGet.recordFailedRequestKeyCount(keys.size() - v.size()); - } else { - clientStatsForBatchGet.recordSuccessRequestKeyCount(0); - clientStatsForBatchGet.recordFailedRequestKeyCount(keys.size()); + clientStatsForBatchGet.recordResponseKeyCount(v.size()); } }); } diff --git a/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java b/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java index d6ddeac24b7..2e0591fe90f 100644 --- a/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java +++ b/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java @@ -65,7 +65,7 @@ public void testGet(boolean reuseApi) throws ExecutionException, InterruptedExce assertTrue(metrics.get(".test_store--healthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--unhealthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--healthy_request_latency.Avg").value() > 0); - assertEquals(metrics.get(".test_store--success_request_key_count.Avg").value(), 1.0 / 2); + assertEquals(metrics.get(".test_store--success_request_key_count.Avg").value(), 1.0); assertEquals(metrics.get(".test_store--success_request_key_count.Max").value(), 1.0); assertTrue(metrics.get(".test_store--success_request_ratio.SimpleRatioStat").value() < 1.0); assertTrue(metrics.get(".test_store--success_request_key_ratio.SimpleRatioStat").value() < 1.0); @@ -105,7 +105,7 @@ public void testBatchGet() throws ExecutionException, InterruptedException { assertTrue(metrics.get(".test_store--multiget_healthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--multiget_unhealthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--multiget_healthy_request_latency.Avg").value() > 0); - assertEquals(metrics.get(".test_store--multiget_success_request_key_count.Avg").value(), 1.0); // round (2.0 / 3) + assertEquals(metrics.get(".test_store--multiget_success_request_key_count.Avg").value(), 2.0); assertEquals(metrics.get(".test_store--multiget_success_request_key_count.Max").value(), 2.0); assertTrue(metrics.get(".test_store--multiget_success_request_ratio.SimpleRatioStat").value() < 1.0); assertTrue(metrics.get(".test_store--multiget_success_request_key_ratio.SimpleRatioStat").value() < 1.0); diff --git a/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java b/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java index af97e007252..a5e7a6a1983 100644 --- a/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java +++ b/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java @@ -177,9 +177,8 @@ private CompletableFuture recordRequestMetrics( clientStats.recordResponseDeserializationTime(requestContext.responseDeserializationTime); } } - // We want to record the partial success key count number, no matter the request is healthy or unhealthy. - int successKeyCount = requestContext.successRequestKeyCount.get(); - clientStats.recordSuccessRequestKeyCount(successKeyCount); + // We want to record the response key count number, no matter the request is healthy or unhealthy. + clientStats.recordResponseKeyCount(requestContext.successRequestKeyCount.get()); if (requestContext.noAvailableReplica) { clientStats.recordNoAvailableReplicaRequest(); @@ -201,8 +200,7 @@ private CompletableFuture recordRequestMetrics( if (!exceptionReceived) { if (getRequestContext.retryContext.retryWin) { clientStats.recordRetryRequestWin(); - retrySuccessKeyCount = 1; - clientStats.recordRetryRequestSuccessKeyCount(retrySuccessKeyCount); + clientStats.recordRetryRequestSuccessKeyCount(1); } } } @@ -217,8 +215,7 @@ private CompletableFuture recordRequestMetrics( clientStats.recordRetryRequestKeyCount(retryRequestContext.numKeysInRequest); clientStats.recordRetryFanoutSize(retryRequestContext.getFanoutSize()); if (!exceptionReceived) { - retrySuccessKeyCount = retryRequestContext.numKeysCompleted.get(); - clientStats.recordRetryRequestSuccessKeyCount(retrySuccessKeyCount); + clientStats.recordRetryRequestSuccessKeyCount(retryRequestContext.numKeysCompleted.get()); if (retryRequestContext.numKeysCompleted.get() > 0) { clientStats.recordRetryRequestWin(); } @@ -226,9 +223,6 @@ private CompletableFuture recordRequestMetrics( } } - // numberOfKeys = successKeyCount + retrySuccessKeyCount + failedKeyCount. - clientStats.recordFailedRequestKeyCount(numberOfKeys - successKeyCount - retrySuccessKeyCount, throwable); - if (exceptionReceived) { // throw an exception after incrementing some error related metrics if (throwable instanceof VeniceClientException) { diff --git a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java index 76adeaaf509..f204f7632d9 100644 --- a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java +++ b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java @@ -2,17 +2,13 @@ import static com.linkedin.venice.stats.dimensions.HttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static com.linkedin.venice.stats.dimensions.HttpResponseStatusEnum.transformIntToHttpResponseStatusEnum; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; +import static com.linkedin.venice.stats.dimensions.MessageType.*; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.*; import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.FAIL; import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.SUCCESS; import static com.linkedin.venice.stats.metrics.ModuleMetricEntityInterface.getUniqueMetricEntities; import static com.linkedin.venice.utils.CollectionUtils.setOf; import static org.apache.hc.core5.http.HttpStatus.SC_NOT_FOUND; -import static org.apache.hc.core5.http.HttpStatus.SC_NO_CONTENT; import static org.apache.hc.core5.http.HttpStatus.SC_OK; import com.linkedin.venice.client.exceptions.VeniceClientHttpException; @@ -26,6 +22,7 @@ import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository; import com.linkedin.venice.stats.dimensions.HttpResponseStatusCodeCategory; import com.linkedin.venice.stats.dimensions.HttpResponseStatusEnum; +import com.linkedin.venice.stats.dimensions.MessageType; import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory; import com.linkedin.venice.stats.metrics.MetricEntity; @@ -73,12 +70,10 @@ public class BasicClientStats extends AbstractVeniceHttpStats { private final MetricEntityStateOneEnum unhealthyRequestMetricForDavinciClient; private final MetricEntityStateOneEnum healthyLatencyMetricForDavinciClient; private final MetricEntityStateOneEnum unhealthyLatencyMetricForDavinciClient; - private final Sensor requestKeyCountSensor; - private final MetricEntityStateThreeEnums healthyKeyCountMetric; - private final MetricEntityStateThreeEnums unhealthyKeyCountMetric; - private final MetricEntityStateOneEnum healthyKeyCountMetricForDavinciClient; - private final MetricEntityStateOneEnum unhealthyKeyCountMetricForDavinciClient; - private final Sensor successRequestKeyCountSensor; + private final MetricEntityStateOneEnum requestKeyCount; + private final MetricEntityStateOneEnum responseKeyCount; + private final MetricEntityStateOneEnum requestKeyCountDvc; + private final MetricEntityStateOneEnum responseKeyCountDvc; private final Sensor successRequestRatioSensor; private final Sensor successRequestKeyRatioSensor; private final Rate requestRate = new OccurrenceRate(); @@ -132,6 +127,7 @@ protected BasicClientStats( // requestSensor will be a derived metric in OTel requestSensor = registerSensor("request", requestRate); Rate healthyRequestRate = new OccurrenceRate(); + Rate requestKeyCountRate = new Rate(); if (clientType.equals(ClientType.DAVINCI_CLIENT)) { healthyRequestMetricForDavinciClient = MetricEntityStateOneEnum.create( @@ -177,30 +173,30 @@ protected BasicClientStats( getBaseDimensionsMap(), VeniceResponseStatusCategory.class); - healthyKeyCountMetricForDavinciClient = MetricEntityStateOneEnum.create( + requestKeyCountDvc = MetricEntityStateOneEnum.create( BasicClientMetricEntity.KEY_COUNT_DVC.getMetricEntity(), getOtelRepository(), this::registerSensor, - BasicClientTehutiMetricName.SUCCESS_REQUEST_KEY_COUNT, - Arrays.asList(successRequestKeyCountRate, new Avg(), new Max()), + BasicClientTehutiMetricName.REQUEST_KEY_COUNT, + Arrays.asList(requestKeyCountRate, new Avg(), new Max()), baseDimensionsMap, - VeniceResponseStatusCategory.class); + MessageType.class); - unhealthyKeyCountMetricForDavinciClient = MetricEntityStateOneEnum.create( + responseKeyCountDvc = MetricEntityStateOneEnum.create( BasicClientMetricEntity.KEY_COUNT_DVC.getMetricEntity(), otelRepository, this::registerSensor, - BasicClientTehutiMetricName.FAILED_REQUEST_KEY_COUNT, - Arrays.asList(new Rate(), new Avg(), new Max()), + BasicClientTehutiMetricName.SUCCESS_REQUEST_KEY_COUNT, + Arrays.asList(successRequestKeyCountRate, new Avg(), new Max()), baseDimensionsMap, - VeniceResponseStatusCategory.class); + MessageType.class); healthyRequestMetric = null; unhealthyRequestMetric = null; healthyLatencyMetric = null; unhealthyLatencyMetric = null; - healthyKeyCountMetric = null; - unhealthyKeyCountMetric = null; + requestKeyCount = null; + responseKeyCount = null; } else { healthyRequestMetric = MetricEntityStateThreeEnums.create( BasicClientMetricEntity.CALL_COUNT.getMetricEntity(), @@ -251,46 +247,38 @@ protected BasicClientStats( HttpResponseStatusEnum.class, HttpResponseStatusCodeCategory.class, VeniceResponseStatusCategory.class); - // key count - healthyKeyCountMetric = MetricEntityStateThreeEnums.create( + + // request key count + requestKeyCount = MetricEntityStateOneEnum.create( BasicClientMetricEntity.KEY_COUNT.getMetricEntity(), getOtelRepository(), this::registerSensor, - BasicClientTehutiMetricName.SUCCESS_REQUEST_KEY_COUNT, - Arrays.asList(successRequestKeyCountRate, new Avg(), new Max()), + BasicClientTehutiMetricName.REQUEST_KEY_COUNT, + Arrays.asList(requestKeyCountRate, new Avg(), new Max()), baseDimensionsMap, - HttpResponseStatusEnum.class, - HttpResponseStatusCodeCategory.class, - VeniceResponseStatusCategory.class); - unhealthyKeyCountMetric = MetricEntityStateThreeEnums.create( + MessageType.class); + + responseKeyCount = MetricEntityStateOneEnum.create( BasicClientMetricEntity.KEY_COUNT.getMetricEntity(), otelRepository, this::registerSensor, - BasicClientTehutiMetricName.FAILED_REQUEST_KEY_COUNT, - Arrays.asList(new Rate(), new Avg(), new Max()), + BasicClientTehutiMetricName.SUCCESS_REQUEST_KEY_COUNT, + Arrays.asList(successRequestKeyCountRate, new Avg(), new Max()), baseDimensionsMap, - HttpResponseStatusEnum.class, - HttpResponseStatusCodeCategory.class, - VeniceResponseStatusCategory.class); + MessageType.class); healthyRequestMetricForDavinciClient = null; unhealthyRequestMetricForDavinciClient = null; healthyLatencyMetricForDavinciClient = null; unhealthyLatencyMetricForDavinciClient = null; - healthyKeyCountMetricForDavinciClient = null; - unhealthyKeyCountMetricForDavinciClient = null; + requestKeyCountDvc = null; + responseKeyCountDvc = null; } // successRequestRatioSensor will be a derived metric in OTel successRequestRatioSensor = registerSensor(new TehutiUtils.SimpleRatioStat(healthyRequestRate, requestRate, "success_request_ratio")); - // key count - Rate requestKeyCountRate = new Rate(); - requestKeyCountSensor = registerSensor("request_key_count", requestKeyCountRate, new Avg(), new Max()); - successRequestKeyCountSensor = - registerSensor("success_request_key_count", successRequestKeyCountRate, new Avg(), new Max()); - successRequestKeyRatioSensor = registerSensor( new TehutiUtils.SimpleRatioStat(successRequestKeyCountRate, requestKeyCountRate, "success_request_key_ratio")); } @@ -345,39 +333,15 @@ public void emitUnhealthyRequestMetricsForDavinciClient(double latency) { } public void recordRequestKeyCount(int keyCount) { - requestKeyCountSensor.record(keyCount); - } - - public void recordSuccessRequestKeyCount(int successKeyCount) { - successRequestKeyCountSensor.record(successKeyCount); - if (ClientType.isDavinciClient(this.clientType)) { - healthyKeyCountMetricForDavinciClient.record(successKeyCount, SUCCESS); - } else { - int httpStatus = getHealthyRequestHttpStatus(successKeyCount); - HttpResponseStatusEnum statusEnum = transformIntToHttpResponseStatusEnum(httpStatus); - HttpResponseStatusCodeCategory httpCategory = getVeniceHttpResponseStatusCodeCategory(httpStatus); - healthyKeyCountMetric.record(successKeyCount, statusEnum, httpCategory, SUCCESS); - } - } - - public void recordFailedRequestKeyCount(int failedKeyCount, Throwable throwable) { - if (ClientType.isDavinciClient(this.clientType)) { - unhealthyKeyCountMetricForDavinciClient.record(failedKeyCount, FAIL); - } else { - /** - * When throwable is null and the failed key count is 0, it means that the request was successful. However, - * we still need to record the failed key count as 0, and thus we use a default http status of SC_NO_CONTENT - * to indicate success. - */ - int httpStatus = throwable != null ? getUnhealthyRequestHttpStatus(throwable) : SC_NO_CONTENT; - HttpResponseStatusEnum statusEnum = transformIntToHttpResponseStatusEnum(httpStatus); - HttpResponseStatusCodeCategory httpCategory = getVeniceHttpResponseStatusCodeCategory(httpStatus); - unhealthyKeyCountMetric.record(failedKeyCount, statusEnum, httpCategory, FAIL); - } + MetricEntityStateOneEnum keyCountMetric = + ClientType.isDavinciClient(this.clientType) ? requestKeyCountDvc : requestKeyCount; + keyCountMetric.record(keyCount, REQUEST); } - public void recordFailedRequestKeyCount(int failedKeyCount) { - recordFailedRequestKeyCount(failedKeyCount, null); + public void recordResponseKeyCount(int keyCount) { + MetricEntityStateOneEnum keyCountMetric = + ClientType.isDavinciClient(this.clientType) ? responseKeyCountDvc : responseKeyCount; + keyCountMetric.record(keyCount, RESPONSE); } protected final Rate getRequestRate() { @@ -438,8 +402,8 @@ private Map getBaseDimensionsMap() { * Metric names for tehuti metrics used in this class. */ public enum BasicClientTehutiMetricName implements TehutiMetricNameEnum { - HEALTHY_REQUEST, UNHEALTHY_REQUEST, HEALTHY_REQUEST_LATENCY, UNHEALTHY_REQUEST_LATENCY, SUCCESS_REQUEST_KEY_COUNT, - FAILED_REQUEST_KEY_COUNT; + HEALTHY_REQUEST, UNHEALTHY_REQUEST, HEALTHY_REQUEST_LATENCY, UNHEALTHY_REQUEST_LATENCY, REQUEST_KEY_COUNT, + SUCCESS_REQUEST_KEY_COUNT; private final String metricName; @@ -480,16 +444,11 @@ public enum BasicClientMetricEntity implements ModuleMetricEntityInterface { VENICE_RESPONSE_STATUS_CODE_CATEGORY) ), /** - * Count of keys during response handling along with response codes + * Count of keys for venice client request and response. */ KEY_COUNT( - MetricType.HISTOGRAM, MetricUnit.NUMBER, "Count of keys during response handling along with response codes", - setOf( - VENICE_STORE_NAME, - VENICE_REQUEST_METHOD, - HTTP_RESPONSE_STATUS_CODE, - HTTP_RESPONSE_STATUS_CODE_CATEGORY, - VENICE_RESPONSE_STATUS_CODE_CATEGORY) + MetricType.HISTOGRAM, MetricUnit.NUMBER, "Count of keys for venice client request and response", + setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_MESSAGE_TYPE) ), /** * Count of all DaVinci requests: as DaVinci is local reads, we do not track HTTP response codes @@ -508,12 +467,12 @@ public enum BasicClientMetricEntity implements ModuleMetricEntityInterface { setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_RESPONSE_STATUS_CODE_CATEGORY) ), /** - * Count of keys during response handling along with response codes + * Count of keys for all DaVinci Client request and response */ KEY_COUNT_DVC( KEY_COUNT.name().toLowerCase(), MetricType.HISTOGRAM, MetricUnit.NUMBER, - "Count of keys for all DaVinci Client responses", - setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_RESPONSE_STATUS_CODE_CATEGORY) + "Count of keys for all DaVinci Client request and response", + setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_MESSAGE_TYPE) ); private final MetricEntity metricEntity; diff --git a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/store/StatTrackingStoreClient.java b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/store/StatTrackingStoreClient.java index 090c616a145..52cbbff715e 100644 --- a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/store/StatTrackingStoreClient.java +++ b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/store/StatTrackingStoreClient.java @@ -97,7 +97,7 @@ public CompletableFuture get(K key) { CompletableFuture innerFuture = super.get(key, Optional.of(singleGetStats), startTimeInNS); singleGetStats.recordRequestKeyCount(1); CompletableFuture statFuture = innerFuture - .handle((BiFunction) getStatCallback(singleGetStats, 1, startTimeInNS)); + .handle((BiFunction) getStatCallback(singleGetStats, startTimeInNS)); return AppTimeOutTrackingCompletableFuture.track(statFuture, singleGetStats); } @@ -107,7 +107,7 @@ public CompletableFuture getRaw(String requestPath) { CompletableFuture innerFuture = super.getRaw(requestPath, Optional.of(schemaReaderStats), startTimeInNS); schemaReaderStats.recordRequestKeyCount(1); CompletableFuture statFuture = innerFuture.handle( - (BiFunction) getStatCallback(schemaReaderStats, 1, startTimeInNS)); + (BiFunction) getStatCallback(schemaReaderStats, startTimeInNS)); return statFuture; } @@ -182,7 +182,6 @@ private static class StatTrackingStreamingCallback extends DelegatingTrack private final Optional statsOptional; private final long preRequestTimeInNS; private final StreamingResponseTracker streamingResponseTracker; - private final int keyCnt; public StatTrackingStreamingCallback( StreamingCallback callback, @@ -193,7 +192,6 @@ public StatTrackingStreamingCallback( this.stats = stats; this.statsOptional = Optional.of(stats); this.preRequestTimeInNS = preRequestTimeInNS; - this.keyCnt = keyCnt; streamingResponseTracker = new StreamingResponseTracker(stats, keyCnt, preRequestTimeInNS); } @@ -217,7 +215,6 @@ public void onDeserializationCompletion( preRequestTimeInNS, exception, successKeyCount, - keyCnt, duplicateEntryCount); } } @@ -260,7 +257,6 @@ private static void handleMetricTrackingForStreamingCallback( long startTimeInNS, Optional exception, int successKeyCnt, - int keyCnt, int duplicateEntryCnt) { double latency = LatencyUtils.getElapsedTimeFromNSToMS(startTimeInNS); if (exception.isPresent()) { @@ -268,9 +264,7 @@ private static void handleMetricTrackingForStreamingCallback( } else { clientStats.emitHealthyRequestMetrics(latency, successKeyCnt); } - clientStats.recordSuccessRequestKeyCount(successKeyCnt); - // keyCnt = successKeyCnt + failedKeyCnt - clientStats.recordFailedRequestKeyCount(keyCnt - successKeyCnt); + clientStats.recordResponseKeyCount(successKeyCnt); clientStats.recordSuccessDuplicateRequestKeyCount(duplicateEntryCnt); } @@ -286,7 +280,6 @@ public ComputeRequestBuilder compute() throws VeniceClientException { public static BiFunction getStatCallback( ClientStats clientStats, - int keyCount, long startTimeInNS) { return (T value, Throwable throwable) -> { double latency = LatencyUtils.getElapsedTimeFromNSToMS(startTimeInNS); @@ -296,9 +289,7 @@ public ComputeRequestBuilder compute() throws VeniceClientException { } clientStats.emitHealthyRequestMetrics(latency, value); - int successfulKeyCount = getSuccessfulKeyCount(value); - clientStats.recordSuccessRequestKeyCount(successfulKeyCount); - clientStats.recordFailedRequestKeyCount(keyCount - successfulKeyCount, throwable); + clientStats.recordResponseKeyCount(getSuccessfulKeyCount(value)); return value; }; } diff --git a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java index c6e926fcdee..6e1af246776 100644 --- a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java +++ b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java @@ -7,16 +7,10 @@ import static com.linkedin.venice.stats.VeniceMetricsRepository.getVeniceMetricsRepository; import static com.linkedin.venice.stats.dimensions.HttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static com.linkedin.venice.stats.dimensions.HttpResponseStatusEnum.transformIntToHttpResponseStatusEnum; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; +import static com.linkedin.venice.stats.dimensions.MessageType.*; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.*; import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.*; -import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.getExponentialHistogramPointData; -import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.getLongPointData; -import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.validateExponentialHistogramPointData; -import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.validateLongPointData; +import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.*; import static org.apache.http.HttpStatus.SC_OK; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; @@ -25,6 +19,7 @@ import com.linkedin.venice.client.store.ClientConfig; import com.linkedin.venice.stats.ClientType; import com.linkedin.venice.stats.VeniceMetricsRepository; +import com.linkedin.venice.stats.dimensions.MessageType; import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory; import com.linkedin.venice.stats.metrics.MetricEntity; import com.linkedin.venice.stats.metrics.MetricType; @@ -148,33 +143,35 @@ public void testEmitUnhealthyRequestMetricsForDavinciClientWithWrongClientType() } @Test(dataProviderClass = DataProviderUtils.class, dataProvider = "True-and-False") - public void testKeyCountMetricsForDaVinciClient(boolean isSuccess) { + public void testKeyCountMetricsForDaVinciClient(boolean isRequest) { InMemoryMetricReader inMemoryMetricReader = InMemoryMetricReader.create(); BasicClientStats stats = createStats(inMemoryMetricReader, DAVINCI_CLIENT); + int keyCount = 10; - if (isSuccess) { - stats.recordSuccessRequestKeyCount(keyCount); + + if (isRequest) { + stats.recordRequestKeyCount(keyCount); } else { - stats.recordFailedRequestKeyCount(keyCount); + stats.recordResponseKeyCount(keyCount); } // Check Tehuti metrics Map metrics = stats.getMetricsRepository().metrics(); String storeName = "test_store"; - if (isSuccess) { + if (isRequest) { + Assert.assertEquals((int) metrics.get(String.format(".%s--request_key_count.Max", storeName)).value(), keyCount); + } else { Assert.assertEquals( (int) metrics.get(String.format(".%s--success_request_key_count.Max", storeName)).value(), keyCount); - // We don't have failure key count metrics for key count in Tehuti. } // Check OpenTelemetry metrics Collection metricsData = inMemoryMetricReader.collectAllMetrics(); - Attributes expectedAttributes = getExpectedAttributes(storeName, -1, isSuccess ? SUCCESS : FAIL); - + Attributes expectedAttr = getAttributes(storeName, isRequest ? REQUEST : RESPONSE); ExponentialHistogramPointData data = getExponentialHistogramPointData(metricsData, "key_count", DAVINCI_CLIENT.getMetricsPrefix()); - validateExponentialHistogramPointData(data, keyCount, keyCount, 1, keyCount, expectedAttributes); + validateExponentialHistogramPointData(data, keyCount, keyCount, 1, keyCount, expectedAttr); } private BasicClientStats createStats(InMemoryMetricReader inMemoryMetricReader, ClientType clientType) { @@ -369,4 +366,12 @@ private Attributes getExpectedAttributes( } return builder.build(); } + + private Attributes getAttributes(String storeName, MessageType type) { + AttributesBuilder builder = Attributes.builder() + .put(VENICE_STORE_NAME.getDimensionNameInDefaultFormat(), storeName) + .put(VENICE_REQUEST_METHOD.getDimensionNameInDefaultFormat(), SINGLE_GET.getDimensionValue()) + .put(VENICE_MESSAGE_TYPE.getDimensionNameInDefaultFormat(), type.getDimensionValue()); + return builder.build(); + } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/MessageType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/MessageType.java new file mode 100644 index 00000000000..fbfae6a0b7a --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/MessageType.java @@ -0,0 +1,28 @@ +package com.linkedin.venice.stats.dimensions; + +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_MESSAGE_TYPE; + + +public enum MessageType implements VeniceDimensionInterface { + REQUEST, RESPONSE; + + private final String type; + + MessageType() { + this.type = name().toLowerCase(); + } + + /** + * All the instances of this Enum should have the same dimension name. + * Refer {@link VeniceDimensionInterface#getDimensionName()} for more details. + */ + @Override + public VeniceMetricsDimensions getDimensionName() { + return VENICE_MESSAGE_TYPE; + } + + @Override + public String getDimensionValue() { + return type; + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java index e92342e45a7..349214dfba1 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java @@ -27,6 +27,8 @@ public enum VeniceMetricsDimensions { /** {@link RequestRetryType} */ VENICE_REQUEST_RETRY_TYPE("venice.request.retry_type"), + VENICE_MESSAGE_TYPE("venice.message.type"), + /** {@link RequestRetryAbortReason} */ VENICE_REQUEST_RETRY_ABORT_REASON("venice.request.retry_abort_reason"); From e215da9fad1a4b2421049da12dea795cd8a94ea8 Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Mon, 19 May 2025 15:45:50 -0700 Subject: [PATCH 04/11] fix unit tests --- .../venice/client/stats/BasicClientStatsTest.java | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java index 6e1af246776..722810fc0ea 100644 --- a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java +++ b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java @@ -275,21 +275,16 @@ public void testClientMetricEntities() { "key_count", MetricType.HISTOGRAM, MetricUnit.NUMBER, - "Count of keys during response handling along with response codes", - Utils.setOf( - VENICE_STORE_NAME, - VENICE_REQUEST_METHOD, - HTTP_RESPONSE_STATUS_CODE, - HTTP_RESPONSE_STATUS_CODE_CATEGORY, - VENICE_RESPONSE_STATUS_CODE_CATEGORY))); + "Count of keys for venice client request and response", + Utils.setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_MESSAGE_TYPE))); expectedMetrics.put( BasicClientStats.BasicClientMetricEntity.KEY_COUNT_DVC, new MetricEntity( "key_count", MetricType.HISTOGRAM, MetricUnit.NUMBER, - "Count of keys for all DaVinci Client responses", - Utils.setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_RESPONSE_STATUS_CODE_CATEGORY))); + "Count of keys for all DaVinci Client request and response", + Utils.setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_MESSAGE_TYPE))); Set uniqueMetricEntitiesNames = new HashSet<>(); for (BasicClientStats.BasicClientMetricEntity metric: BasicClientStats.BasicClientMetricEntity.values()) { From 859d1a1fe0d53c32ce67367f02dfbaabad695a22 Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Mon, 19 May 2025 15:57:39 -0700 Subject: [PATCH 05/11] fix unit test VeniceMetricsDimensionsTest --- .../stats/dimensions/VeniceMetricsDimensionsTest.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java index 56a5ab48f6c..9234947590c 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java @@ -37,6 +37,9 @@ public void testGetDimensionNameInSnakeCase() { case VENICE_REQUEST_RETRY_ABORT_REASON: assertEquals(dimension.getDimensionName(format), "venice.request.retry_abort_reason"); break; + case VENICE_MESSAGE_TYPE: + assertEquals(dimension.getDimensionName(format), "venice.message.type"); + break; default: throw new IllegalArgumentException("Unknown dimension: " + dimension); } @@ -72,6 +75,9 @@ public void testGetDimensionNameInCamelCase() { case VENICE_REQUEST_RETRY_ABORT_REASON: assertEquals(dimension.getDimensionName(format), "venice.request.retryAbortReason"); break; + case VENICE_MESSAGE_TYPE: + assertEquals(dimension.getDimensionName(format), "venice.message.type"); + break; default: throw new IllegalArgumentException("Unknown dimension: " + dimension); } @@ -107,6 +113,9 @@ public void testGetDimensionNameInPascalCase() { case VENICE_REQUEST_RETRY_ABORT_REASON: assertEquals(dimension.getDimensionName(format), "Venice.Request.RetryAbortReason"); break; + case VENICE_MESSAGE_TYPE: + assertEquals(dimension.getDimensionName(format), "Venice.Message.Type"); + break; default: throw new IllegalArgumentException("Unknown dimension: " + dimension); } From 0a09a0b54697c715609e3fb0efd2eb5d93a604ed Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Mon, 19 May 2025 16:24:49 -0700 Subject: [PATCH 06/11] remove unused local var --- .../linkedin/venice/fastclient/StatsAvroGenericStoreClient.java | 1 - 1 file changed, 1 deletion(-) diff --git a/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java b/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java index a5e7a6a1983..02b978f2f50 100644 --- a/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java +++ b/clients/venice-client/src/main/java/com/linkedin/venice/fastclient/StatsAvroGenericStoreClient.java @@ -184,7 +184,6 @@ private CompletableFuture recordRequestMetrics( clientStats.recordNoAvailableReplicaRequest(); } - int retrySuccessKeyCount = 0; if (requestContext instanceof GetRequestContext) { GetRequestContext getRequestContext = (GetRequestContext) requestContext; From db0b8397ff0826c3e70c1c49565064e2f21c314e Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Tue, 20 May 2025 10:46:42 -0700 Subject: [PATCH 07/11] fix imports --- .../linkedin/venice/client/stats/BasicClientStats.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java index f204f7632d9..23fc51d82be 100644 --- a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java +++ b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java @@ -2,8 +2,14 @@ import static com.linkedin.venice.stats.dimensions.HttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static com.linkedin.venice.stats.dimensions.HttpResponseStatusEnum.transformIntToHttpResponseStatusEnum; -import static com.linkedin.venice.stats.dimensions.MessageType.*; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.*; +import static com.linkedin.venice.stats.dimensions.MessageType.REQUEST; +import static com.linkedin.venice.stats.dimensions.MessageType.RESPONSE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_MESSAGE_TYPE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.FAIL; import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.SUCCESS; import static com.linkedin.venice.stats.metrics.ModuleMetricEntityInterface.getUniqueMetricEntities; From ec812948bf4e7a679150048e1674f4ac590e1a26 Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Thu, 22 May 2025 12:56:39 -0700 Subject: [PATCH 08/11] address review comments --- .../client/StatsAvroGenericDaVinciClient.java | 11 +++--- .../venice/client/stats/BasicClientStats.java | 38 +------------------ .../client/stats/BasicClientStatsTest.java | 22 +++++------ .../dimensions/VeniceMetricsDimensions.java | 1 + .../stats/dimensions/MessageTypeTest.java | 24 ++++++++++++ 5 files changed, 43 insertions(+), 53 deletions(-) create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/MessageTypeTest.java diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java index 30df70a3204..e24cad72f5b 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClient.java @@ -81,9 +81,8 @@ public CompletableFuture get(K key) { public CompletableFuture get(K key, V reusableValue) { clientStatsForSingleGet.recordRequestKeyCount(1); return trackRequest(clientStatsForSingleGet, () -> super.get(key, reusableValue)).whenComplete((v, throwable) -> { - if (throwable == null && v != null) { - clientStatsForSingleGet.recordResponseKeyCount(1); - } + int responseKeyCount = (throwable == null && v != null) ? 1 : 0; + clientStatsForSingleGet.recordResponseKeyCount(responseKeyCount); }); } @@ -91,9 +90,9 @@ public CompletableFuture get(K key, V reusableValue) { public CompletableFuture> batchGet(Set keys) { clientStatsForBatchGet.recordRequestKeyCount(keys.size()); return trackRequest(clientStatsForBatchGet, () -> super.batchGet(keys)).whenComplete((v, throwable) -> { - if (v != null) { - clientStatsForBatchGet.recordResponseKeyCount(v.size()); - } + // Always record the response key count number, no matter the request is healthy or not. + int responseKeyCount = (v != null) ? v.size() : 0; + clientStatsForBatchGet.recordResponseKeyCount(responseKeyCount); }); } } diff --git a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java index 23fc51d82be..8aa298d1060 100644 --- a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java +++ b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java @@ -78,8 +78,6 @@ public class BasicClientStats extends AbstractVeniceHttpStats { private final MetricEntityStateOneEnum unhealthyLatencyMetricForDavinciClient; private final MetricEntityStateOneEnum requestKeyCount; private final MetricEntityStateOneEnum responseKeyCount; - private final MetricEntityStateOneEnum requestKeyCountDvc; - private final MetricEntityStateOneEnum responseKeyCountDvc; private final Sensor successRequestRatioSensor; private final Sensor successRequestKeyRatioSensor; private final Rate requestRate = new OccurrenceRate(); @@ -179,24 +177,6 @@ protected BasicClientStats( getBaseDimensionsMap(), VeniceResponseStatusCategory.class); - requestKeyCountDvc = MetricEntityStateOneEnum.create( - BasicClientMetricEntity.KEY_COUNT_DVC.getMetricEntity(), - getOtelRepository(), - this::registerSensor, - BasicClientTehutiMetricName.REQUEST_KEY_COUNT, - Arrays.asList(requestKeyCountRate, new Avg(), new Max()), - baseDimensionsMap, - MessageType.class); - - responseKeyCountDvc = MetricEntityStateOneEnum.create( - BasicClientMetricEntity.KEY_COUNT_DVC.getMetricEntity(), - otelRepository, - this::registerSensor, - BasicClientTehutiMetricName.SUCCESS_REQUEST_KEY_COUNT, - Arrays.asList(successRequestKeyCountRate, new Avg(), new Max()), - baseDimensionsMap, - MessageType.class); - healthyRequestMetric = null; unhealthyRequestMetric = null; healthyLatencyMetric = null; @@ -277,8 +257,6 @@ protected BasicClientStats( unhealthyRequestMetricForDavinciClient = null; healthyLatencyMetricForDavinciClient = null; unhealthyLatencyMetricForDavinciClient = null; - requestKeyCountDvc = null; - responseKeyCountDvc = null; } // successRequestRatioSensor will be a derived metric in OTel @@ -339,15 +317,11 @@ public void emitUnhealthyRequestMetricsForDavinciClient(double latency) { } public void recordRequestKeyCount(int keyCount) { - MetricEntityStateOneEnum keyCountMetric = - ClientType.isDavinciClient(this.clientType) ? requestKeyCountDvc : requestKeyCount; - keyCountMetric.record(keyCount, REQUEST); + requestKeyCount.record(keyCount, REQUEST); } public void recordResponseKeyCount(int keyCount) { - MetricEntityStateOneEnum keyCountMetric = - ClientType.isDavinciClient(this.clientType) ? responseKeyCountDvc : responseKeyCount; - keyCountMetric.record(keyCount, RESPONSE); + responseKeyCount.record(keyCount, RESPONSE); } protected final Rate getRequestRate() { @@ -471,14 +445,6 @@ public enum BasicClientMetricEntity implements ModuleMetricEntityInterface { CALL_TIME.name().toLowerCase(), MetricType.HISTOGRAM, MetricUnit.MILLISECOND, "Latency for all DaVinci Client responses", setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_RESPONSE_STATUS_CODE_CATEGORY) - ), - /** - * Count of keys for all DaVinci Client request and response - */ - KEY_COUNT_DVC( - KEY_COUNT.name().toLowerCase(), MetricType.HISTOGRAM, MetricUnit.NUMBER, - "Count of keys for all DaVinci Client request and response", - setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_MESSAGE_TYPE) ); private final MetricEntity metricEntity; diff --git a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java index 722810fc0ea..b33349d5c5b 100644 --- a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java +++ b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java @@ -8,9 +8,17 @@ import static com.linkedin.venice.stats.dimensions.HttpResponseStatusCodeCategory.getVeniceHttpResponseStatusCodeCategory; import static com.linkedin.venice.stats.dimensions.HttpResponseStatusEnum.transformIntToHttpResponseStatusEnum; import static com.linkedin.venice.stats.dimensions.MessageType.*; -import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.*; -import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.*; -import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.*; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_MESSAGE_TYPE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; +import static com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory.SUCCESS; +import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.getExponentialHistogramPointData; +import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.getLongPointData; +import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.validateExponentialHistogramPointData; +import static com.linkedin.venice.utils.OpenTelemetryDataPointTestUtils.validateLongPointData; import static org.apache.http.HttpStatus.SC_OK; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; @@ -277,14 +285,6 @@ public void testClientMetricEntities() { MetricUnit.NUMBER, "Count of keys for venice client request and response", Utils.setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_MESSAGE_TYPE))); - expectedMetrics.put( - BasicClientStats.BasicClientMetricEntity.KEY_COUNT_DVC, - new MetricEntity( - "key_count", - MetricType.HISTOGRAM, - MetricUnit.NUMBER, - "Count of keys for all DaVinci Client request and response", - Utils.setOf(VENICE_STORE_NAME, VENICE_REQUEST_METHOD, VENICE_MESSAGE_TYPE))); Set uniqueMetricEntitiesNames = new HashSet<>(); for (BasicClientStats.BasicClientMetricEntity metric: BasicClientStats.BasicClientMetricEntity.values()) { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java index 349214dfba1..141a5b2dbd9 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java @@ -27,6 +27,7 @@ public enum VeniceMetricsDimensions { /** {@link RequestRetryType} */ VENICE_REQUEST_RETRY_TYPE("venice.request.retry_type"), + /** {@link com.linkedin.venice.stats.dimensions.MessageType} */ VENICE_MESSAGE_TYPE("venice.message.type"), /** {@link RequestRetryAbortReason} */ diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/MessageTypeTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/MessageTypeTest.java new file mode 100644 index 00000000000..4cbc0550080 --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/dimensions/MessageTypeTest.java @@ -0,0 +1,24 @@ +package com.linkedin.venice.stats.dimensions; + +import com.linkedin.venice.utils.CollectionUtils; +import java.util.Map; + + +public class MessageTypeTest extends VeniceDimensionInterfaceTest { + protected MessageTypeTest() { + super(MessageType.class); + } + + @Override + protected VeniceMetricsDimensions expectedDimensionName() { + return VeniceMetricsDimensions.VENICE_MESSAGE_TYPE; + } + + @Override + protected Map expectedDimensionValueMapping() { + return CollectionUtils.mapBuilder() + .put(MessageType.REQUEST, "request") + .put(MessageType.RESPONSE, "response") + .build(); + } +} From e71d5254759f392e9331f9e4af542dec88b14d67 Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Thu, 22 May 2025 15:03:43 -0700 Subject: [PATCH 09/11] fix unit tests --- .../venice/client/stats/BasicClientStats.java | 41 +++++++------- .../client/stats/BasicClientStatsTest.java | 56 ++++++++++--------- 2 files changed, 49 insertions(+), 48 deletions(-) diff --git a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java index 8aa298d1060..3cd0fbe7ddd 100644 --- a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java +++ b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java @@ -181,8 +181,6 @@ protected BasicClientStats( unhealthyRequestMetric = null; healthyLatencyMetric = null; unhealthyLatencyMetric = null; - requestKeyCount = null; - responseKeyCount = null; } else { healthyRequestMetric = MetricEntityStateThreeEnums.create( BasicClientMetricEntity.CALL_COUNT.getMetricEntity(), @@ -233,32 +231,31 @@ protected BasicClientStats( HttpResponseStatusEnum.class, HttpResponseStatusCodeCategory.class, VeniceResponseStatusCategory.class); - - // request key count - requestKeyCount = MetricEntityStateOneEnum.create( - BasicClientMetricEntity.KEY_COUNT.getMetricEntity(), - getOtelRepository(), - this::registerSensor, - BasicClientTehutiMetricName.REQUEST_KEY_COUNT, - Arrays.asList(requestKeyCountRate, new Avg(), new Max()), - baseDimensionsMap, - MessageType.class); - - responseKeyCount = MetricEntityStateOneEnum.create( - BasicClientMetricEntity.KEY_COUNT.getMetricEntity(), - otelRepository, - this::registerSensor, - BasicClientTehutiMetricName.SUCCESS_REQUEST_KEY_COUNT, - Arrays.asList(successRequestKeyCountRate, new Avg(), new Max()), - baseDimensionsMap, - MessageType.class); - healthyRequestMetricForDavinciClient = null; unhealthyRequestMetricForDavinciClient = null; healthyLatencyMetricForDavinciClient = null; unhealthyLatencyMetricForDavinciClient = null; } + // request key count + requestKeyCount = MetricEntityStateOneEnum.create( + BasicClientMetricEntity.KEY_COUNT.getMetricEntity(), + getOtelRepository(), + this::registerSensor, + BasicClientTehutiMetricName.REQUEST_KEY_COUNT, + Arrays.asList(requestKeyCountRate, new Avg(), new Max()), + baseDimensionsMap, + MessageType.class); + + responseKeyCount = MetricEntityStateOneEnum.create( + BasicClientMetricEntity.KEY_COUNT.getMetricEntity(), + otelRepository, + this::registerSensor, + BasicClientTehutiMetricName.SUCCESS_REQUEST_KEY_COUNT, + Arrays.asList(successRequestKeyCountRate, new Avg(), new Max()), + baseDimensionsMap, + MessageType.class); + // successRequestRatioSensor will be a derived metric in OTel successRequestRatioSensor = registerSensor(new TehutiUtils.SimpleRatioStat(healthyRequestRate, requestRate, "success_request_ratio")); diff --git a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java index b33349d5c5b..9b04e6966c8 100644 --- a/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java +++ b/clients/venice-thin-client/src/test/java/com/linkedin/venice/client/stats/BasicClientStatsTest.java @@ -151,35 +151,39 @@ public void testEmitUnhealthyRequestMetricsForDavinciClientWithWrongClientType() } @Test(dataProviderClass = DataProviderUtils.class, dataProvider = "True-and-False") - public void testKeyCountMetricsForDaVinciClient(boolean isRequest) { - InMemoryMetricReader inMemoryMetricReader = InMemoryMetricReader.create(); - BasicClientStats stats = createStats(inMemoryMetricReader, DAVINCI_CLIENT); - - int keyCount = 10; + public void testKeyCountMetrics(boolean isRequest) { + for (ClientType client: ClientType.values()) { + // verify that the following works for all client types. + InMemoryMetricReader inMemoryMetricReader = InMemoryMetricReader.create(); + BasicClientStats stats = createStats(inMemoryMetricReader, client); + + int keyCount = 10; + + if (isRequest) { + stats.recordRequestKeyCount(keyCount); + } else { + stats.recordResponseKeyCount(keyCount); + } - if (isRequest) { - stats.recordRequestKeyCount(keyCount); - } else { - stats.recordResponseKeyCount(keyCount); - } + // Check Tehuti metrics + Map metrics = stats.getMetricsRepository().metrics(); + String storeName = "test_store"; + if (isRequest) { + Assert + .assertEquals((int) metrics.get(String.format(".%s--request_key_count.Max", storeName)).value(), keyCount); + } else { + Assert.assertEquals( + (int) metrics.get(String.format(".%s--success_request_key_count.Max", storeName)).value(), + keyCount); + } - // Check Tehuti metrics - Map metrics = stats.getMetricsRepository().metrics(); - String storeName = "test_store"; - if (isRequest) { - Assert.assertEquals((int) metrics.get(String.format(".%s--request_key_count.Max", storeName)).value(), keyCount); - } else { - Assert.assertEquals( - (int) metrics.get(String.format(".%s--success_request_key_count.Max", storeName)).value(), - keyCount); + // Check OpenTelemetry metrics + Collection metricsData = inMemoryMetricReader.collectAllMetrics(); + Attributes expectedAttr = getAttributes(storeName, isRequest ? REQUEST : RESPONSE); + ExponentialHistogramPointData data = + getExponentialHistogramPointData(metricsData, "key_count", client.getMetricsPrefix()); + validateExponentialHistogramPointData(data, keyCount, keyCount, 1, keyCount, expectedAttr); } - - // Check OpenTelemetry metrics - Collection metricsData = inMemoryMetricReader.collectAllMetrics(); - Attributes expectedAttr = getAttributes(storeName, isRequest ? REQUEST : RESPONSE); - ExponentialHistogramPointData data = - getExponentialHistogramPointData(metricsData, "key_count", DAVINCI_CLIENT.getMetricsPrefix()); - validateExponentialHistogramPointData(data, keyCount, keyCount, 1, keyCount, expectedAttr); } private BasicClientStats createStats(InMemoryMetricReader inMemoryMetricReader, ClientType clientType) { From c958ff389292a101953e9465837d1b466633bab6 Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Thu, 22 May 2025 17:10:24 -0700 Subject: [PATCH 10/11] fix dvc client unit tests --- .../davinci/client/StatsAvroGenericDaVinciClientTest.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java b/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java index 2e0591fe90f..38b21067e33 100644 --- a/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java +++ b/clients/da-vinci-client/src/test/java/com/linkedin/davinci/client/StatsAvroGenericDaVinciClientTest.java @@ -65,7 +65,9 @@ public void testGet(boolean reuseApi) throws ExecutionException, InterruptedExce assertTrue(metrics.get(".test_store--healthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--unhealthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--healthy_request_latency.Avg").value() > 0); - assertEquals(metrics.get(".test_store--success_request_key_count.Avg").value(), 1.0); + // we have 2 requests, one success and one failure and we would record the key count for the success request as 1 + // and the key count for the failure request as 0. + assertEquals(metrics.get(".test_store--success_request_key_count.Avg").value(), 1.0 / 2); assertEquals(metrics.get(".test_store--success_request_key_count.Max").value(), 1.0); assertTrue(metrics.get(".test_store--success_request_ratio.SimpleRatioStat").value() < 1.0); assertTrue(metrics.get(".test_store--success_request_key_ratio.SimpleRatioStat").value() < 1.0); @@ -105,7 +107,9 @@ public void testBatchGet() throws ExecutionException, InterruptedException { assertTrue(metrics.get(".test_store--multiget_healthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--multiget_unhealthy_request.OccurrenceRate").value() > 0); assertTrue(metrics.get(".test_store--multiget_healthy_request_latency.Avg").value() > 0); - assertEquals(metrics.get(".test_store--multiget_success_request_key_count.Avg").value(), 2.0); + // We have 3 batch get requests, one success with 2 keys, one failure, and one with run time exception. + // Key count for the success one is 2, failure one is 0, and the run time exception one is never recorded. + assertEquals(metrics.get(".test_store--multiget_success_request_key_count.Avg").value(), 2.0 / 2); assertEquals(metrics.get(".test_store--multiget_success_request_key_count.Max").value(), 2.0); assertTrue(metrics.get(".test_store--multiget_success_request_ratio.SimpleRatioStat").value() < 1.0); assertTrue(metrics.get(".test_store--multiget_success_request_key_ratio.SimpleRatioStat").value() < 1.0); From 3b985dde5a41462ef018efc4897d305e624beef4 Mon Sep 17 00:00:00 2001 From: Lei Lu Date: Thu, 22 May 2025 18:29:08 -0700 Subject: [PATCH 11/11] address comments --- .../java/com/linkedin/venice/client/stats/BasicClientStats.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java index 3cd0fbe7ddd..a8023a957d5 100644 --- a/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java +++ b/clients/venice-thin-client/src/main/java/com/linkedin/venice/client/stats/BasicClientStats.java @@ -133,7 +133,7 @@ protected BasicClientStats( Rate healthyRequestRate = new OccurrenceRate(); Rate requestKeyCountRate = new Rate(); - if (clientType.equals(ClientType.DAVINCI_CLIENT)) { + if (ClientType.isDavinciClient(clientType)) { healthyRequestMetricForDavinciClient = MetricEntityStateOneEnum.create( BasicClientMetricEntity.CALL_COUNT_DVC.getMetricEntity(), otelRepository,