Skip to content

SOLR-17628: Add query quantiles metrics to prometheus endpoint #3164

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jul 7, 2025
Merged
2 changes: 2 additions & 0 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ Improvements

* SOLR-17746: Provide long form --jettyconfig option to go with -j in bin/solr scripts. (Eric Pugh, Rahul Goswami)

* SOLR-17628: Export metric timers via `wt=prometheus` as Prometheus summaries. (Jude Muriithi, Matthew Biscocho)

Optimizations
---------------------
* SOLR-17578: Remove ZkController internal core supplier, for slightly faster reconnection after Zookeeper session loss. (Pierre Salagnac)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,24 @@

import com.codahale.metrics.Meter;
import com.codahale.metrics.Metric;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.Timer;
import io.prometheus.metrics.model.snapshots.CounterSnapshot;
import io.prometheus.metrics.model.snapshots.Exemplars;
import io.prometheus.metrics.model.snapshots.GaugeSnapshot;
import io.prometheus.metrics.model.snapshots.Labels;
import io.prometheus.metrics.model.snapshots.MetricMetadata;
import io.prometheus.metrics.model.snapshots.MetricSnapshot;
import io.prometheus.metrics.model.snapshots.MetricSnapshots;
import io.prometheus.metrics.model.snapshots.Quantile;
import io.prometheus.metrics.model.snapshots.Quantiles;
import io.prometheus.metrics.model.snapshots.SummarySnapshot;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.solr.util.stats.MetricUtils;

/**
* Base class for all {@link SolrPrometheusFormatter} holding {@link MetricSnapshot}s. Can export
Expand All @@ -38,10 +45,12 @@
public abstract class SolrPrometheusFormatter {
protected final Map<String, List<CounterSnapshot.CounterDataPointSnapshot>> metricCounters;
protected final Map<String, List<GaugeSnapshot.GaugeDataPointSnapshot>> metricGauges;
protected final Map<String, List<SummarySnapshot.SummaryDataPointSnapshot>> metricSummaries;

/** Creates a formatter whose counter, gauge, and summary collections all start out empty. */
public SolrPrometheusFormatter() {
  metricCounters = new HashMap<>();
  metricGauges = new HashMap<>();
  metricSummaries = new HashMap<>();
}

/**
Expand Down Expand Up @@ -93,18 +102,36 @@ public void exportCounter(
}

/**
 * Export {@link Timer} and its quantile data to {@link
 * io.prometheus.metrics.model.snapshots.SummarySnapshot.SummaryDataPointSnapshot} and collect
 * datapoint.
 *
 * <p>The timer is exported only as a summary. The count, the sum, and the quantiles are all read
 * from a single {@link Snapshot} of the timer, and every duration is passed through {@link
 * MetricUtils#nsToMs(double)} before it is recorded.
 *
 * @param metricName name of prometheus metric
 * @param dropwizardMetric the {@link Timer} to be exported
 * @param labels label names and values to record
 */
public void exportTimer(String metricName, Timer dropwizardMetric, Labels labels) {
  // Take the snapshot once so count, sum, and quantiles describe the same set of samples.
  Snapshot snapshot = dropwizardMetric.getSnapshot();

  long count = snapshot.size();
  double sum =
      Arrays.stream(snapshot.getValues()).asDoubleStream().map(MetricUtils::nsToMs).sum();

  Quantiles quantiles =
      Quantiles.of(
          List.of(
              new Quantile(0.50, MetricUtils.nsToMs(snapshot.getMedian())),
              new Quantile(0.75, MetricUtils.nsToMs(snapshot.get75thPercentile())),
              new Quantile(0.99, MetricUtils.nsToMs(snapshot.get99thPercentile())),
              new Quantile(0.999, MetricUtils.nsToMs(snapshot.get999thPercentile()))));

  // NOTE(review): the trailing 0L is presumably "no created timestamp" -- confirm against the
  // SummaryDataPointSnapshot constructor documentation.
  var summary =
      new SummarySnapshot.SummaryDataPointSnapshot(
          count, sum, quantiles, labels, Exemplars.EMPTY, 0L);
  collectSummaryDatapoint(metricName, summary);
}

/**
Expand Down Expand Up @@ -206,20 +233,44 @@ public void collectGaugeDatapoint(
metricGauges.get(metricName).add(dataPoint);
}

/**
 * Collects a {@link io.prometheus.metrics.model.snapshots.SummarySnapshot.SummaryDataPointSnapshot}
 * under the given metric name, creating the list for that name on first use and appending to it
 * afterwards.
 *
 * @param metricName Name of metric
 * @param dataPoint Summary datapoint to be collected
 */
public void collectSummaryDatapoint(
    String metricName, SummarySnapshot.SummaryDataPointSnapshot dataPoint) {
  List<SummarySnapshot.SummaryDataPointSnapshot> dataPointsForMetric =
      metricSummaries.computeIfAbsent(metricName, name -> new ArrayList<>());
  dataPointsForMetric.add(dataPoint);
}

/**
 * Returns an immutable {@link MetricSnapshots} from the {@link
 * io.prometheus.metrics.model.snapshots.DataPointSnapshot}s collected from the registry.
 *
 * <p>Each collected metric name contributes exactly one snapshot per metric type: one {@link
 * CounterSnapshot} per counter name, one {@link GaugeSnapshot} per gauge name, and one {@link
 * SummarySnapshot} per summary name.
 */
public MetricSnapshots collect() {
  List<MetricSnapshot> snapshots =
      new ArrayList<>(metricCounters.size() + metricGauges.size() + metricSummaries.size());

  // Walk each map exactly once; adding a name twice would hand MetricSnapshots duplicate entries.
  for (var counter : metricCounters.entrySet()) {
    snapshots.add(new CounterSnapshot(new MetricMetadata(counter.getKey()), counter.getValue()));
  }
  for (var gauge : metricGauges.entrySet()) {
    snapshots.add(new GaugeSnapshot(new MetricMetadata(gauge.getKey()), gauge.getValue()));
  }
  for (var summary : metricSummaries.entrySet()) {
    snapshots.add(new SummarySnapshot(new MetricMetadata(summary.getKey()), summary.getValue()));
  }

  return new MetricSnapshots(snapshots);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
public class SolrCoreHandlerMetric extends SolrCoreMetric {
/** Prometheus metric name used when exporting request counters. */
public static final String CORE_REQUESTS_TOTAL = "solr_metrics_core_requests";

/** Prometheus metric name for update handler metrics (usage not visible here; named by intent). */
public static final String CORE_REQUESTS_UPDATE_HANDLER = "solr_metrics_core_update_handler";

/** Prometheus metric name used when exporting {@code totalTime} counters. */
public static final String CORE_REQUESTS_TOTAL_TIME = "solr_metrics_core_requests_time";

/**
 * Prometheus metric name for request timers. Declared exactly once; the {@code _ms} suffix
 * presumably reflects that timers are exported in milliseconds -- confirm against the exporter.
 */
public static final String CORE_REQUEST_TIMES = "solr_metrics_core_request_time_ms";

public SolrCoreHandlerMetric(Metric dropwizardMetric, String metricName) {
super(dropwizardMetric, metricName);
Expand Down Expand Up @@ -58,10 +57,6 @@ public void toPrometheus(SolrPrometheusFormatter formatter) {
} else if (dropwizardMetric instanceof Counter) {
if (metricName.endsWith("requests")) {
formatter.exportCounter(CORE_REQUESTS_TOTAL, (Counter) dropwizardMetric, getLabels());
} else if (metricName.endsWith("totalTime")) {
// Do not need type label for total time
labels.remove("type");
formatter.exportCounter(CORE_REQUESTS_TOTAL_TIME, (Counter) dropwizardMetric, getLabels());
}
} else if (dropwizardMetric instanceof Gauge) {
if (!metricName.endsWith("handlerStart")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
/** Dropwizard metrics of name SEARCHER.* */
public class SolrCoreSearcherMetric extends SolrCoreMetric {
/** Prometheus metric name for searcher document metrics. */
public static final String CORE_SEARCHER_METRICS = "solr_metrics_core_searcher_documents";

/**
 * Prometheus metric name for searcher warmup timers. Declared exactly once; the {@code _ms}
 * suffix presumably reflects that timers are exported in milliseconds -- confirm against the
 * exporter.
 */
public static final String CORE_SEARCHER_TIMES = "solr_metrics_core_searcher_warmup_time_ms";

public SolrCoreSearcherMetric(Metric dropwizardMetric, String metricName) {
super(dropwizardMetric, metricName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import io.prometheus.metrics.model.snapshots.Labels;
import io.prometheus.metrics.model.snapshots.MetricSnapshot;
import io.prometheus.metrics.model.snapshots.MetricSnapshots;
import io.prometheus.metrics.model.snapshots.SummarySnapshot;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -730,12 +731,12 @@ public void testPrometheusMetricsCore() throws Exception {
assertNotNull(actualSnapshots);

MetricSnapshot actualSnapshot =
getMetricSnapshot(actualSnapshots, "solr_metrics_core_average_request_time");
GaugeSnapshot.GaugeDataPointSnapshot actualGaugeDataPoint =
getGaugeDatapointSnapshot(
getMetricSnapshot(actualSnapshots, "solr_metrics_core_request_time_ms");
SummarySnapshot.SummaryDataPointSnapshot actualSummaryDataPoint =
getSummaryDataPointSnapshot(
actualSnapshot,
Labels.of("category", "QUERY", "core", "collection1", "handler", "/select[shard]"));
assertEquals(0, actualGaugeDataPoint.getValue(), 0);
assertEquals(0, (float) actualSummaryDataPoint.getCount(), 0);

actualSnapshot = getMetricSnapshot(actualSnapshots, "solr_metrics_core_requests");
CounterSnapshot.CounterDataPointSnapshot actualCounterDataPoint =
Expand All @@ -753,7 +754,7 @@ public void testPrometheusMetricsCore() throws Exception {
assertEquals(0, actualCounterDataPoint.getValue(), 0);

actualSnapshot = getMetricSnapshot(actualSnapshots, "solr_metrics_core_cache");
actualGaugeDataPoint =
GaugeSnapshot.GaugeDataPointSnapshot actualGaugeDataPoint =
getGaugeDatapointSnapshot(
actualSnapshot,
Labels.of("cacheType", "fieldValueCache", "core", "collection1", "item", "hits"));
Expand All @@ -766,13 +767,6 @@ public void testPrometheusMetricsCore() throws Exception {
Labels.of("item", "default", "core", "collection1", "type", "SolrFragmenter"));
assertEquals(0, actualCounterDataPoint.getValue(), 0);

actualSnapshot = getMetricSnapshot(actualSnapshots, "solr_metrics_core_requests_time");
actualCounterDataPoint =
getCounterDatapointSnapshot(
actualSnapshot,
Labels.of("category", "QUERY", "core", "collection1", "handler", "/select[shard]"));
assertEquals(0, actualCounterDataPoint.getValue(), 0);

actualSnapshot = getMetricSnapshot(actualSnapshots, "solr_metrics_core_searcher_documents");
actualGaugeDataPoint =
getGaugeDatapointSnapshot(
Expand All @@ -795,11 +789,11 @@ public void testPrometheusMetricsCore() throws Exception {
assertEquals(0, actualGaugeDataPoint.getValue(), 0);

actualSnapshot =
getMetricSnapshot(actualSnapshots, "solr_metrics_core_average_searcher_warmup_time");
actualGaugeDataPoint =
getGaugeDatapointSnapshot(
getMetricSnapshot(actualSnapshots, "solr_metrics_core_searcher_warmup_time_ms");
actualSummaryDataPoint =
getSummaryDataPointSnapshot(
actualSnapshot, Labels.of("core", "collection1", "type", "warmup"));
assertEquals(0, actualGaugeDataPoint.getValue(), 0);
assertEquals(0, (float) actualSummaryDataPoint.getCount(), 0);

handler.close();
}
Expand Down Expand Up @@ -1183,6 +1177,15 @@ private CounterSnapshot.CounterDataPointSnapshot getCounterDatapointSnapshot(
.get();
}

/**
 * Looks up the datapoint in {@code snapshot} whose labels satisfy {@link
 * Labels#hasSameValues(Labels)} against {@code labels} and returns it as a summary datapoint.
 *
 * @param snapshot metric snapshot whose datapoints are searched
 * @param labels labels the wanted datapoint must carry
 * @return the matching datapoint, cast to a summary datapoint
 * @throws java.util.NoSuchElementException if no datapoint matches
 */
private SummarySnapshot.SummaryDataPointSnapshot getSummaryDataPointSnapshot(
    MetricSnapshot snapshot, Labels labels) {
  var matchingDataPoint =
      snapshot.getDataPoints().stream()
          .filter(dataPoint -> dataPoint.getLabels().hasSameValues(labels))
          .findAny()
          .orElseThrow();
  return (SummarySnapshot.SummaryDataPointSnapshot) matchingDataPoint;
}

static class RefreshablePluginHolder extends PluginBag.PluginHolder<SolrRequestHandler> {

private DumpRequestHandler rh;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import io.prometheus.metrics.model.snapshots.CounterSnapshot;
import io.prometheus.metrics.model.snapshots.GaugeSnapshot;
import io.prometheus.metrics.model.snapshots.Labels;
import io.prometheus.metrics.model.snapshots.SummarySnapshot;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -84,11 +85,11 @@ public void testExportTimer() throws InterruptedException {

Labels expectedLabels = Labels.of("test", "test-value");
testFormatter.exportTimer(expectedMetricName, metric, expectedLabels);
assertTrue(testFormatter.getMetricGauges().containsKey(expectedMetricName));
assertTrue(testFormatter.getMetricSummaries().containsKey(expectedMetricName));

GaugeSnapshot.GaugeDataPointSnapshot actual =
testFormatter.getMetricGauges().get("test_metric").get(0);
assertEquals(5000000000L, actual.getValue(), 500000000L);
SummarySnapshot.SummaryDataPointSnapshot actual =
testFormatter.getMetricSummaries().get("test_metric").get(0);
assertEquals(5000L, actual.getSum(), 500L);
assertEquals(expectedLabels, actual.getLabels());
}

Expand Down Expand Up @@ -199,5 +200,9 @@ public Map<String, List<CounterSnapshot.CounterDataPointSnapshot>> getMetricCoun
/** Exposes the collected gauge datapoints, keyed by metric name, so tests can inspect them. */
public Map<String, List<GaugeSnapshot.GaugeDataPointSnapshot>> getMetricGauges() {
  return this.metricGauges;
}

/** Exposes the collected summary datapoints, keyed by metric name, so tests can inspect them. */
public Map<String, List<SummarySnapshot.SummaryDataPointSnapshot>> getMetricSummaries() {
  return this.metricSummaries;
}
}
}
Loading