Skip to content

SOLR-17806: Migrate DirectUpdateHandler2 metrics to OTEL #3417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: feature/SOLR-17458
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 22 additions & 21 deletions solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ public class DirectUpdateHandler2 extends UpdateHandler
AttributedLongUpDownCounter addCommandsCumulative;

// Maintenance operations
AttributedLongUpDownCounter expungeDeleteCommands;
AttributedLongUpDownCounter mergeIndexesCommands;
AttributedLongUpDownCounter commitCommands;
AttributedLongUpDownCounter optimizeCommands;
AttributedLongCounter expungeDeleteCommands;
AttributedLongCounter mergeIndexesCommands;
AttributedLongCounter commitCommands;
AttributedLongCounter optimizeCommands;

AttributedLongCounter numErrorsCumulative;

Expand Down Expand Up @@ -246,7 +246,7 @@ public void initializeMetrics(

var baseCommandsMetric =
solrMetricsContext.longUpDownCounter(
"solr_metrics_core_update_operations_cumulative",
"solr_core_update_operations_cumulative",
"Cumulative number of update commands processed. Metric can go down from rollback command");

addCommandsCumulative =
Expand All @@ -262,27 +262,29 @@ public void initializeMetrics(
baseCommandsMetric,
baseAttributes.get().put(OPERATION_ATTR, "deletes_by_query").build());

var baseCommitMetric =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this a "base"; why do you refer to it in the metrics for commands & merges below other than "commits"?

solrMetricsContext.longCounter(
"solr_core_update_commit_operations", "Total number of commit operations");

commitCommands =
new AttributedLongUpDownCounter(
baseCommandsMetric, baseAttributes.get().put(OPERATION_ATTR, "commits").build());
new AttributedLongCounter(
baseCommitMetric, baseAttributes.get().put(OPERATION_ATTR, "commits").build());

optimizeCommands =
new AttributedLongUpDownCounter(
baseCommandsMetric, baseAttributes.get().put(OPERATION_ATTR, "optimize").build());
new AttributedLongCounter(
baseCommitMetric, baseAttributes.get().put(OPERATION_ATTR, "optimize").build());

mergeIndexesCommands =
new AttributedLongUpDownCounter(
baseCommandsMetric, baseAttributes.get().put(OPERATION_ATTR, "merges").build());
new AttributedLongCounter(
baseCommitMetric, baseAttributes.get().put(OPERATION_ATTR, "merges").build());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh this is a command and not merges generally. "mergeIndexes"? At least it's scoped by the OPERATION_ATTR so in-context, it ought to be clear.


expungeDeleteCommands =
new AttributedLongUpDownCounter(
baseCommandsMetric,
baseAttributes.get().put(OPERATION_ATTR, "expunge_deletes").build());
new AttributedLongCounter(
baseCommitMetric, baseAttributes.get().put(OPERATION_ATTR, "expunge_deletes").build());

var baseMaintenanceMetric =
solrMetricsContext.longCounter(
"solr_metrics_core_update_maintenance_operations",
"Total number of maintenance operations");
"solr_core_update_maintenance_operations", "Total number of maintenance operations");

rollbackCommands =
new AttributedLongCounter(
Expand All @@ -293,14 +295,13 @@ public void initializeMetrics(
baseMaintenanceMetric, baseAttributes.get().put(OPERATION_ATTR, "split").build());

var baseErrorsMetric =
solrMetricsContext.longCounter(
"solr_metrics_core_update_errors", "Total number of update errors");
solrMetricsContext.longCounter("solr_core_update_errors", "Total number of update errors");

numErrorsCumulative = new AttributedLongCounter(baseErrorsMetric, baseAttributes.get().build());

softAutoCommits =
solrMetricsContext.observableLongCounter(
"solr_metrics_core_update_auto_commits",
"solr_core_update_auto_commits",
"Total number of auto commits",
(observableLongMeasurement -> {
observableLongMeasurement.record(
Expand All @@ -316,7 +317,7 @@ public void initializeMetrics(
// rarely change.
commitStats =
solrMetricsContext.observableLongGauge(
"solr_metrics_core_update_commit_stats",
"solr_core_update_commit_stats",
"Metrics around commits",
(observableLongMeasurement -> {
if (commitTracker.getDocsUpperBound() > 0) {
Expand Down Expand Up @@ -349,7 +350,7 @@ public void initializeMetrics(

updateStats =
solrMetricsContext.observableLongGauge(
"solr_metrics_core_update_pending_operations",
"solr_core_update_pending_operations",
"Operations pending commit. Values get reset after commit",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would be better to instead track "submitted", as separate from "committed" operations? Much like the started and completed merges -- the PR Kevin Liang is working on -- #3416

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm maybe that is better. Maybe I'll reduce this to only docs_pending for this gauge instead then we'll move to submitted and committed increasing counters. I'll refactor this around and see how it looks.

(observableLongMeasurement) -> {
observableLongMeasurement.record(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ public static DataPointSnapshot getDataPointSnapshot(
public static Supplier<Labels.Builder> getCloudLabelsBase(SolrCore core) {
return () ->
Labels.builder()
.label("collection", "tlog_replica_test_only_leader_indexes")
.label("shard", "shard1")
.label("collection", core.getCoreDescriptor().getCloudDescriptor().getCollectionName())
.label("shard", core.getCoreDescriptor().getCloudDescriptor().getShardId())
.label("core", core.getName())
.label(
"replica",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,35 +121,30 @@ public void testBasics() {
assertEquals(
"Did not have the expected number of pending adds",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as you went through the effort of writng these messages -- fine. But I wouldn't waste your time; it's optional.

2,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_pending_operations", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "adds").getValue(),
0.0);
assertEquals(
"Did not have the expected number of cumulative adds",
2,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "adds").getValue(),
0.0);

assertU(commit());

assertEquals(
"Did not have the expected number of cumulative commits",
1,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "commits")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "commits").getValue(),
0.0);
assertEquals(
"Did not have the expected number of pending adds after commit",
0,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_pending_operations", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "adds").getValue(),
0.0);
assertEquals(
"Did not have the expected number of cumulative adds after commit",
2,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "adds").getValue(),
0.0);

// now they should be there
Expand All @@ -162,14 +157,13 @@ public void testBasics() {
assertEquals(
"Did not have the expected number of pending deletes_by_id",
1,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_pending_operations", "deletes_by_id")
getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "deletes_by_id")
.getValue(),
0.0);
assertEquals(
"Did not have the expected number of cumulative deletes_by_id",
1,
getGaugeOpDatapoint(
reader, "solr_metrics_core_update_operations_cumulative", "deletes_by_id")
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "deletes_by_id")
.getValue(),
0.0);

Expand All @@ -181,14 +175,13 @@ public void testBasics() {
assertEquals(
"Did not have the expected number of pending deletes_by_id after commit",
0,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_pending_operations", "deletes_by_id")
getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "deletes_by_id")
.getValue(),
0.0);
assertEquals(
"Did not have the expected number of cumulative deletes_by_id after commit",
1,
getGaugeOpDatapoint(
reader, "solr_metrics_core_update_operations_cumulative", "deletes_by_id")
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "deletes_by_id")
.getValue(),
0.0);

Expand All @@ -202,15 +195,13 @@ public void testBasics() {
assertEquals(
"Did not have the expected number of pending deletes_by_id",
1,
getGaugeOpDatapoint(
reader, "solr_metrics_core_update_pending_operations", "deletes_by_query")
getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "deletes_by_query")
.getValue(),
0.0);
assertEquals(
"Did not have the expected number of pending cumulative deletes_by_id",
1,
getGaugeOpDatapoint(
reader, "solr_metrics_core_update_operations_cumulative", "deletes_by_query")
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "deletes_by_query")
.getValue(),
0.0);

Expand All @@ -222,33 +213,28 @@ public void testBasics() {
assertEquals(
"Did not have the expected number of pending pending deletes_by_query after commit",
0,
getGaugeOpDatapoint(
reader, "solr_metrics_core_update_pending_operations", "deletes_by_query")
getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "deletes_by_query")
.getValue(),
0.0);
assertEquals(
"Did not have the expected number of pending cumulative deletes_by_query after commit",
1,
getGaugeOpDatapoint(
reader, "solr_metrics_core_update_operations_cumulative", "deletes_by_query")
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "deletes_by_query")
.getValue(),
0.0);

// 6 should be gone
assertQ(req("q", "id:6"), "//*[@numFound='0']");

// verify final metrics
var commits =
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "commits");
var pendingAdds =
getGaugeOpDatapoint(reader, "solr_metrics_core_update_pending_operations", "adds");
var commits = getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "commits");
var pendingAdds = getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "adds");
var cumulativeAdds =
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "adds");
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "adds");
var pendingDeletesById =
getGaugeOpDatapoint(reader, "solr_metrics_core_update_pending_operations", "deletes_by_id");
getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "deletes_by_id");
var cumulativeDeletesById =
getGaugeOpDatapoint(
reader, "solr_metrics_core_update_operations_cumulative", "deletes_by_id");
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "deletes_by_id");

assertEquals(
"Did not have the expected number of cumulative commits", 3, commits.getValue(), 0.0);
Expand Down Expand Up @@ -294,26 +280,23 @@ public void testAddRollback() throws Exception {
assertEquals(
"Did not have the expected number of cumulative adds",
1,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "adds").getValue(),
0.0);
assertNull(
"No commits should have been processed yet",
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "commits"));
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "commits"));

updater.commit(cmtCmd);
assertEquals(0, duh2.addCommands.longValue());
assertEquals(
"Did not have the expected number of cumulative adds",
1,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "adds").getValue(),
0.0);
assertEquals(
"Did not have the expected number of cumulative commits",
1,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "commits")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "commits").getValue(),
0.0);

ureq.close();
Expand All @@ -327,28 +310,26 @@ public void testAddRollback() throws Exception {
assertEquals(
"Did not have the expected number of cumulative adds",
2,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "adds").getValue(),
0.0);
assertNull(
"No rollbacks should have been processed yet",
getGaugeOpDatapoint(reader, "solr_metrics_core_update_maintenance_operations", "rollback"));
getGaugeOpDatapoint(reader, "solr_core_update_maintenance_operations", "rollback"));

updater.rollback(rbkCmd);
assertEquals(0, duh2.addCommands.longValue());
assertEquals(
"Did not have the expected number of cumulative adds",
1,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "adds").getValue(),
0.0);
assertEquals(
"Did not have the expected number of cumulative rollback",
1,
((CounterSnapshot.CounterDataPointSnapshot)
SolrMetricTestUtils.getDataPointSnapshot(
reader,
"solr_metrics_core_update_maintenance_operations",
"solr_core_update_maintenance_operations",
SolrMetricTestUtils.getStandaloneLabelsBase(h.getCore())
.get()
.label("category", "UPDATE")
Expand Down Expand Up @@ -406,12 +387,11 @@ public void testDeleteRollback() throws Exception {
assertEquals(
"Did not have the expected number of cumulative adds",
2,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "adds").getValue(),
0.0);
assertNull(
"No commits should have been processed yet",
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "commits"));
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "commits"));

updater.commit(cmtCmd);
assertEquals(0, duh2.addCommands.longValue());
Expand All @@ -420,14 +400,12 @@ public void testDeleteRollback() throws Exception {
assertEquals(
"Did not have the expected number of cumulative adds",
2,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "adds")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "adds").getValue(),
0.0);
assertEquals(
"Did not have the expected number of cumulative commits",
1,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_operations_cumulative", "commits")
.getValue(),
getGaugeOpDatapoint(reader, "solr_core_update_operations_cumulative", "commits").getValue(),
0.0);
ureq.close();

Expand Down Expand Up @@ -461,20 +439,20 @@ public void testDeleteRollback() throws Exception {
assertEquals(
"Did not have the expected number of pending deletes_by_id",
1,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_pending_operations", "deletes_by_id")
getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "deletes_by_id")
.getValue(),
0.0);
assertNull(
"No rollbacks should have been processed yet",
getGaugeOpDatapoint(reader, "solr_metrics_core_update_maintenance_operations", "rollback"));
getGaugeOpDatapoint(reader, "solr_core_update_maintenance_operations", "rollback"));

updater.rollback(rbkCmd);
ureq.close();
assertEquals(0, duh2.deleteByIdCommands.longValue());
assertEquals(
"Did not have the expected number of pending deletes_by_id",
0,
getGaugeOpDatapoint(reader, "solr_metrics_core_update_pending_operations", "deletes_by_id")
getGaugeOpDatapoint(reader, "solr_core_update_pending_operations", "deletes_by_id")
.getValue(),
0.0);
assertEquals(
Expand All @@ -483,7 +461,7 @@ public void testDeleteRollback() throws Exception {
((CounterSnapshot.CounterDataPointSnapshot)
SolrMetricTestUtils.getDataPointSnapshot(
reader,
"solr_metrics_core_update_maintenance_operations",
"solr_core_update_maintenance_operations",
SolrMetricTestUtils.getStandaloneLabelsBase(h.getCore())
.get()
.label("category", "UPDATE")
Expand Down