Skip to content

Update metric docs #5844

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: revamp-storage-metrics
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 157 additions & 40 deletions docs/reference/metrics.md

Large diffs are not rendered by default.

38 changes: 12 additions & 26 deletions monitoring/grafana/dashboards/indexers.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1,
"links": [],
"liveNow": false,
"panels": [
Expand Down Expand Up @@ -745,7 +746,7 @@
"uid": "${datasource}"
},
"editorMode": "builder",
"expr": "sum by(pod) (rate(quickwit_storage_object_storage_upload_num_bytes{namespace=\"$namespace\", pod=~\"$pod\", instance=~\"$instance\"}[$__rate_interval]))",
"expr": "sum by(pod) (rate(quickwit_storage_object_storage_upload_num_bytes{instance=~\"$instance\"}[$__rate_interval]))",
"hide": false,
"legendFormat": "Upload bytes / sec - {{pod}}",
"range": true,
Expand Down Expand Up @@ -806,8 +807,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
Expand Down Expand Up @@ -910,8 +910,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
Expand Down Expand Up @@ -965,23 +964,12 @@
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "builder",
"expr": "sum(rate(quickwit_storage_object_storage_gets_total{instance=~\"$instance\"}[$__rate_interval]))",
"legendFormat": "GET req/sec",
"range": true,
"refId": "Download"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "builder",
"expr": "sum(rate(quickwit_storage_object_storage_puts_total{namespace=\"$namespace\", pod=~\"$pod\", instance=~\"$instance\"}[$__rate_interval]))",
"editorMode": "code",
"expr": "sum(rate(quickwit_storage_object_storage_requests_total{instance=~\"$instance\"}[$__rate_interval])) by (action)",
"hide": false,
"legendFormat": "PUT req/sec",
"legendFormat": "{{action}} req/sec",
"range": true,
"refId": "Upload"
"refId": "Requests"
}
],
"title": "Requests on object storage",
Expand Down Expand Up @@ -1034,8 +1022,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
Expand Down Expand Up @@ -1130,8 +1117,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
Expand Down Expand Up @@ -1191,7 +1177,7 @@
"list": [
{
"current": {
"selected": true,
"selected": false,
"text": "Prometheus",
"value": "PBFA97CFB590B2093"
},
Expand Down Expand Up @@ -1245,6 +1231,6 @@
"timezone": "",
"title": "Quickwit Indexers",
"uid": "quickwit-indexers",
"version": 2,
"version": 6,
"weekStart": ""
}
29 changes: 14 additions & 15 deletions monitoring/grafana/dashboards/searchers.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2,
"id": 4,
"links": [],
"liveNow": false,
"panels": [
Expand Down Expand Up @@ -307,18 +307,18 @@
"uid": "${datasource}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "rate(quickwit_storage_object_storage_gets_total{instance=~\"$instance\"}[$__rate_interval])",
"editorMode": "code",
"expr": "sum(rate(quickwit_storage_object_storage_requests_total{instance=~\"$instance\"}[$__rate_interval])) by (action)",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "Total",
"legendFormat": "{{action}} req/sec",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Number of GET requests",
"title": "Object store requests",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -407,18 +407,18 @@
"uid": "${datasource}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "quickwit_storage_object_storage_download_num_bytes{instance=~\"$instance\"}",
"editorMode": "code",
"expr": "rate(quickwit_storage_object_storage_download_num_bytes{instance=~\"$instance\"}[$__rate_interval])",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Downloaded bytes",
"legendFormat": "Download bytes / sec ",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Size of GET requests (bytes)",
"title": "Object store download rate",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -506,8 +506,8 @@
"uid": "${datasource}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "rate(quickwit_cache_cache_hits_total{instance=~\"$instance\"}[$__rate_interval])",
"editorMode": "code",
"expr": "sum(rate(quickwit_cache_cache_hits_total{instance=~\"$instance\"}[$__rate_interval])) by (component_name)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
Expand Down Expand Up @@ -710,7 +710,7 @@
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Split footer",
"legendFormat": "{{component_name}}",
"range": true,
"refId": "A",
"useBackend": false
Expand Down Expand Up @@ -810,7 +810,7 @@
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Split footer",
"legendFormat": "{{component_name}}",
"range": true,
"refId": "A",
"useBackend": false
Expand Down Expand Up @@ -874,7 +874,6 @@
"sort": 0,
"type": "query"
}

]
},
"time": {
Expand All @@ -885,6 +884,6 @@
"timezone": "",
"title": "Quickwit Searchers",
"uid": "quickwit-searchers",
"version": 1,
"version": 4,
"weekStart": ""
}
2 changes: 2 additions & 0 deletions quickwit/quickwit-common/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ pub struct InFlightDataGauges {
pub doc_processor_mailbox: IntGauge,
pub indexer_mailbox: IntGauge,
pub index_writer: IntGauge,
pub get_object: IntGauge,
in_flight_gauge_vec: IntGaugeVec<1>,
}

Expand All @@ -365,6 +366,7 @@ impl Default for InFlightDataGauges {
doc_processor_mailbox: in_flight_gauge_vec.with_label_values(["doc_processor_mailbox"]),
indexer_mailbox: in_flight_gauge_vec.with_label_values(["indexer_mailbox"]),
index_writer: in_flight_gauge_vec.with_label_values(["index_writer"]),
get_object: in_flight_gauge_vec.with_label_values(["get_object"]),
in_flight_gauge_vec: in_flight_gauge_vec.clone(),
}
}
Expand Down
17 changes: 16 additions & 1 deletion quickwit/quickwit-ingest/src/ingest_v2/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ use mrecordlog::ResourceUsage;
use once_cell::sync::Lazy;
use quickwit_common::metrics::{
Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, exponential_buckets,
linear_buckets, new_counter_vec, new_gauge, new_gauge_vec, new_histogram, new_histogram_vec,
linear_buckets, new_counter, new_counter_vec, new_gauge, new_gauge_vec, new_histogram,
new_histogram_vec,
};

// Counter vec counting the different outcomes of ingest requests as
Expand Down Expand Up @@ -82,6 +83,8 @@ pub(super) struct IngestV2Metrics {
pub wal_disk_used_bytes: IntGauge,
pub wal_memory_used_bytes: IntGauge,
pub ingest_results: IngestResultMetrics,
pub replicated_num_bytes_total: IntCounter,
pub replicated_num_docs_total: IntCounter,
}

impl Default for IngestV2Metrics {
Expand Down Expand Up @@ -146,6 +149,18 @@ impl Default for IngestV2Metrics {
"ingest",
&[],
),
replicated_num_bytes_total: new_counter(
"replicated_num_bytes_total",
"Total size in bytes of the replicated docs.",
"ingest",
&[],
),
replicated_num_docs_total: new_counter(
"replicated_num_docs_total",
"Total number of docs replicated.",
"ingest",
&[],
),
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions quickwit/quickwit-ingest/src/ingest_v2/replication.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ use super::metrics::report_wal_usage;
use super::models::IngesterShard;
use super::mrecordlog_utils::check_enough_capacity;
use super::state::IngesterState;
use crate::ingest_v2::metrics::INGEST_V2_METRICS;
use crate::ingest_v2::mrecordlog_utils::{AppendDocBatchError, append_non_empty_doc_batch};
use crate::metrics::INGEST_METRICS;
use crate::{estimate_size, with_lock_metrics};

pub(super) const SYN_REPLICATION_STREAM_CAPACITY: usize = 5;
Expand Down Expand Up @@ -667,10 +667,10 @@ impl ReplicationTask {
.expect("replica shard should be initialized")
.set_replication_position_inclusive(current_position_inclusive.clone(), now);

INGEST_METRICS
INGEST_V2_METRICS
.replicated_num_bytes_total
.inc_by(batch_num_bytes);
INGEST_METRICS
INGEST_V2_METRICS
.replicated_num_docs_total
.inc_by(batch_num_docs);

Expand Down
28 changes: 3 additions & 25 deletions quickwit/quickwit-ingest/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,14 @@
// limitations under the License.

use once_cell::sync::Lazy;
use quickwit_common::metrics::{IntCounter, IntGauge, new_counter, new_counter_vec, new_gauge};
use quickwit_common::metrics::{IntCounter, new_counter_vec};

pub struct IngestMetrics {
// With ingest V1 all ingested documents are considered valid
pub ingested_docs_bytes_valid: IntCounter,
pub ingested_docs_valid: IntCounter,
pub ingested_docs_bytes_invalid: IntCounter,
pub ingested_docs_invalid: IntCounter,
pub ingested_docs_valid: IntCounter,

pub replicated_num_bytes_total: IntCounter,
pub replicated_num_docs_total: IntCounter,
Comment on lines -24 to -25
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this to be part of the ingest_v2 metrics.

#[allow(dead_code)] // this really shouldn't be dead, it needs to be used somewhere
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this has been dead for a year

pub queue_count: IntGauge,
}

impl Default for IngestMetrics {
Expand Down Expand Up @@ -56,24 +52,6 @@ impl Default for IngestMetrics {
ingested_docs_bytes_invalid,
ingested_docs_valid,
ingested_docs_invalid,
replicated_num_bytes_total: new_counter(
"replicated_num_bytes_total",
"Total size in bytes of the replicated docs.",
"ingest",
&[],
),
replicated_num_docs_total: new_counter(
"replicated_num_docs_total",
"Total number of docs replicated.",
"ingest",
&[],
),
queue_count: new_gauge(
"queue_count",
"Number of queues currently active",
"ingest",
&[],
),
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions quickwit/quickwit-serve/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ impl Default for ServeMetrics {
),
ongoing_requests: new_gauge_vec(
"ongoing_requests",
"Number of ongoing requests.",
"Number of ongoing requests on specific endpoint groups",
"",
&[],
["endpoint_group"],
),
pending_requests: new_gauge_vec(
"pending_requests",
"Number of pending requests.",
"Number of pending requests on specific endpoint groups",
"",
&[],
["endpoint_group"],
Expand Down
Loading