Skip to content

Commit 01923db

Browse files
Remove agent name from metrics
1 parent 76a6068 commit 01923db

File tree

2 files changed

+27
-41
lines changed

2 files changed

+27
-41
lines changed

src/server/metrics.rs

Lines changed: 23 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ use crate::experiments::{Assignee, Experiment};
33
use crate::prelude::*;
44
use crate::server::agents::Agent;
55
use chrono::{DateTime, Utc};
6-
use prometheus::proto::{Metric, MetricFamily};
76
use prometheus::{HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec};
87

98
const JOBS_METRIC: &str = "crater_completed_jobs_total";
@@ -26,7 +25,7 @@ impl Metrics {
2625
pub fn new() -> Fallible<Self> {
2726
let jobs_opts = prometheus::opts!(JOBS_METRIC, "total completed jobs");
2827
let crater_completed_jobs_total =
29-
prometheus::register_int_counter_vec!(jobs_opts, &["agent", "experiment"])?;
28+
prometheus::register_int_counter_vec!(jobs_opts, &["experiment"])?;
3029
let crater_bounced_record_progress = prometheus::register_int_counter!(
3130
"crater_bounced_record_progress",
3231
"hits with full record progress queue"
@@ -63,39 +62,15 @@ impl Metrics {
6362
.inc_by(1);
6463
}
6564

66-
pub fn record_completed_jobs(&self, agent: &str, experiment: &str, amount: u64) {
65+
pub fn record_completed_jobs(&self, experiment: &str, amount: u64) {
6766
self.crater_completed_jobs_total
68-
.with_label_values(&[agent, experiment])
67+
.with_label_values(&[experiment])
6968
.inc_by(amount);
7069
}
7170

72-
fn get_metric_by_name(name: &str) -> Option<MetricFamily> {
73-
let families = prometheus::gather();
74-
families.into_iter().find(|fam| fam.get_name() == name)
75-
}
76-
77-
fn get_label_by_name<'a>(metric: &'a Metric, label: &str) -> Option<&'a str> {
78-
metric
79-
.get_label()
80-
.iter()
81-
.find(|lab| lab.get_name() == label)
82-
.map(|lab| lab.get_value())
83-
}
84-
8571
fn remove_experiment_jobs(&self, experiment: &str) -> Fallible<()> {
86-
if let Some(metric) = Self::get_metric_by_name(JOBS_METRIC) {
87-
let agents = metric
88-
.get_metric()
89-
.iter()
90-
.filter(|met| Self::get_label_by_name(met, "experiment").unwrap() == experiment)
91-
.map(|met| Self::get_label_by_name(met, "agent").unwrap())
92-
.collect::<Vec<&str>>();
93-
94-
for agent in agents.iter() {
95-
self.crater_completed_jobs_total
96-
.remove_label_values(&[agent, experiment])?;
97-
}
98-
}
72+
self.crater_completed_jobs_total
73+
.remove_label_values(&[experiment])?;
9974

10075
Ok(())
10176
}
@@ -143,12 +118,27 @@ mod tests {
143118
use crate::server::tokens::Tokens;
144119
use chrono::Utc;
145120
use lazy_static::lazy_static;
146-
use prometheus::proto::MetricFamily;
121+
use prometheus::proto::{Metric, MetricFamily};
147122

148123
lazy_static! {
149124
static ref METRICS: Metrics = Metrics::new().unwrap();
150125
}
151126

127+
impl Metrics {
128+
fn get_metric_by_name(name: &str) -> Option<MetricFamily> {
129+
let families = prometheus::gather();
130+
families.into_iter().find(|fam| fam.get_name() == name)
131+
}
132+
133+
fn get_label_by_name<'a>(metric: &'a Metric, label: &str) -> Option<&'a str> {
134+
metric
135+
.get_label()
136+
.iter()
137+
.find(|lab| lab.get_name() == label)
138+
.map(|lab| lab.get_value())
139+
}
140+
}
141+
152142
fn test_experiment_presence(metric: &MetricFamily, experiment: &str) -> bool {
153143
metric
154144
.get_metric()
@@ -160,12 +150,9 @@ mod tests {
160150
fn test_on_complete_experiment() {
161151
let ex1 = "pr-0";
162152
let ex2 = "pr-1";
163-
let agent1 = "agent-1";
164-
let agent2 = "agent-2";
165153

166-
METRICS.record_completed_jobs(agent1, ex1, 1);
167-
METRICS.record_completed_jobs(agent2, ex1, 1);
168-
METRICS.record_completed_jobs(agent2, ex2, 1);
154+
METRICS.record_completed_jobs(ex1, 1);
155+
METRICS.record_completed_jobs(ex2, 1);
169156

170157
// test that the metrics are correctly registered
171158
let jobs = Metrics::get_metric_by_name(JOBS_METRIC).unwrap();

src/server/routes/agent.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ fn endpoint_next_crate(
167167
#[derive(Clone)]
168168
pub struct RecordProgressThread {
169169
// Progress reports queued for the recording thread
170-
queue: Sender<(ExperimentData<ProgressData>, String)>,
170+
queue: Sender<ExperimentData<ProgressData>>,
171171
in_flight_requests: Arc<(Mutex<usize>, Condvar)>,
172172
}
173173

@@ -189,7 +189,7 @@ impl RecordProgressThread {
189189
// Panics should already be logged and otherwise there's not much we
190190
// can/should do.
191191
let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
192-
let (result, worker_name) = rx.recv().unwrap();
192+
let result = rx.recv().unwrap();
193193
this.block_until_idle();
194194

195195
let start = std::time::Instant::now();
@@ -204,7 +204,6 @@ impl RecordProgressThread {
204204
}
205205

206206
metrics.record_completed_jobs(
207-
&worker_name,
208207
&ex.name,
209208
result.data.results.len() as u64,
210209
);
@@ -300,12 +299,12 @@ impl Drop for RequestGuard {
300299
fn endpoint_record_progress(
301300
result: ExperimentData<ProgressData>,
302301
data: Arc<Data>,
303-
auth: AuthDetails,
302+
_auth: AuthDetails,
304303
) -> Fallible<Response<Body>> {
305304
match data
306305
.record_progress_worker
307306
.queue
308-
.try_send((result, auth.name))
307+
.try_send(result)
309308
{
310309
Ok(()) => Ok(ApiResponse::Success { result: true }.into_response()?),
311310
Err(crossbeam_channel::TrySendError::Full(_)) => {

0 commit comments

Comments
 (0)