Skip to content

Commit 59ae8cf

Browse files
authored
Merge pull request #9095 from achouhan09/repl_met
Added metrics to show the replication status per bucket
2 parents ec3d16c + d95f694 commit 59ae8cf

File tree

3 files changed

+58
-11
lines changed

3 files changed

+58
-11
lines changed

src/server/analytic_services/prometheus_reports/noobaa_core_report.js

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,27 @@ const NOOBAA_CORE_METRICS = js_utils.deep_freeze([{
382382
help: 'Number of error objects replication_id in last replication cycle',
383383
labelNames: ['replication_id']
384384
}
385+
}, {
386+
type: 'Gauge',
387+
name: 'bucket_last_cycle_total_objects_num',
388+
configuration: {
389+
help: 'Total number of objects scanned per bucket in last replication cycle',
390+
labelNames: ['bucket_name']
391+
}
392+
}, {
393+
type: 'Gauge',
394+
name: 'bucket_last_cycle_replicated_objects_num',
395+
configuration: {
396+
help: 'Number of objects replicated per bucket in last replication cycle',
397+
labelNames: ['bucket_name']
398+
}
399+
}, {
400+
type: 'Gauge',
401+
name: 'bucket_last_cycle_error_objects_num',
402+
configuration: {
403+
help: 'Number of objects failed to replicate per bucket in last replication cycle',
404+
labelNames: ['bucket_name']
405+
}
385406
}, {
386407
type: 'Gauge',
387408
name: 'bucket_used_bytes',
@@ -606,9 +627,12 @@ class NooBaaCoreReport extends BasePrometheusReport {
606627
set_replication_status(repl_info) {
607628
if (!this._metrics) return;
608629
const replication_id = repl_info.replication_id;
630+
const bucket_name = repl_info.bucket_name;
609631
delete this._metrics.replication_status.hashMap[String(repl_info.replication_id)];
610632
this._metrics.replication_status.set(_.omit(repl_info, ['last_cycle_writes_size',
611-
'last_cycle_writes_num', 'last_cycle_error_writes_size', 'last_cycle_error_writes_num'
633+
'last_cycle_writes_num', 'last_cycle_error_writes_size', 'last_cycle_error_writes_num',
634+
'bucket_last_cycle_total_objects_num', 'bucket_last_cycle_replicated_objects_num',
635+
'bucket_last_cycle_error_objects_num'
612636
]), Date.now());
613637

614638
delete this._metrics.replication_last_cycle_writes_size.hashMap[String(repl_info.replication_id)];
@@ -622,6 +646,15 @@ class NooBaaCoreReport extends BasePrometheusReport {
622646

623647
delete this._metrics.replication_last_cycle_error_writes_num.hashMap[String(repl_info.replication_id)];
624648
this._metrics.replication_last_cycle_error_writes_num.set({ replication_id }, repl_info.last_cycle_error_writes_num);
649+
650+
delete this._metrics.bucket_last_cycle_total_objects_num.hashMap[String(bucket_name)];
651+
this._metrics.bucket_last_cycle_total_objects_num.set({ bucket_name }, repl_info.bucket_last_cycle_total_objects_num);
652+
653+
delete this._metrics.bucket_last_cycle_replicated_objects_num.hashMap[String(bucket_name)];
654+
this._metrics.bucket_last_cycle_replicated_objects_num.set({ bucket_name }, repl_info.bucket_last_cycle_replicated_objects_num);
655+
656+
delete this._metrics.bucket_last_cycle_error_objects_num.hashMap[String(bucket_name)];
657+
this._metrics.bucket_last_cycle_error_objects_num.set({ bucket_name }, repl_info.bucket_last_cycle_error_objects_num);
625658
}
626659
}
627660

src/server/bg_services/replication_scanner.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ class ReplicationScanner {
8686
for_replication: config.BUCKET_DIFF_FOR_REPLICATION
8787
});
8888
dbg.log1(`scan:: cur_src_cont_token: ${cur_src_cont_token},cur_dst_cont_token: ${cur_dst_cont_token}`);
89+
8990
const {
9091
keys_diff_map,
9192
first_bucket_cont_token: src_cont_token,
@@ -126,9 +127,10 @@ class ReplicationScanner {
126127

127128
// update the prometheus metrics only if we have diff
128129
if (Object.keys(keys_diff_map).length) {
129-
const replication_status = replication_utils.get_rule_status(rule.rule_id, src_cont_token, keys_diff_map, copy_res);
130+
const {rule_status, bucket_status} = replication_utils.get_rule_and_bucket_status(
131+
rule.rule_id, src_cont_token, keys_diff_map, copy_res);
130132

131-
replication_utils.update_replication_prom_report(src_bucket.name, replication_id, replication_status);
133+
replication_utils.update_replication_prom_report(src_bucket.name, replication_id, rule_status, bucket_status);
132134
}
133135
}));
134136
}

src/server/utils/replication_utils.js

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ const PARTIAL_SINGLE_BUCKET_REPLICATION_DEFAULTS = {
1616
last_cycle_writes_size: 0,
1717
last_cycle_error_writes_num: 0,
1818
last_cycle_error_writes_size: 0,
19+
bucket_last_cycle_total_objects_num: 0,
20+
bucket_last_cycle_replicated_objects_num: 0,
21+
bucket_last_cycle_error_objects_num: 0,
1922
};
2023

2124
//TODO: this function is not being used anymore, commenting out and keeping it as reference
@@ -39,7 +42,7 @@ const PARTIAL_SINGLE_BUCKET_REPLICATION_DEFAULTS = {
3942
// return false;
4043
// }
4144

42-
function get_rule_status(rule, src_cont_token, keys_diff_map, copy_res) {
45+
function get_rule_and_bucket_status(rule, src_cont_token, keys_diff_map, copy_res) {
4346
const { num_keys_to_copy, num_bytes_to_copy } = Object.entries(keys_diff_map).reduce(
4447
(acc, [key, value]) => {
4548
acc.num_keys_to_copy += value.length;
@@ -51,22 +54,31 @@ function get_rule_status(rule, src_cont_token, keys_diff_map, copy_res) {
5154
const num_keys_moved = copy_res.num_of_objects;
5255
const num_bytes_moved = copy_res.size_of_objects;
5356

54-
const status = {
57+
const rule_status = {
5558
last_cycle_rule_id: rule,
5659
last_cycle_writes_num: num_keys_moved,
5760
last_cycle_writes_size: num_bytes_moved,
5861
last_cycle_error_writes_num: num_keys_to_copy - num_keys_moved,
5962
last_cycle_error_writes_size: num_bytes_to_copy - num_bytes_moved,
6063
};
61-
if (src_cont_token) status.last_cycle_src_cont_token = src_cont_token;
62-
dbg.log1('get_rule_status: ', status);
63-
return status;
64+
if (src_cont_token) rule_status.last_cycle_src_cont_token = src_cont_token;
65+
dbg.log1('get_rule_and_bucket_status:: rule_status: ', rule_status);
66+
67+
const bucket_status = {
68+
bucket_last_cycle_total_objects_num: num_keys_to_copy,
69+
bucket_last_cycle_replicated_objects_num: num_keys_moved,
70+
bucket_last_cycle_error_objects_num: num_keys_to_copy - num_keys_moved,
71+
};
72+
dbg.log1('get_rule_and_bucket_status:: bucket_status: ', bucket_status);
73+
74+
return {rule_status, bucket_status};
6475
}
6576

66-
function update_replication_prom_report(bucket_name, replication_policy_id, replication_status) {
77+
function update_replication_prom_report(bucket_name, replication_policy_id, rule_status, bucket_status) {
6778
const core_report = prom_reporting.get_core_report();
6879
const last_cycle_status = _.defaults({
69-
...replication_status,
80+
...rule_status,
81+
...bucket_status,
7082
bucket_name: bucket_name.unwrap(),
7183
replication_id: replication_policy_id
7284
}, PARTIAL_SINGLE_BUCKET_REPLICATION_DEFAULTS);
@@ -185,7 +197,7 @@ async function delete_objects(scanner_semaphore, client, bucket_name, keys) {
185197
}
186198

187199
// EXPORTS
188-
exports.get_rule_status = get_rule_status;
200+
exports.get_rule_and_bucket_status = get_rule_and_bucket_status;
189201
exports.update_replication_prom_report = update_replication_prom_report;
190202
exports.get_object_md = get_object_md;
191203
exports.find_src_and_dst_buckets = find_src_and_dst_buckets;

0 commit comments

Comments
 (0)