Skip to content

Commit 49c9e98

Browse files
authored
Merge pull request #8900 from romayalon/romy-backport-lifecycle-5.18
Backports | 5.18.2 | NC | Lifecycle
2 parents bea5a98 + acdb4a3 commit 49c9e98

File tree

8 files changed

+427
-65
lines changed

8 files changed

+427
-65
lines changed

config.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,6 +1021,9 @@ config.NC_LIFECYCLE_RUN_DELAY_LIMIT_MINS = 2;
10211021
/** @type {'UTC' | 'LOCAL'} */
10221022
config.NC_LIFECYCLE_TZ = 'LOCAL';
10231023

1024+
config.NC_LIFECYCLE_LIST_BATCH_SIZE = 1000;
1025+
config.NC_LIFECYCLE_BUCKET_BATCH_SIZE = 10000;
1026+
10241027
config.NC_LIFECYCLE_GPFS_ILM_ENABLED = false;
10251028
////////// GPFS //////////
10261029
config.GPFS_DOWN_DELAY = 1000;

src/manage_nsfs/health.js

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
const dbg = require('../util/debug_module')(__filename);
55
const _ = require('lodash');
6+
const path = require('path');
67
const P = require('../util/promise');
78
const config = require('../../config');
89
const os_util = require('../util/os_utils');
@@ -104,6 +105,7 @@ class NSFSHealth {
104105
this.all_bucket_details = options.all_bucket_details;
105106
this.all_connection_details = options.all_connection_details;
106107
this.notif_storage_threshold = options.notif_storage_threshold;
108+
this.lifecycle = options.lifecycle;
107109
this.config_fs = options.config_fs;
108110
}
109111

@@ -133,6 +135,7 @@ class NSFSHealth {
133135
let account_details;
134136
let connection_details;
135137
let notif_storage_threshold_details;
138+
let latest_lifecycle_run_status;
136139
const endpoint_response_code = (endpoint_state && endpoint_state.response?.response_code) || 'UNKNOWN_ERROR';
137140
const health_check_params = { service_status, pid, endpoint_response_code, config_directory_status };
138141
const service_health = this._calc_health_status(health_check_params);
@@ -141,6 +144,8 @@ class NSFSHealth {
141144
if (this.all_account_details) account_details = await this.get_account_status();
142145
if (this.all_connection_details) connection_details = await this.get_connection_status();
143146
if (this.notif_storage_threshold) notif_storage_threshold_details = this.get_notif_storage_threshold_status();
147+
if (this.lifecycle) latest_lifecycle_run_status = await this.get_lifecycle_health_status();
148+
144149
const health = {
145150
service_name: NOOBAA_SERVICE_NAME,
146151
status: service_health,
@@ -164,7 +169,8 @@ class NSFSHealth {
164169
error_type: health_errors_tyes.PERSISTENT,
165170
},
166171
connections_status: connection_details,
167-
notif_storage_threshold_details
172+
notif_storage_threshold_details,
173+
latest_lifecycle_run_status
168174
}
169175
};
170176
if (!this.all_account_details) delete health.checks.accounts_status;
@@ -333,10 +339,10 @@ class NSFSHealth {
333339
};
334340
}
335341

336-
async validate_config_dir_exists(path, type) {
337-
const config_root_type_exists = await this.config_fs.validate_config_dir_exists(path);
342+
async validate_config_dir_exists(config_dir_path, type) {
343+
const config_root_type_exists = await this.config_fs.validate_config_dir_exists(config_dir_path);
338344
if (!config_root_type_exists) {
339-
dbg.log1(`Config directory type - ${type} is missing, ${path}`);
345+
dbg.log1(`Config directory type - ${type} is missing, ${config_dir_path}`);
340346
return {
341347
invalid_storages: [],
342348
valid_storages: []
@@ -446,6 +452,48 @@ class NSFSHealth {
446452
return res;
447453
}
448454

455+
/////////////////////////////
456+
// LIFECYCLE HEALTH STATUS //
457+
/////////////////////////////
458+
459+
/**
460+
* get_lifecycle_health_status returns the lifecycle rules status based on the status of the latest lifecycle wroker run
461+
* on the same host
462+
* @returns {Promise<object>}
463+
*/
464+
async get_lifecycle_health_status() {
465+
const latest_lifecycle_run_status = await this.get_latest_lifecycle_run_status({ silent_if_missing: true });
466+
if (!latest_lifecycle_run_status) return {};
467+
return {
468+
total_stats: latest_lifecycle_run_status.total_stats,
469+
lifecycle_run_times: latest_lifecycle_run_status.lifecycle_run_times,
470+
errors: latest_lifecycle_run_status.errors
471+
};
472+
}
473+
474+
475+
/**
476+
* get_latest_lifecycle_run_status returns the latest lifecycle run status
477+
* latest run can be found by maxing the lifecycle log entry names, log entry name is the lifecycle_run_{timestamp}.json of the run
478+
* @params {{silent_if_missing: boolean}} options
479+
* @returns {Promise<object | undefined >}
480+
*/
481+
async get_latest_lifecycle_run_status(options) {
482+
const { silent_if_missing = false } = options;
483+
try {
484+
const lifecycle_log_entries = await nb_native().fs.readdir(this.config_fs.fs_context, config.NC_LIFECYCLE_LOGS_DIR);
485+
const latest_lifecycle_run = _.maxBy(lifecycle_log_entries, entry => entry.name);
486+
const latest_lifecycle_run_status_path = path.join(config.NC_LIFECYCLE_LOGS_DIR, latest_lifecycle_run.name);
487+
const latest_lifecycle_run_status = await this.config_fs.get_config_data(latest_lifecycle_run_status_path, options);
488+
return latest_lifecycle_run_status;
489+
} catch (err) {
490+
if (err.code === 'ENOENT' && silent_if_missing) {
491+
return;
492+
}
493+
throw err;
494+
}
495+
}
496+
449497
/**
450498
* get_config_file_data_or_error_object return an object containing config_data or err_obj if error occurred
451499
* @param {string} type
@@ -613,10 +661,11 @@ async function get_health_status(argv, config_fs) {
613661
const all_bucket_details = get_boolean_or_string_value(argv.all_bucket_details);
614662
const all_connection_details = get_boolean_or_string_value(argv.all_connection_details);
615663
const notif_storage_threshold = get_boolean_or_string_value(argv.notif_storage_threshold);
664+
const lifecycle = get_boolean_or_string_value(argv.lifecycle);
616665

617666
if (deployment_type === 'nc') {
618667
const health = new NSFSHealth({ https_port,
619-
all_account_details, all_bucket_details, all_connection_details, notif_storage_threshold, config_fs });
668+
all_account_details, all_bucket_details, all_connection_details, notif_storage_threshold, lifecycle, config_fs });
620669
const health_status = await health.nc_nsfs_health();
621670
write_stdout_response(ManageCLIResponse.HealthStatus, health_status);
622671
} else {

src/manage_nsfs/manage_nsfs_constants.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ const VALID_OPTIONS_GLACIER = {
7575
};
7676

7777
const VALID_OPTIONS_DIAGNOSE = {
78-
'health': new Set([ 'https_port', 'deployment_type', 'all_account_details', 'all_bucket_details', 'all_connection_details', 'notif_storage_threshold', ...CLI_MUTUAL_OPTIONS]),
78+
'health': new Set([ 'https_port', 'deployment_type', 'all_account_details', 'all_bucket_details', 'all_connection_details', 'notif_storage_threshold', 'lifecycle', ...CLI_MUTUAL_OPTIONS]),
7979
'gather-logs': new Set([ CONFIG_ROOT_FLAG]),
8080
'metrics': new Set([CONFIG_ROOT_FLAG])
8181
};
@@ -150,6 +150,7 @@ const OPTION_TYPE = {
150150
notif_storage_threshold: 'boolean',
151151
https_port: 'number',
152152
debug: 'number',
153+
lifecycle: 'boolean',
153154
// upgrade options
154155
expected_version: 'string',
155156
expected_hosts: 'string',

src/manage_nsfs/manage_nsfs_events_utils.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ NoobaaEvent.LIFECYCLE_FAILED = Object.freeze({
444444
event_type: 'ERROR',
445445
scope: 'NODE',
446446
severity: 'ERROR',
447-
state: 'DEGRADED',
447+
state: 'HEALTHY',
448448
});
449449

450450
NoobaaEvent.LIFECYCLE_TIMEOUT = Object.freeze({
@@ -455,7 +455,7 @@ NoobaaEvent.LIFECYCLE_TIMEOUT = Object.freeze({
455455
event_type: 'ERROR',
456456
scope: 'NODE',
457457
severity: 'ERROR',
458-
state: 'DEGRADED',
458+
state: 'HEALTHY',
459459
});
460460

461461
exports.NoobaaEvent = NoobaaEvent;

src/manage_nsfs/manage_nsfs_help_utils.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ Flags:
371371
--all_account_details <boolean> (optional) Set a flag for returning all account details.
372372
--all_bucket_details <boolean> (optional) Set a flag for returning all bucket details.
373373
--all_connection_details <boolean> (optional) Set a flag for returning all connection details.
374+
--lifecycle <boolean> (optional) Set a flag for returning lifecycle details on the current host.
374375
--debug <number> (optional) Use for increasing the log verbosity of health cli commands.
375376
--config_root <string> (optional) Set Configuration files path for Noobaa standalon NSFS. (default config.NSFS_NC_DEFAULT_CONF_DIR)
376377

src/manage_nsfs/nc_lifecycle.js

Lines changed: 64 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ const config_fs_options = { silent_if_missing: true };
3131

3232
const lifecycle_run_status = {
3333
running_host: os.hostname(), lifecycle_run_times: {},
34-
total_stats: _get_default_stats(), buckets_statuses: {}
34+
total_stats: _get_default_stats(), buckets_statuses: {},
35+
state: {is_finished: false}
3536
};
3637

3738
let return_short_status = false;
@@ -137,13 +138,19 @@ async function run_lifecycle(config_fs, disable_service_validation) {
137138
*/
138139
async function process_buckets(config_fs, bucket_names, system_json) {
139140
const buckets_concurrency = 10; // TODO - think about it
140-
await P.map_with_concurrency(buckets_concurrency, bucket_names, async bucket_name =>
141-
await _call_op_and_update_status({
142-
bucket_name,
143-
op_name: TIMED_OPS.PROCESS_BUCKET,
144-
op_func: async () => process_bucket(config_fs, bucket_name, system_json)
145-
})
146-
);
141+
while (!lifecycle_run_status.state.is_finished) {
142+
await P.map_with_concurrency(buckets_concurrency, bucket_names, async bucket_name =>
143+
await _call_op_and_update_status({
144+
bucket_name,
145+
op_name: TIMED_OPS.PROCESS_BUCKET,
146+
op_func: async () => process_bucket(config_fs, bucket_name, system_json)
147+
})
148+
);
149+
lifecycle_run_status.state.is_finished = Object.values(lifecycle_run_status.buckets_statuses).reduce(
150+
(acc, bucket) => acc && (bucket.state?.is_finished),
151+
true
152+
);
153+
}
147154
}
148155

149156
/**
@@ -158,7 +165,10 @@ async function process_bucket(config_fs, bucket_name, system_json) {
158165
const object_sdk = new NsfsObjectSDK('', config_fs, account, bucket_json.versioning, config_fs.config_root, system_json);
159166
await object_sdk._simple_load_requesting_account();
160167
const should_notify = notifications_util.should_notify_on_event(bucket_json, notifications_util.OP_TO_EVENT.lifecycle_delete.name);
161-
if (!bucket_json.lifecycle_configuration_rules) return {};
168+
if (!bucket_json.lifecycle_configuration_rules) {
169+
lifecycle_run_status.buckets_statuses[bucket_json.name].state = {is_finished: true};
170+
return;
171+
}
162172
await process_rules(config_fs, bucket_json, object_sdk, should_notify);
163173
}
164174

@@ -170,16 +180,31 @@ async function process_bucket(config_fs, bucket_name, system_json) {
170180
*/
171181
async function process_rules(config_fs, bucket_json, object_sdk, should_notify) {
172182
try {
173-
await P.all(_.map(bucket_json.lifecycle_configuration_rules,
174-
async (lifecycle_rule, index) =>
175-
await _call_op_and_update_status({
176-
bucket_name: bucket_json.name,
177-
rule_id: lifecycle_rule.id,
178-
op_name: TIMED_OPS.PROCESS_RULE,
179-
op_func: async () => process_rule(config_fs, lifecycle_rule, index, bucket_json, object_sdk, should_notify)
180-
})
181-
)
182-
);
183+
lifecycle_run_status.buckets_statuses[bucket_json.name].state ??= {};
184+
const bucket_state = lifecycle_run_status.buckets_statuses[bucket_json.name].state;
185+
bucket_state.num_processed_objects = 0;
186+
while (!bucket_state.is_finished && bucket_state.num_processed_objects < config.NC_LIFECYCLE_BUCKET_BATCH_SIZE) {
187+
await P.all(_.map(bucket_json.lifecycle_configuration_rules,
188+
async (lifecycle_rule, index) =>
189+
await _call_op_and_update_status({
190+
bucket_name: bucket_json.name,
191+
rule_id: lifecycle_rule.id,
192+
op_name: TIMED_OPS.PROCESS_RULE,
193+
op_func: async () => process_rule(config_fs,
194+
lifecycle_rule,
195+
index,
196+
bucket_json,
197+
object_sdk,
198+
should_notify
199+
)
200+
})
201+
)
202+
);
203+
bucket_state.is_finished = Object.values(lifecycle_run_status.buckets_statuses[bucket_json.name].rules_statuses).reduce(
204+
(acc, rule) => acc && (_.isEmpty(rule.state) || rule.state.is_finished),
205+
true
206+
);
207+
}
183208
} catch (err) {
184209
dbg.error('process_rules failed with error', err, err.code, err.message);
185210
}
@@ -268,7 +293,6 @@ async function process_rule(config_fs, lifecycle_rule, index, bucket_json, objec
268293
op_func: async () => abort_mpus(candidates, object_sdk)
269294
});
270295
}
271-
await update_lifecycle_rules_last_sync(config_fs, bucket_json, rule_id, index);
272296
} catch (err) {
273297
dbg.error('process_rule failed with error', err, err.code, err.message);
274298
}
@@ -350,8 +374,9 @@ async function throw_if_noobaa_not_active(config_fs, system_json) {
350374
*/
351375
async function get_candidates(bucket_json, lifecycle_rule, object_sdk, fs_context) {
352376
const candidates = { abort_mpu_candidates: [], delete_candidates: [] };
377+
const rule_state = lifecycle_run_status.buckets_statuses[bucket_json.name].rules_statuses[lifecycle_rule.id]?.state || {};
353378
if (lifecycle_rule.expiration) {
354-
candidates.delete_candidates = await get_candidates_by_expiration_rule(lifecycle_rule, bucket_json, object_sdk);
379+
candidates.delete_candidates = await get_candidates_by_expiration_rule(lifecycle_rule, bucket_json, object_sdk, rule_state);
355380
if (lifecycle_rule.expiration.days || lifecycle_rule.expiration.expired_object_delete_marker) {
356381
const dm_candidates = await get_candidates_by_expiration_delete_marker_rule(lifecycle_rule, bucket_json);
357382
candidates.delete_candidates = candidates.delete_candidates.concat(dm_candidates);
@@ -365,6 +390,7 @@ async function get_candidates(bucket_json, lifecycle_rule, object_sdk, fs_contex
365390
candidates.abort_mpu_candidates = await get_candidates_by_abort_incomplete_multipart_upload_rule(
366391
lifecycle_rule, bucket_json, object_sdk, fs_context);
367392
}
393+
lifecycle_run_status.buckets_statuses[bucket_json.name].rules_statuses[lifecycle_rule.id].state = rule_state;
368394
return candidates;
369395
}
370396

@@ -376,15 +402,10 @@ async function get_candidates(bucket_json, lifecycle_rule, object_sdk, fs_contex
376402
* @returns {boolean}
377403
*/
378404
function validate_rule_enabled(rule, bucket) {
379-
const now = Date.now();
380405
if (rule.status !== 'Enabled') {
381406
dbg.log0('validate_rule_enabled: SKIP bucket:', bucket.name, '(bucket id:', bucket._id, ') rule', util.inspect(rule), 'not Enabled');
382407
return false;
383408
}
384-
if (rule.last_sync && now - rule.last_sync < config.LIFECYCLE_SCHEDULE_MIN) {
385-
dbg.log0('validate_rule_enabled: SKIP bucket:', bucket.name, '(bucket id:', bucket._id, ') rule', util.inspect(rule), 'now', now, 'last_sync', rule.last_sync, 'schedule min', config.LIFECYCLE_SCHEDULE_MIN);
386-
return false;
387-
}
388409
return true;
389410
}
390411

@@ -410,12 +431,12 @@ function _get_lifecycle_object_info_from_list_object_entry(entry) {
410431
* @param {Object} bucket_json
411432
* @returns {Promise<Object[]>}
412433
*/
413-
async function get_candidates_by_expiration_rule(lifecycle_rule, bucket_json, object_sdk) {
434+
async function get_candidates_by_expiration_rule(lifecycle_rule, bucket_json, object_sdk, rule_state) {
414435
const is_gpfs = nb_native().fs.gpfs;
415436
if (is_gpfs && config.NC_LIFECYCLE_GPFS_ILM_ENABLED) {
416437
return get_candidates_by_expiration_rule_gpfs(lifecycle_rule, bucket_json);
417438
} else {
418-
return get_candidates_by_expiration_rule_posix(lifecycle_rule, bucket_json, object_sdk);
439+
return get_candidates_by_expiration_rule_posix(lifecycle_rule, bucket_json, object_sdk, rule_state);
419440
}
420441
}
421442

@@ -436,21 +457,34 @@ async function get_candidates_by_expiration_rule_gpfs(lifecycle_rule, bucket_jso
436457
* @param {Object} bucket_json
437458
* @returns {Promise<Object[]>}
438459
*/
439-
async function get_candidates_by_expiration_rule_posix(lifecycle_rule, bucket_json, object_sdk) {
460+
async function get_candidates_by_expiration_rule_posix(lifecycle_rule, bucket_json, object_sdk, rule_state) {
440461
const expiration = _get_expiration_time(lifecycle_rule.expiration);
441462
if (expiration < 0) return [];
442463
const filter_func = _build_lifecycle_filter({filter: lifecycle_rule.filter, expiration});
443464

444465
const filtered_objects = [];
445466
// TODO list_objects does not accept a filter and works in batch sizes of 1000. should handle batching
446467
// also should maybe create a helper function or add argument for a filter in list object
447-
const objects_list = await object_sdk.list_objects({bucket: bucket_json.name, prefix: lifecycle_rule.filter?.prefix});
468+
const objects_list = await object_sdk.list_objects({
469+
bucket: bucket_json.name,
470+
prefix: lifecycle_rule.filter?.prefix,
471+
key_marker: rule_state.key_marker,
472+
limit: config.NC_LIFECYCLE_LIST_BATCH_SIZE
473+
});
448474
objects_list.objects.forEach(obj => {
449475
const object_info = _get_lifecycle_object_info_from_list_object_entry(obj);
450476
if (filter_func(object_info)) {
451477
filtered_objects.push({key: object_info.key});
452478
}
453479
});
480+
481+
const bucket_state = lifecycle_run_status.buckets_statuses[bucket_json.name].state;
482+
bucket_state.num_processed_objects += objects_list.objects.length;
483+
if (objects_list.is_truncated) {
484+
rule_state.key_marker = objects_list.next_marker;
485+
} else {
486+
rule_state.is_finished = true;
487+
}
454488
return filtered_objects;
455489

456490
}
@@ -630,26 +664,6 @@ function _file_contain_tags(object_info, filter_tags) {
630664
//////// STATUS HELPERS ////////
631665
/////////////////////////////////
632666

633-
/**
634-
* update_lifecycle_rules_last_sync updates the last sync time of the lifecycle rule
635-
* @param {import('../sdk/config_fs').ConfigFS} config_fs
636-
* @param {Object} bucket_json
637-
* @param {String} rule_id
638-
* @param {number} index
639-
* @returns {Promise<Void>}
640-
*/
641-
async function update_lifecycle_rules_last_sync(config_fs, bucket_json, rule_id, index) {
642-
bucket_json.lifecycle_configuration_rules[index].last_sync = Date.now();
643-
const { num_objects_deleted = 0, num_mpu_aborted = 0 } =
644-
lifecycle_run_status.buckets_statuses[bucket_json.name].rules_statuses[rule_id].rule_stats;
645-
// if (res.num_objects_deleted >= config.LIFECYCLE_BATCH_SIZE) should_rerun = true; // TODO - think if needed and add something about mpu abort
646-
dbg.log0('nc_lifecycle.update_lifecycle_rules_last_sync Done bucket:', bucket_json.name, '(bucket id:', bucket_json._id, ') done deletion of objects per rule',
647-
bucket_json.lifecycle_configuration_rules[index],
648-
'time:', bucket_json.lifecycle_configuration_rules[index].last_sync,
649-
'num_objects_deleted:', num_objects_deleted, 'num_mpu_aborted:', num_mpu_aborted);
650-
await config_fs.update_bucket_config_file(bucket_json);
651-
}
652-
653667
/**
654668
* _call_op_and_update_status calls the op and report time and error to the lifecycle status.
655669
*

0 commit comments

Comments
 (0)