Skip to content

Commit 5eadfd8

Browse files
authored
Merge pull request #9098 from romayalon/romy-escape-special-wild-cards-ilm
NC | GPFS | ILM policy special chars and tagging fixes
2 parents d6feb0a + 6abe011 commit 5eadfd8

File tree

2 files changed

+86
-43
lines changed

2 files changed

+86
-43
lines changed

src/manage_nsfs/nc_lifecycle.js

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@ const LIFECYLE_TIMESTAMP_FILE = 'lifecycle.timestamp';
3434
const config_fs_options = { silent_if_missing: true };
3535
const ILM_POLICIES_TMP_DIR = path.join(config.NC_LIFECYCLE_LOGS_DIR, 'lifecycle_ilm_policies');
3636
const ILM_CANDIDATES_TMP_DIR = path.join(config.NC_LIFECYCLE_LOGS_DIR, 'lifecycle_ilm_candidates');
37-
37+
// ESCAPE clause appended after the LIKE patterns of the generated ILM policy.
// At runtime the string is ESCAPE '\' - the backslash is doubled only for the JS string literal.
const escape_backslash_str = "ESCAPE '\\'";
38+
// Global patterns for the characters rewritten by _escape_like_clause_ilm_policy.
const underscore_wildcard_regex = /_/g;
39+
// NOTE(review): "precentage" looks like a misspelling of "percentage"; kept as-is because the name is referenced by _escape_like_clause_ilm_policy.
const precentage_wildcard_regex = /%/g;
40+
const single_quote_regex = /'/g;
41+
const backslash_regex = /\\/g;
3842

3943
const TIMED_OPS = Object.freeze({
4044
RUN_LIFECYLE: 'run_lifecycle',
@@ -1257,16 +1261,17 @@ class NCLifecycle {
12571261
convert_lifecycle_policy_to_gpfs_ilm_policy(lifecycle_rule, bucket_json) {
12581262
const bucket_path = bucket_json.path;
12591263
const bucket_rule_id = this.get_lifecycle_ilm_candidate_file_suffix(bucket_json.name, lifecycle_rule);
1260-
const in_bucket_path = path.join(bucket_path, '/%');
1261-
const in_bucket_internal_dir = path.join(bucket_path, `/${config.NSFS_TEMP_DIR_NAME}%/%`);
1262-
const in_versions_dir = path.join(bucket_path, '/.versions/%');
1263-
const in_nested_versions_dir = path.join(bucket_path, '/%/.versions/%');
1264+
const escaped_bucket_path = this._escape_like_clause_ilm_policy(bucket_path);
1265+
const in_bucket_path = path.join(escaped_bucket_path, '/%');
1266+
const in_bucket_internal_dir = path.join(escaped_bucket_path, `/${config.NSFS_TEMP_DIR_NAME}%/%`);
1267+
const in_versions_dir = path.join(escaped_bucket_path, '/.versions/%');
1268+
const in_nested_versions_dir = path.join(escaped_bucket_path, '/%/.versions/%');
12641269
const ilm_policy_helpers = { bucket_rule_id, in_bucket_path, in_bucket_internal_dir, in_versions_dir, in_nested_versions_dir };
12651270

12661271
const policy_base = this._get_gpfs_ilm_policy_base(ilm_policy_helpers);
12671272
const expiry_string = this.convert_expiry_rule_to_gpfs_ilm_policy(lifecycle_rule, ilm_policy_helpers);
12681273
const non_current_days_string = this.convert_noncurrent_version_by_days_to_gpfs_ilm_policy(lifecycle_rule, ilm_policy_helpers);
1269-
const filter_policy = this.convert_filter_to_gpfs_ilm_policy(lifecycle_rule, bucket_json);
1274+
const filter_policy = this.convert_filter_to_gpfs_ilm_policy(lifecycle_rule, escaped_bucket_path);
12701275
return policy_base + non_current_days_string + expiry_string + filter_policy;
12711276
}
12721277

@@ -1280,12 +1285,29 @@ class NCLifecycle {
12801285
const mod_age_definition = `define( mod_age, (DAYS(CURRENT_TIMESTAMP) - DAYS(MODIFICATION_TIME)) )\n`;
12811286
const change_age_definition = `define( change_age, (DAYS(CURRENT_TIMESTAMP) - DAYS(CHANGE_TIME)) )\n`;
12821287
const rule_id_definition = `RULE '${bucket_rule_id}' LIST '${bucket_rule_id}'\n`;
1283-
const policy_path_base = `WHERE PATH_NAME LIKE '${in_bucket_path}'\n` +
1284-
`AND PATH_NAME NOT LIKE '${in_bucket_internal_dir}'\n`;
1288+
const policy_path_base = `WHERE PATH_NAME LIKE '${in_bucket_path}' ${escape_backslash_str}\n` +
1289+
`AND PATH_NAME NOT LIKE '${in_bucket_internal_dir}' ${escape_backslash_str}\n`;
12851290

12861291
return mod_age_definition + change_age_definition + rule_id_definition + policy_path_base;
12871292
}
12881293

1294+
/**
1295+
* escape_like_clause_ilm_policy escapes the \ _ % and ' characters in the ILM policy string
1296+
* this is needed because GPFS ILM policies use _ and % as wildcards
1297+
* and we need to escape them to use them as normal characters
1298+
* since we are escaping using backslash we also need to escape the backslash itself
1299+
* IMPORTANT - escaping of the backslash must be done before escaping of the underscore and percentage
1300+
* @param {String} ilm_policy_string
1301+
* @returns String
1302+
*/
1303+
_escape_like_clause_ilm_policy(ilm_policy_string) {
1304+
return ilm_policy_string
1305+
.replace(backslash_regex, '\\\\')
1306+
.replace(underscore_wildcard_regex, '\\_')
1307+
.replace(precentage_wildcard_regex, '\\%')
1308+
.replace(single_quote_regex, `''`);
1309+
}
1310+
12891311
/**
12901312
* convert_expiry_rule_to_gpfs_ilm_policy converts the expiry rule to GPFS ILM policy
12911313
* expiration rule works on latest version path (not inside .versions or in nested .versions)
@@ -1296,8 +1318,8 @@ class NCLifecycle {
12961318
convert_expiry_rule_to_gpfs_ilm_policy(lifecycle_rule, { in_versions_dir, in_nested_versions_dir }) {
12971319
const { expiration = undefined } = lifecycle_rule;
12981320
if (!expiration) return '';
1299-
const current_path_policy = `AND PATH_NAME NOT LIKE '${in_versions_dir}'\n` +
1300-
`AND PATH_NAME NOT LIKE '${in_nested_versions_dir}'\n`;
1321+
const current_path_policy = `AND PATH_NAME NOT LIKE '${in_versions_dir}' ${escape_backslash_str}\n` +
1322+
`AND PATH_NAME NOT LIKE '${in_nested_versions_dir}' ${escape_backslash_str}\n`;
13011323

13021324
const expiry_policy = expiration.days ? `AND mod_age > ${expiration.days}\n` : '';
13031325
return current_path_policy + expiry_policy;
@@ -1317,20 +1339,23 @@ class NCLifecycle {
13171339
/**
13181340
* convert_filter_to_gpfs_ilm_policy converts the filter to GPFS ILM policy
13191341
* @param {*} lifecycle_rule
1320-
* @param {Object} bucket_json
1342+
* @param {String} escaped_bucket_path
13211343
* @returns {String}
13221344
*/
1323-
convert_filter_to_gpfs_ilm_policy(lifecycle_rule, bucket_json) {
1345+
convert_filter_to_gpfs_ilm_policy(lifecycle_rule, escaped_bucket_path) {
13241346
const { prefix = undefined, filter = {} } = lifecycle_rule;
1325-
const bucket_path = bucket_json.path;
13261347
let filter_policy = '';
13271348
if (prefix || Object.keys(filter).length > 0) {
13281349
const { object_size_greater_than = undefined, object_size_less_than = undefined, tags = undefined } = filter;
13291350
const rule_prefix = prefix || filter.prefix;
1330-
filter_policy += rule_prefix ? `AND PATH_NAME LIKE '${path.join(bucket_path, rule_prefix)}%'\n` : '';
1351+
const escaped_prefix = this._escape_like_clause_ilm_policy(rule_prefix || '');
1352+
filter_policy += rule_prefix ? `AND PATH_NAME LIKE '${path.join(escaped_bucket_path, escaped_prefix)}%' ${escape_backslash_str}\n` : '';
13311353
filter_policy += object_size_greater_than === undefined ? '' : `AND FILE_SIZE > ${object_size_greater_than}\n`;
13321354
filter_policy += object_size_less_than === undefined ? '' : `AND FILE_SIZE < ${object_size_less_than}\n`;
1333-
filter_policy += tags ? tags.map(tag => `AND XATTR('user.noobaa.tag.${tag.key}') LIKE ${tag.value}\n`).join('') : '';
1355+
filter_policy += tags ? tags.map(tag => {
1356+
const escaped_tag_value = this._escape_like_clause_ilm_policy(tag.value);
1357+
return `AND XATTR('user.noobaa.tag.${tag.key}') LIKE '${escaped_tag_value}' ${escape_backslash_str}\n`;
1358+
}).join('') : '';
13341359
}
13351360
return filter_policy;
13361361
}
@@ -1493,16 +1518,21 @@ class NCLifecycle {
14931518
* example -
14941519
* 17460 1316236366 0 -- /mnt/gpfs0/account1_new_buckets_path/bucket1_storage/key1.txt
14951520
* if file is .folder (directory object) we need to return its parent directory
1496-
* @param {*} entry
1521+
* Note that trim() is deliberately not used here: it would strip whitespace from the end of the line and could remove
1522+
* trailing spaces that are part of the file name. The file reader is expected to trim the line before passing it to this function
1523+
* @param {Object} entry - entry from the candidates file
1524+
* @param {Object} bucket_json
14971525
*/
14981526
_parse_key_from_line(entry, bucket_json) {
1499-
const line_array = entry.path.split(' ');
1500-
const file_path = line_array[line_array.length - 1];
1527+
dbg.log1(`_parse_key_from_line entry=${util.inspect(entry)}, bucket_json=${util.inspect(bucket_json)}`);
1528+
const path_start_index = entry.path.indexOf(bucket_json.path);
1529+
const file_path = entry.path.slice(path_start_index);
15011530
let file_key = file_path.replace(path.join(bucket_json.path, '/'), '');
15021531
const basename = path.basename(file_key);
15031532
if (basename.startsWith(config.NSFS_FOLDER_OBJECT_NAME)) {
15041533
file_key = path.join(path.dirname(file_key), '/');
15051534
}
1535+
dbg.log1(`_parse_key_from_line file_path=${util.inspect(file_path)}, file_key=${util.inspect(file_key)}`);
15061536
return file_key;
15071537
}
15081538
}

0 commit comments

Comments
 (0)