@@ -34,7 +34,11 @@ const LIFECYLE_TIMESTAMP_FILE = 'lifecycle.timestamp';
34
34
// options object handed to config fs calls - presumably makes a missing config file a non-error (TODO confirm against config_fs)
const config_fs_options = { silent_if_missing: true };
// working directories, both located under config.NC_LIFECYCLE_LOGS_DIR
// (by their names - one for ILM policy files, one for ILM candidates files)
const ILM_POLICIES_TMP_DIR = path.join(config.NC_LIFECYCLE_LOGS_DIR, 'lifecycle_ilm_policies');
const ILM_CANDIDATES_TMP_DIR = path.join(config.NC_LIFECYCLE_LOGS_DIR, 'lifecycle_ilm_candidates');
37
-
37
// ESCAPE clause that is appended to LIKE patterns in the generated ILM policy,
// it declares the backslash as the escape character of the pattern
const escape_backslash_str = "ESCAPE '\\'";
// global (g) regexes matching every occurrence of a character that must be escaped
// before a string is embedded in a LIKE pattern of the ILM policy
const underscore_wildcard_regex = /_/g;
const precentage_wildcard_regex = /%/g;
const single_quote_regex = /'/g;
const backslash_regex = /\\/g;
38
42
39
43
const TIMED_OPS = Object . freeze ( {
40
44
RUN_LIFECYLE : 'run_lifecycle' ,
@@ -1257,16 +1261,17 @@ class NCLifecycle {
1257
1261
convert_lifecycle_policy_to_gpfs_ilm_policy ( lifecycle_rule , bucket_json ) {
1258
1262
const bucket_path = bucket_json . path ;
1259
1263
const bucket_rule_id = this . get_lifecycle_ilm_candidate_file_suffix ( bucket_json . name , lifecycle_rule ) ;
1260
- const in_bucket_path = path . join ( bucket_path , '/%' ) ;
1261
- const in_bucket_internal_dir = path . join ( bucket_path , `/${ config . NSFS_TEMP_DIR_NAME } %/%` ) ;
1262
- const in_versions_dir = path . join ( bucket_path , '/.versions/%' ) ;
1263
- const in_nested_versions_dir = path . join ( bucket_path , '/%/.versions/%' ) ;
1264
+ const escaped_bucket_path = this . _escape_like_clause_ilm_policy ( bucket_path ) ;
1265
+ const in_bucket_path = path . join ( escaped_bucket_path , '/%' ) ;
1266
+ const in_bucket_internal_dir = path . join ( escaped_bucket_path , `/${ config . NSFS_TEMP_DIR_NAME } %/%` ) ;
1267
+ const in_versions_dir = path . join ( escaped_bucket_path , '/.versions/%' ) ;
1268
+ const in_nested_versions_dir = path . join ( escaped_bucket_path , '/%/.versions/%' ) ;
1264
1269
const ilm_policy_helpers = { bucket_rule_id, in_bucket_path, in_bucket_internal_dir, in_versions_dir, in_nested_versions_dir } ;
1265
1270
1266
1271
const policy_base = this . _get_gpfs_ilm_policy_base ( ilm_policy_helpers ) ;
1267
1272
const expiry_string = this . convert_expiry_rule_to_gpfs_ilm_policy ( lifecycle_rule , ilm_policy_helpers ) ;
1268
1273
const non_current_days_string = this . convert_noncurrent_version_by_days_to_gpfs_ilm_policy ( lifecycle_rule , ilm_policy_helpers ) ;
1269
- const filter_policy = this . convert_filter_to_gpfs_ilm_policy ( lifecycle_rule , bucket_json ) ;
1274
+ const filter_policy = this . convert_filter_to_gpfs_ilm_policy ( lifecycle_rule , escaped_bucket_path ) ;
1270
1275
return policy_base + non_current_days_string + expiry_string + filter_policy ;
1271
1276
}
1272
1277
@@ -1280,12 +1285,29 @@ class NCLifecycle {
1280
1285
const mod_age_definition = `define( mod_age, (DAYS(CURRENT_TIMESTAMP) - DAYS(MODIFICATION_TIME)) )\n` ;
1281
1286
const change_age_definition = `define( change_age, (DAYS(CURRENT_TIMESTAMP) - DAYS(CHANGE_TIME)) )\n` ;
1282
1287
const rule_id_definition = `RULE '${ bucket_rule_id } ' LIST '${ bucket_rule_id } '\n` ;
1283
- const policy_path_base = `WHERE PATH_NAME LIKE '${ in_bucket_path } '\n` +
1284
- `AND PATH_NAME NOT LIKE '${ in_bucket_internal_dir } '\n` ;
1288
+ const policy_path_base = `WHERE PATH_NAME LIKE '${ in_bucket_path } ' ${ escape_backslash_str } \n` +
1289
+ `AND PATH_NAME NOT LIKE '${ in_bucket_internal_dir } ' ${ escape_backslash_str } \n` ;
1285
1290
1286
1291
return mod_age_definition + change_age_definition + rule_id_definition + policy_path_base ;
1287
1292
}
1288
1293
1294
+ /**
1295
+ * escape_like_clause_ilm_policy escapes the \ _ % and ' characters in the ILM policy string
1296
+ * this is needed because GPFS ILM policies use _ and % as wildcards
1297
+ * and we need to escape them to use them as normal characters
1298
+ * since we are escaping using backslash we also need to escape the backslash itself
1299
+ * IMPORTANT - escaping of the backslash must be done before escaping of the underscore and percentage
1300
+ * @param {String } ilm_policy_string
1301
+ * @returns String
1302
+ */
1303
+ _escape_like_clause_ilm_policy ( ilm_policy_string ) {
1304
+ return ilm_policy_string
1305
+ . replace ( backslash_regex , '\\\\' )
1306
+ . replace ( underscore_wildcard_regex , '\\_' )
1307
+ . replace ( precentage_wildcard_regex , '\\%' )
1308
+ . replace ( single_quote_regex , `''` ) ;
1309
+ }
1310
+
1289
1311
/**
1290
1312
* convert_expiry_rule_to_gpfs_ilm_policy converts the expiry rule to GPFS ILM policy
1291
1313
* expiration rule works on latest version path (not inside .versions or in nested .versions)
@@ -1296,8 +1318,8 @@ class NCLifecycle {
1296
1318
convert_expiry_rule_to_gpfs_ilm_policy ( lifecycle_rule , { in_versions_dir, in_nested_versions_dir } ) {
1297
1319
const { expiration = undefined } = lifecycle_rule ;
1298
1320
if ( ! expiration ) return '' ;
1299
- const current_path_policy = `AND PATH_NAME NOT LIKE '${ in_versions_dir } '\n` +
1300
- `AND PATH_NAME NOT LIKE '${ in_nested_versions_dir } '\n` ;
1321
+ const current_path_policy = `AND PATH_NAME NOT LIKE '${ in_versions_dir } ' ${ escape_backslash_str } \n` +
1322
+ `AND PATH_NAME NOT LIKE '${ in_nested_versions_dir } ' ${ escape_backslash_str } \n` ;
1301
1323
1302
1324
const expiry_policy = expiration . days ? `AND mod_age > ${ expiration . days } \n` : '' ;
1303
1325
return current_path_policy + expiry_policy ;
@@ -1317,20 +1339,23 @@ class NCLifecycle {
1317
1339
/**
1318
1340
* convert_filter_to_gpfs_ilm_policy converts the filter to GPFS ILM policy
1319
1341
* @param {* } lifecycle_rule
1320
- * @param {Object } bucket_json
1342
+ * @param {String } escaped_bucket_path
1321
1343
* @returns {String }
1322
1344
*/
1323
- convert_filter_to_gpfs_ilm_policy ( lifecycle_rule , bucket_json ) {
1345
+ convert_filter_to_gpfs_ilm_policy ( lifecycle_rule , escaped_bucket_path ) {
1324
1346
const { prefix = undefined , filter = { } } = lifecycle_rule ;
1325
- const bucket_path = bucket_json . path ;
1326
1347
let filter_policy = '' ;
1327
1348
if ( prefix || Object . keys ( filter ) . length > 0 ) {
1328
1349
const { object_size_greater_than = undefined , object_size_less_than = undefined , tags = undefined } = filter ;
1329
1350
const rule_prefix = prefix || filter . prefix ;
1330
- filter_policy += rule_prefix ? `AND PATH_NAME LIKE '${ path . join ( bucket_path , rule_prefix ) } %'\n` : '' ;
1351
+ const escaped_prefix = this . _escape_like_clause_ilm_policy ( rule_prefix || '' ) ;
1352
+ filter_policy += rule_prefix ? `AND PATH_NAME LIKE '${ path . join ( escaped_bucket_path , escaped_prefix ) } %' ${ escape_backslash_str } \n` : '' ;
1331
1353
filter_policy += object_size_greater_than === undefined ? '' : `AND FILE_SIZE > ${ object_size_greater_than } \n` ;
1332
1354
filter_policy += object_size_less_than === undefined ? '' : `AND FILE_SIZE < ${ object_size_less_than } \n` ;
1333
- filter_policy += tags ? tags . map ( tag => `AND XATTR('user.noobaa.tag.${ tag . key } ') LIKE ${ tag . value } \n` ) . join ( '' ) : '' ;
1355
+ filter_policy += tags ? tags . map ( tag => {
1356
+ const escaped_tag_value = this . _escape_like_clause_ilm_policy ( tag . value ) ;
1357
+ return `AND XATTR('user.noobaa.tag.${ tag . key } ') LIKE '${ escaped_tag_value } ' ${ escape_backslash_str } \n` ;
1358
+ } ) . join ( '' ) : '' ;
1334
1359
}
1335
1360
return filter_policy ;
1336
1361
}
@@ -1493,16 +1518,21 @@ class NCLifecycle {
1493
1518
* example -
1494
1519
* 17460 1316236366 0 -- /mnt/gpfs0/account1_new_buckets_path/bucket1_storage/key1.txt
1495
1520
* if file is .folder (directory object) we need to return its parent directory
1496
- * @param {* } entry
1521
+ * Notice that trim() is not used here because if used will remove whitespaces from the end of the line and might delete
1522
+ * spaces at the end of the file name that might be part of the file name, file reader should trim the line before passing it to this function
1523
+ * @param {Object } entry - entry from the candidates file
1524
+ * @param {Object } bucket_json
1497
1525
*/
1498
1526
_parse_key_from_line ( entry , bucket_json ) {
1499
- const line_array = entry . path . split ( ' ' ) ;
1500
- const file_path = line_array [ line_array . length - 1 ] ;
1527
+ dbg . log1 ( `_parse_key_from_line entry=${ util . inspect ( entry ) } , bucket_json=${ util . inspect ( bucket_json ) } ` ) ;
1528
+ const path_start_index = entry . path . indexOf ( bucket_json . path ) ;
1529
+ const file_path = entry . path . slice ( path_start_index ) ;
1501
1530
let file_key = file_path . replace ( path . join ( bucket_json . path , '/' ) , '' ) ;
1502
1531
const basename = path . basename ( file_key ) ;
1503
1532
if ( basename . startsWith ( config . NSFS_FOLDER_OBJECT_NAME ) ) {
1504
1533
file_key = path . join ( path . dirname ( file_key ) , '/' ) ;
1505
1534
}
1535
+ dbg . log1 ( `_parse_key_from_line file_path=${ util . inspect ( file_path ) } , file_key=${ util . inspect ( file_key ) } ` ) ;
1506
1536
return file_key ;
1507
1537
}
1508
1538
}
0 commit comments