@@ -1052,6 +1052,8 @@ def set_process(
1052
1052
del set_to_candidate_dict [set_fileset ]
1053
1053
del id_to_fileset_dict [set_fileset ]
1054
1054
1055
+ manual_merge_map = defaultdict (list )
1056
+
1055
1057
for fileset_id , candidate_filesets in set_to_candidate_dict .items ():
1056
1058
fileset = id_to_fileset_dict [fileset_id ]
1057
1059
@@ -1060,16 +1062,6 @@ def set_process(
1060
1062
fileset ["name" ], candidate_filesets , conn
1061
1063
)
1062
1064
1063
- for candidate_fileset in candidate_filesets :
1064
- with conn .cursor () as cursor :
1065
- cursor .execute (
1066
- "SELECT id FROM fileset WHERE status = 'current' AND id = %s" ,
1067
- (candidate_fileset ),
1068
- )
1069
- result = cursor .fetchone ()
1070
- if result :
1071
- candidate_filesets .remove (candidate_fileset )
1072
-
1073
1065
(
1074
1066
fully_matched_filesets ,
1075
1067
auto_merged_filesets ,
@@ -1086,14 +1078,31 @@ def set_process(
1086
1078
auto_merged_filesets ,
1087
1079
manual_merged_filesets ,
1088
1080
mismatch_filesets ,
1081
+ manual_merge_map ,
1082
+ set_to_candidate_dict ,
1089
1083
conn ,
1090
1084
skiplog ,
1091
1085
)
1092
1086
1087
+ # print(manual_merge_map)
1088
+
1089
+ for fileset_id , candidates in manual_merge_map .items ():
1090
+ category_text = "Manual Merge Required"
1091
+ log_text = f"Merge Fileset:{ fileset_id } manually. Possible matches are: { ', ' .join (f'Fileset:{ id } ' for id in candidates )} ."
1092
+ manual_merged_filesets += 1
1093
+ # print(candidates)
1094
+ add_manual_merge (
1095
+ candidates ,
1096
+ fileset_id ,
1097
+ category_text ,
1098
+ log_text ,
1099
+ log_text ,
1100
+ user ,
1101
+ conn ,
1102
+ )
1103
+
1093
1104
# Final log
1094
1105
with conn .cursor () as cursor :
1095
- cursor .execute ("UPDATE fileset SET status = 'partial' WHERE status = 'current'" )
1096
-
1097
1106
cursor .execute (
1098
1107
"SELECT COUNT(fileset) from transactions WHERE `transaction` = %s" ,
1099
1108
(transaction_id ,),
@@ -1156,6 +1165,8 @@ def set_perform_match(
1156
1165
auto_merged_filesets ,
1157
1166
manual_merged_filesets ,
1158
1167
mismatch_filesets ,
1168
+ manual_merge_map ,
1169
+ set_to_candidate_dict ,
1159
1170
conn ,
1160
1171
skiplog ,
1161
1172
):
@@ -1170,7 +1181,7 @@ def set_perform_match(
1170
1181
)
1171
1182
status = cursor .fetchone ()["status" ]
1172
1183
if status == "detection" :
1173
- update_fileset_status (cursor , matched_fileset_id , "current " )
1184
+ update_fileset_status (cursor , matched_fileset_id , "parital " )
1174
1185
set_populate_file (fileset , matched_fileset_id , conn , detection )
1175
1186
auto_merged_filesets += 1
1176
1187
if not skiplog :
@@ -1183,6 +1194,9 @@ def set_perform_match(
1183
1194
conn ,
1184
1195
)
1185
1196
delete_original_fileset (fileset_id , conn )
1197
+ remove_manual_merge_if_size_mismatch (
1198
+ matched_fileset_id , manual_merge_map , set_to_candidate_dict , conn
1199
+ )
1186
1200
elif status == "partial" or status == "full" :
1187
1201
(is_match , unmatched_files ) = is_full_checksum_match (
1188
1202
matched_fileset_id , fileset , conn
@@ -1221,7 +1235,7 @@ def set_perform_match(
1221
1235
for candidate_fileset in candidate_filesets :
1222
1236
(is_match , _ ) = is_full_checksum_match (candidate_fileset , fileset , conn )
1223
1237
if is_match :
1224
- update_fileset_status (cursor , candidate_fileset , "current " )
1238
+ update_fileset_status (cursor , candidate_fileset , "partial " )
1225
1239
set_populate_file (fileset , candidate_fileset , conn , detection )
1226
1240
auto_merged_filesets += 1
1227
1241
if not skiplog :
@@ -1234,22 +1248,14 @@ def set_perform_match(
1234
1248
conn ,
1235
1249
)
1236
1250
delete_original_fileset (fileset_id , conn )
1251
+ remove_manual_merge_if_size_mismatch (
1252
+ candidate_fileset , manual_merge_map , set_to_candidate_dict , conn
1253
+ )
1237
1254
found_match = True
1238
1255
break
1239
1256
1240
1257
if not found_match :
1241
- category_text = "Manual Merge Required"
1242
- log_text = f"Merge Fileset:{ fileset_id } manually. Possible matches are: { ', ' .join (f'Fileset:{ id } ' for id in candidate_filesets )} ."
1243
- manual_merged_filesets += 1
1244
- add_manual_merge (
1245
- candidate_filesets ,
1246
- fileset_id ,
1247
- category_text ,
1248
- log_text ,
1249
- log_text ,
1250
- user ,
1251
- conn ,
1252
- )
1258
+ manual_merge_map [fileset_id ] = candidate_filesets
1253
1259
1254
1260
return (
1255
1261
fully_matched_filesets ,
@@ -1259,6 +1265,98 @@ def set_perform_match(
1259
1265
)
1260
1266
1261
1267
1268
def remove_manual_merge_if_size_mismatch(
    child_fileset, manual_merge_map, set_to_candidate_dict, conn
):
    """Drop `child_fileset` as a merge candidate of parents whose files disagree.

    The detection files (`file.detection = 1`) of `child_fileset` are loaded,
    and every parent fileset that currently lists the child as a candidate in
    either `manual_merge_map` or `set_to_candidate_dict` is checked against
    them.  A parent is a mismatch when, for at least one child file with a
    size other than -1, it has no file row with the same name and size.  For
    each mismatching parent, `remove_manual_merge` is called, which removes
    the child from both maps and deletes the matching `possible_merges` row.

    Args:
        child_fileset: id of the fileset whose detection files are compared.
        manual_merge_map: mapping of parent fileset id -> list of candidate
            child ids; mutated in place through `remove_manual_merge`.
        set_to_candidate_dict: mapping of parent fileset id -> list of
            candidate ids; mutated in place through `remove_manual_merge`.
        conn: database connection whose `cursor()` works as a context manager
            and yields rows addressable by column name.
    """
    # Snapshot the affected parents once, de-duplicated and in the order the
    # maps are walked.  A parent found in both maps is therefore checked a
    # single time, and nothing is mutated while a map is being iterated.
    parents = []
    seen = set()
    for mapping in (manual_merge_map, set_to_candidate_dict):
        for parent_fileset, child_list in mapping.items():
            if child_fileset in child_list and parent_fileset not in seen:
                seen.add(parent_fileset)
                parents.append(parent_fileset)

    if not parents:
        # Nobody lists this child as a candidate: no need to touch the DB.
        return

    detection_files_query = """
        SELECT f.name, f.size
        FROM fileset fs
        JOIN file f ON f.fileset = fs.id
        WHERE fs.id = %s
        AND f.detection = 1
    """

    with conn.cursor() as cursor:
        cursor.execute(detection_files_query, (child_fileset,))
        # Files with size -1 are never compared (presumably a placeholder for
        # an unknown size -- confirm against the schema).
        sized_files = [row for row in cursor.fetchall() if row["size"] != -1]

        mismatched_parents = [
            parent_fileset
            for parent_fileset in parents
            if _parent_lacks_sized_file(cursor, parent_fileset, sized_files)
        ]

    # Removals run after the read cursor is closed; each one opens its own
    # cursor inside remove_manual_merge.
    for parent_fileset in mismatched_parents:
        remove_manual_merge(
            child_fileset,
            parent_fileset,
            manual_merge_map,
            set_to_candidate_dict,
            conn,
        )


def _parent_lacks_sized_file(cursor, parent_fileset, sized_files):
    """Return True if `parent_fileset` misses any (name, size) of `sized_files`."""
    same_file_query = """
        SELECT f.id
        FROM fileset fs
        JOIN file f ON f.fileset = fs.id
        WHERE fs.id = %s
        AND f.name = %s
        AND f.size = %s
    """
    for file in sized_files:
        cursor.execute(same_file_query, (parent_fileset, file["name"], file["size"]))
        if not cursor.fetchall():
            # The first missing file is enough to call it a mismatch.
            return True
    return False
1339
+
1340
+
1341
def remove_manual_merge(
    child_fileset, parent_fileset, manual_merge_map, set_to_candidate_dict, conn
):
    """Forget `child_fileset` as a merge candidate of `parent_fileset`.

    The child id is removed (one occurrence) from the parent's list in
    `manual_merge_map` and in `set_to_candidate_dict` when present; the parent
    key itself is kept even if its list ends up empty.  The corresponding row
    is then deleted from the `possible_merges` table.

    Args:
        child_fileset: id of the candidate fileset to drop.
        parent_fileset: id of the fileset the candidate was proposed for.
        manual_merge_map: mapping of parent id -> list of candidate ids.
        set_to_candidate_dict: mapping of parent id -> list of candidate ids.
        conn: database connection whose `cursor()` works as a context manager.
    """
    # Both maps get the same treatment, manual_merge_map first.
    for mapping in (manual_merge_map, set_to_candidate_dict):
        if parent_fileset not in mapping:
            continue
        candidates = mapping[parent_fileset]
        if child_fileset in candidates:
            candidates.remove(child_fileset)

    delete_query = """
        DELETE FROM possible_merges
        WHERE child_fileset = %s
        AND parent_fileset = %s
    """
    with conn.cursor() as cursor:
        cursor.execute(delete_query, (child_fileset, parent_fileset))
1358
+
1359
+
1262
1360
def add_manual_merge (
1263
1361
child_filesets , parent_fileset , category_text , log_text , print_text , user , conn
1264
1362
):
@@ -1835,15 +1933,18 @@ def set_populate_file(fileset, fileset_id, conn, detection):
1835
1933
1836
1934
filename = os .path .basename (normalised_path (file ["name" ]))
1837
1935
1838
- if (engine_name == "glk" and file ["size" ] not in candidate_file_size ) and (
1839
- ( filename . lower (), file [ "size" ]) in seen_detection_files
1840
- or (
1841
- filename .lower () not in candidate_files
1936
+ if (engine_name == "glk" and file ["size" ] not in candidate_file_size ) or (
1937
+ engine_name != "glk"
1938
+ and (
1939
+ ( filename .lower (), file [ "size" ]) in seen_detection_files
1842
1940
or (
1843
- filename .lower () in candidate_files
1844
- and (
1845
- candidate_files [filename .lower ()][1 ] != - 1
1846
- and candidate_files [filename .lower ()][1 ] != file ["size" ]
1941
+ filename .lower () not in candidate_files
1942
+ or (
1943
+ filename .lower () in candidate_files
1944
+ and (
1945
+ candidate_files [filename .lower ()][1 ] != - 1
1946
+ and candidate_files [filename .lower ()][1 ] != file ["size" ]
1947
+ )
1847
1948
)
1848
1949
)
1849
1950
)
0 commit comments