@@ -281,8 +281,6 @@ def add_all_equal_checksums(checksize, checktype, checksum, file_id, conn):
281
281
size_name = "size"
282
282
if checktype [- 1 ] == "r" :
283
283
size_name += "-rd"
284
- if checktype [- 1 ] == "s" :
285
- size_name += "-d"
286
284
287
285
cursor .execute (f"SELECT `{ size_name } ` FROM file WHERE id = { file_id } " )
288
286
result = cursor .fetchone ()
@@ -1345,7 +1343,6 @@ def update_all_files(fileset, candidate_fileset_id, is_candidate_detection, conn
1345
1343
`size-rd` = %s
1346
1344
"""
1347
1345
sizes = filepath_to_sizes_map [filepath ]
1348
- print (sizes )
1349
1346
if is_candidate_detection :
1350
1347
query += ",name = %s WHERE id = %s"
1351
1348
params = (
@@ -1462,10 +1459,10 @@ def scan_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
1462
1459
1463
1460
def get_unmatched_files (candidate_fileset , fileset , conn ):
1464
1461
"""
1465
- Checks if all checksums from candidate_fileset match scan file checksums.
1462
+ Checks if all checksums from candidate_fileset match dat file checksums.
1466
1463
Returns:
1467
1464
unmatched_candidate_files: candidate files whose checksums weren't found in scan
1468
- unmatched_scan_files: scan files whose checksums weren't matched by candidate
1465
+ unmatched_dat_files: dat files whose checksums weren't matched by candidate
1469
1466
"""
1470
1467
with conn .cursor () as cursor :
1471
1468
cursor .execute (
@@ -1474,18 +1471,18 @@ def get_unmatched_files(candidate_fileset, fileset, conn):
1474
1471
candidate_file_rows = cursor .fetchall ()
1475
1472
candidate_files = {row ["id" ]: row ["name" ] for row in candidate_file_rows }
1476
1473
1477
- scan_checksums = set ()
1478
- scan_names_by_checksum = {}
1474
+ dat_checksums = set ()
1475
+ dat_names_by_checksum = {}
1479
1476
1480
1477
for file in fileset ["rom" ]:
1481
1478
base_name = os .path .basename (normalised_path (file ["name" ])).lower ()
1482
1479
for key in file :
1483
1480
if key .startswith ("md5" ):
1484
- scan_checksums .add ((file [key ], base_name ))
1485
- scan_names_by_checksum [(file [key ], base_name )] = file ["name" ]
1481
+ dat_checksums .add ((file [key ], base_name ))
1482
+ dat_names_by_checksum [(file [key ], base_name )] = file ["name" ]
1486
1483
1487
1484
unmatched_candidate_files = []
1488
- matched_scan_pairs = set ()
1485
+ matched_dat_pairs = set ()
1489
1486
1490
1487
for file_id , file_name in candidate_files .items ():
1491
1488
cursor .execute (
@@ -1498,21 +1495,21 @@ def get_unmatched_files(candidate_fileset, fileset, conn):
1498
1495
1499
1496
for row in checksum_rows :
1500
1497
checksum = row ["checksum" ]
1501
- if (checksum , base_name ) in scan_checksums :
1502
- matched_scan_pairs .add ((checksum , base_name ))
1498
+ if (checksum , base_name ) in dat_checksums :
1499
+ matched_dat_pairs .add ((checksum , base_name ))
1503
1500
match_found = True
1504
1501
1505
1502
if not match_found :
1506
1503
unmatched_candidate_files .append (file_name )
1507
1504
1508
- unmatched_scan_files = {
1509
- scan_names_by_checksum [key ]
1510
- for key in scan_checksums
1511
- if key not in matched_scan_pairs
1505
+ unmatched_dat_files = {
1506
+ dat_names_by_checksum [key ]
1507
+ for key in dat_checksums
1508
+ if key not in matched_dat_pairs
1512
1509
}
1513
- unmatched_scan_files = list (unmatched_scan_files )
1510
+ unmatched_dat_files = list (unmatched_dat_files )
1514
1511
1515
- return (unmatched_candidate_files , unmatched_scan_files )
1512
+ return (unmatched_candidate_files , unmatched_dat_files )
1516
1513
1517
1514
1518
1515
def is_full_detection_checksum_match (candidate_fileset , fileset , conn ):
@@ -1524,7 +1521,7 @@ def is_full_detection_checksum_match(candidate_fileset, fileset, conn):
1524
1521
"""
1525
1522
with conn .cursor () as cursor :
1526
1523
cursor .execute (
1527
- "SELECT id, name FROM file WHERE detection=1 AND fileset = %s" ,
1524
+ "SELECT id, REGEXP_REPLACE(name, '^.*[ \\ \\ /]', '') AS name FROM file WHERE detection=1 AND fileset = %s" ,
1528
1525
(candidate_fileset ,),
1529
1526
)
1530
1527
target_files = cursor .fetchall ()
@@ -1682,7 +1679,7 @@ def set_process(
1682
1679
console_message = "Candidate filtering finished."
1683
1680
console_log (console_message )
1684
1681
console_message = (
1685
- f"{ dropped_early_no_candidate } Filesets Dropped - No candidates found ."
1682
+ f"{ dropped_early_no_candidate } Filesets Dropped for No candidates."
1686
1683
)
1687
1684
console_log (console_message )
1688
1685
console_message = "Looking for duplicates..."
@@ -1872,9 +1869,15 @@ def set_perform_match(
1872
1869
matched_fileset_id , manual_merge_map , set_to_candidate_dict , conn
1873
1870
)
1874
1871
elif status == "partial" or status == "full" :
1875
- (is_match , unmatched_files ) = is_full_checksum_match (
1872
+ (unmatched_candidate_files , unmatched_dat_files ) = get_unmatched_files (
1876
1873
matched_fileset_id , fileset , conn
1877
1874
)
1875
+ is_match = (
1876
+ True
1877
+ if len (unmatched_candidate_files ) == 0
1878
+ and len (unmatched_dat_files ) == 0
1879
+ else False
1880
+ )
1878
1881
if is_match :
1879
1882
category_text = "Already present"
1880
1883
log_text = f"Already present as - Fileset:{ matched_fileset_id } . Deleting Fileset:{ fileset_id } "
@@ -1890,7 +1893,8 @@ def set_perform_match(
1890
1893
1891
1894
else :
1892
1895
category_text = "Mismatch"
1893
- log_text = f"Fileset:{ fileset_id } mismatched with Fileset:{ matched_fileset_id } with status:{ status } . Try manual merge."
1896
+ log_text = f"Fileset:{ fileset_id } mismatched with Fileset:{ matched_fileset_id } with status:{ status } . Try manual merge. Unmatched Files in set.dat fileset = { len (unmatched_dat_files )} Unmatched Files in candidate fileset = { len (unmatched_candidate_files )} . List of unmatched files scan.dat : { ', ' .join (scan_file for scan_file in unmatched_dat_files )} , List of unmatched files full fileset : { ', ' .join (scan_file for scan_file in unmatched_candidate_files )} "
1897
+ console_log (log_text )
1894
1898
# print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}."
1895
1899
mismatch_filesets += 1
1896
1900
add_manual_merge (
@@ -1904,7 +1908,6 @@ def set_perform_match(
1904
1908
1905
1909
elif len (candidate_filesets ) > 1 :
1906
1910
found_match = False
1907
-
1908
1911
for candidate_fileset in candidate_filesets :
1909
1912
(is_match , _ ) = is_full_checksum_match (candidate_fileset , fileset , conn )
1910
1913
if is_match :
0 commit comments