@@ -1240,14 +1240,18 @@ def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
1240
1240
matched_detection_files AS (
1241
1241
SELECT cf.fileset_id, COUNT(*) AS match_files_count
1242
1242
FROM candidate_fileset cf
1243
- JOIN set_fileset sf ON cf.name = sf.name AND (cf.size = sf.size OR cf.size = -1)
1243
+ JOIN set_fileset sf ON ( (
1244
+ cf.name = sf.name
1245
+ OR
1246
+ REGEXP_REPLACE(cf.name, '^.*[\\ \\ /]', '') = REGEXP_REPLACE(sf.name, '^.*[\\ \\ /]', '')
1247
+ ) AND (cf.size = sf.size OR cf.size = -1) )
1244
1248
GROUP BY cf.fileset_id
1245
1249
),
1246
1250
valid_matched_detection_files AS (
1247
1251
SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
1248
1252
FROM matched_detection_files mdf
1249
1253
JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
1250
- WHERE tdf.detection_files_found = mdf.match_files_count
1254
+ WHERE tdf.detection_files_found < = mdf.match_files_count
1251
1255
),
1252
1256
max_match_count AS (
1253
1257
SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
@@ -1256,7 +1260,6 @@ def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
1256
1260
FROM valid_matched_detection_files vmdf
1257
1261
JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
1258
1262
JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
1259
- WHERE vmdf.valid_match_files_count = tdf.detection_files_found;
1260
1263
"""
1261
1264
1262
1265
cursor .execute (
@@ -1619,13 +1622,16 @@ def populate_file(fileset, fileset_id, conn, detection):
1619
1622
1620
1623
def set_populate_file (fileset , fileset_id , conn , detection ):
1621
1624
"""
1622
- TODO
1625
+ Updates the old fileset in case of a match. Further deletes the newly created fileset which is not needed anymore.
1623
1626
"""
1624
1627
with conn .cursor () as cursor :
1625
- cursor .execute (f"SELECT id, name FROM file WHERE fileset = { fileset_id } " )
1628
+ # Extracting the filename from the filepath.
1629
+ cursor .execute (
1630
+ f"SELECT id, REGEXP_REPLACE(name, '^.*[\\ \\ /]', '') AS name, size FROM file WHERE fileset = { fileset_id } "
1631
+ )
1626
1632
target_files = cursor .fetchall ()
1627
1633
candidate_files = {
1628
- target_file ["name" ].lower (): target_file ["id" ]
1634
+ target_file ["name" ].lower (): [ target_file ["id" ], target_file [ "size" ] ]
1629
1635
for target_file in target_files
1630
1636
}
1631
1637
@@ -1634,7 +1640,15 @@ def set_populate_file(fileset, fileset_id, conn, detection):
1634
1640
continue
1635
1641
checksize , checktype , checksum = get_checksum_props ("md5" , file ["md5" ])
1636
1642
1637
- if file ["name" ].lower () not in candidate_files :
1643
+ filename = os .path .basename (normalised_path (file ["name" ]))
1644
+
1645
+ if filename .lower () not in candidate_files or (
1646
+ filename .lower () in candidate_files
1647
+ and (
1648
+ candidate_files [filename .lower ()][1 ] != - 1
1649
+ and candidate_files [filename .lower ()][1 ] != file ["size" ]
1650
+ )
1651
+ ):
1638
1652
name = normalised_path (file ["name" ])
1639
1653
values = [name ]
1640
1654
@@ -1658,11 +1672,18 @@ def set_populate_file(fileset, fileset_id, conn, detection):
1658
1672
else :
1659
1673
query = """
1660
1674
UPDATE file
1661
- SET size = %s
1675
+ SET size = %s,
1676
+ name = %s
1662
1677
WHERE id = %s
1663
1678
"""
1679
+ # Filtering was by filename, but we are still updating the file with the original filepath.
1664
1680
cursor .execute (
1665
- query , (file ["size" ], candidate_files [file ["name" ].lower ()])
1681
+ query ,
1682
+ (
1683
+ file ["size" ],
1684
+ normalised_path (file ["name" ]),
1685
+ candidate_files [filename .lower ()][0 ],
1686
+ ),
1666
1687
)
1667
1688
query = """
1668
1689
INSERT INTO filechecksum (file, checksize, checktype, checksum)
@@ -1671,7 +1692,7 @@ def set_populate_file(fileset, fileset_id, conn, detection):
1671
1692
cursor .execute (
1672
1693
query ,
1673
1694
(
1674
- candidate_files [file [ "name" ] .lower ()],
1695
+ candidate_files [filename .lower ()][ 0 ],
1675
1696
checksize ,
1676
1697
checktype ,
1677
1698
checksum ,
0 commit comments