Skip to content

Commit c38881c

Browse files
committed
INTEGRITY: Limit match fileset to 1 in remove_manual_merge_if_size_mismatch
1 parent df41d7a commit c38881c

File tree

1 file changed

+42
-69
lines changed

1 file changed

+42
-69
lines changed

db_functions.py

Lines changed: 42 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -1179,7 +1179,7 @@ def set_perform_match(
11791179
skiplog,
11801180
):
11811181
"""
1182-
TODO
1182+
"Performs matching for set.dat"
11831183
"""
11841184
with conn.cursor() as cursor:
11851185
if len(candidate_filesets) == 1:
@@ -1189,11 +1189,11 @@ def set_perform_match(
11891189
)
11901190
status = cursor.fetchone()["status"]
11911191
if status == "detection":
1192-
update_fileset_status(cursor, matched_fileset_id, "parital")
1192+
update_fileset_status(cursor, matched_fileset_id, "partial")
11931193
set_populate_file(fileset, matched_fileset_id, conn, detection)
11941194
auto_merged_filesets += 1
11951195
if not skiplog:
1196-
set_log_matched_fileset(
1196+
log_matched_fileset(
11971197
src,
11981198
fileset_id,
11991199
matched_fileset_id,
@@ -1247,7 +1247,7 @@ def set_perform_match(
12471247
set_populate_file(fileset, candidate_fileset, conn, detection)
12481248
auto_merged_filesets += 1
12491249
if not skiplog:
1250-
set_log_matched_fileset(
1250+
log_matched_fileset(
12511251
src,
12521252
fileset_id,
12531253
candidate_fileset,
@@ -1287,63 +1287,37 @@ def remove_manual_merge_if_size_mismatch(
12871287
cursor.execute(query, (child_fileset,))
12881288
files = cursor.fetchall()
12891289

1290-
for parent_fileset, child_list in manual_merge_map.items():
1291-
if child_fileset not in child_list:
1292-
continue
1293-
1294-
for file in files:
1295-
if file["size"] == -1:
1296-
continue
1297-
1298-
query = """
1299-
SELECT f.id
1300-
FROM fileset fs
1301-
JOIN file f ON f.fileset = fs.id
1302-
WHERE fs.id = %s
1303-
AND f.name = %s
1304-
AND f.size = %s
1305-
"""
1306-
cursor.execute(query, (parent_fileset, file["name"], file["size"]))
1307-
result = cursor.fetchall()
1308-
1309-
if not result:
1310-
remove_manual_merge(
1311-
child_fileset,
1312-
parent_fileset,
1313-
manual_merge_map,
1314-
set_to_candidate_dict,
1315-
conn,
1316-
)
1317-
break
1318-
1319-
for parent_fileset, child_list in set_to_candidate_dict.items():
1320-
if child_fileset not in child_list:
1321-
continue
1322-
1323-
for file in files:
1324-
if file["size"] == -1:
1290+
for possible_removals in [manual_merge_map, set_to_candidate_dict]:
1291+
for parent_fileset, child_list in possible_removals.items():
1292+
if child_fileset not in child_list:
13251293
continue
13261294

1327-
query = """
1328-
SELECT f.id
1329-
FROM fileset fs
1330-
JOIN file f ON f.fileset = fs.id
1331-
WHERE fs.id = %s
1332-
AND f.name = %s
1333-
AND f.size = %s
1334-
"""
1335-
cursor.execute(query, (parent_fileset, file["name"], file["size"]))
1336-
result = cursor.fetchall()
1337-
1338-
if not result:
1339-
remove_manual_merge(
1340-
child_fileset,
1341-
parent_fileset,
1342-
manual_merge_map,
1343-
set_to_candidate_dict,
1344-
conn,
1345-
)
1346-
break
1295+
for file in files:
1296+
if file["size"] == -1:
1297+
continue
1298+
1299+
query = """
1300+
SELECT fs.id
1301+
FROM fileset fs
1302+
JOIN file f ON f.fileset = fs.id
1303+
WHERE fs.id = %s
1304+
AND REGEXP_REPLACE(f.name, '^.*[\\\\/]', '') = %s
1305+
AND f.size = %s
1306+
LIMIT 1
1307+
"""
1308+
filename = os.path.basename(normalised_path(file["name"]))
1309+
cursor.execute(query, (parent_fileset, filename, file["size"]))
1310+
result = cursor.fetchall()
1311+
1312+
if not result:
1313+
remove_manual_merge(
1314+
child_fileset,
1315+
parent_fileset,
1316+
manual_merge_map,
1317+
set_to_candidate_dict,
1318+
conn,
1319+
)
1320+
break
13471321

13481322

13491323
def remove_manual_merge(
@@ -2063,21 +2037,20 @@ def insert_new_fileset(
20632037
cursor.execute("SELECT @file_last AS file_id")
20642038
file_id = cursor.fetchone()["file_id"]
20652039
for key, value in file.items():
2066-
if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
2040+
if key not in [
2041+
"name",
2042+
"size",
2043+
"size-r",
2044+
"size-rd",
2045+
"sha1",
2046+
"crc",
2047+
"modification-time",
2048+
]:
20672049
insert_filechecksum(file, key, file_id, conn)
20682050
return (fileset_id, existing)
20692051

20702052

20712053
def log_matched_fileset(src, fileset_last, fileset_id, state, user, conn):
2072-
category_text = f"Matched from {src}"
2073-
log_text = f"Matched Fileset:{fileset_id}. State {state}."
2074-
log_last = create_log(
2075-
escape_string(category_text), user, escape_string(log_text), conn
2076-
)
2077-
update_history(fileset_last, fileset_id, conn, log_last)
2078-
2079-
2080-
def set_log_matched_fileset(src, fileset_last, fileset_id, state, user, conn):
20812054
category_text = f"Matched from {src}"
20822055
log_text = (
20832056
f"Matched Fileset:{fileset_last} with Fileset:{fileset_id}. State {state}."

0 commit comments

Comments
 (0)