Skip to content

Commit 94cd1d9

Browse files
committed
INTEGRITY: Separate the logic that adds the additional equal checksums from insert_filechecksum into its own function (add_all_equal_checksums). Also add the equal checksums for files matched during set.dat fileset matching.
1 parent 161abd5 commit 94cd1d9

File tree

1 file changed

+42
-13
lines changed

1 file changed

+42
-13
lines changed

db_functions.py

Lines changed: 42 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -233,26 +233,34 @@ def insert_file(file, detection, src, conn):
233233
cursor.execute("SET @file_last = LAST_INSERT_ID()")
234234

235235

236-
def insert_filechecksum(file, checktype, conn):
236+
def insert_filechecksum(file, checktype, file_id, conn):
237237
if checktype not in file:
238238
return
239239

240240
checksum = file[checktype]
241241
checksize, checktype, checksum = get_checksum_props(checktype, checksum)
242242

243-
query = f"INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (@file_last, '{checksize}', '{checktype}', '{checksum}')"
243+
query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)"
244+
with conn.cursor() as cursor:
245+
cursor.execute(query, (file_id, checksize, checktype, checksum))
246+
247+
add_all_equal_checksums(checksize, checktype, checksum, file_id, conn)
248+
249+
250+
def add_all_equal_checksums(checksize, checktype, checksum, file_id, conn):
251+
"""
252+
We can update all the checksums when file size is less than the checksum size type, as all checksums are equal in that case.
253+
"""
244254
with conn.cursor() as cursor:
245-
cursor.execute(query)
246255
if "md5" not in checktype:
247256
return
248-
249257
size_name = "size"
250258
if checktype[-1] == "r":
251259
size_name += "-rd"
252260
if checktype[-1] == "s":
253261
size_name += "-d"
254262

255-
cursor.execute(f"SELECT `{size_name}` FROM file WHERE id = @file_last")
263+
cursor.execute(f"SELECT `{size_name}` FROM file WHERE id = {file_id}")
256264
result = cursor.fetchone()
257265
if not result:
258266
return
@@ -281,9 +289,10 @@ def insert_filechecksum(file, checktype, conn):
281289
checksum_size = exploded.pop()
282290
checksum_type = "-".join(exploded)
283291

284-
query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (@file_last, %s, %s, %s)"
285-
with conn.cursor() as cursor:
286-
cursor.execute(query, (checksum_size, checksum_type, checksum))
292+
query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)"
293+
cursor.execute(
294+
query, (file_id, checksum_size, checksum_type, checksum)
295+
)
287296

288297

289298
def delete_filesets(conn):
@@ -558,9 +567,13 @@ def db_insert(data_arr, username=None, skiplog=False):
558567

559568
for file in unique_files:
560569
insert_file(file, detection, src, conn)
570+
file_id = None
571+
with conn.cursor() as cursor:
572+
cursor.execute("SELECT @file_last AS file_id")
573+
file_id = cursor.fetchone()["file_id"]
561574
for key, value in file.items():
562575
if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
563-
insert_filechecksum(file, key, conn)
576+
insert_filechecksum(file, key, file_id, conn)
564577

565578
if detection:
566579
conn.cursor().execute(
@@ -1070,7 +1083,6 @@ def set_perform_match(
10701083
with conn.cursor() as cursor:
10711084
if len(candidate_filesets) == 1:
10721085
matched_fileset_id = candidate_filesets[0]
1073-
10741086
cursor.execute(
10751087
"SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,)
10761088
)
@@ -1123,6 +1135,7 @@ def set_perform_match(
11231135

11241136
elif len(candidate_filesets) > 1:
11251137
found_match = False
1138+
11261139
for candidate_fileset in candidate_filesets:
11271140
(is_match, _) = is_full_checksum_match(candidate_fileset, fileset, conn)
11281141
if is_match:
@@ -1579,7 +1592,7 @@ def populate_file(fileset, fileset_id, conn, detection):
15791592

15801593
for key, value in file.items():
15811594
if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
1582-
insert_filechecksum(file, key, conn)
1595+
insert_filechecksum(file, key, file_id, conn)
15831596
if value in target_files_dict and not file_exists:
15841597
cursor.execute(
15851598
f"SELECT detection_type FROM file WHERE id = {target_files_dict[value]['id']}"
@@ -1683,7 +1696,10 @@ def set_populate_file(fileset, fileset_id, conn, detection):
16831696
cursor.execute("SET @file_last = LAST_INSERT_ID()")
16841697
cursor.execute("SELECT @file_last AS file_id")
16851698

1686-
insert_filechecksum(file, "md5", conn)
1699+
cursor.execute("SELECT @file_last AS file_id")
1700+
file_id = cursor.fetchone()["file_id"]
1701+
1702+
insert_filechecksum(file, "md5", file_id, conn)
16871703

16881704
else:
16891705
query = """
@@ -1701,6 +1717,7 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17011717
candidate_files[filename.lower()][0],
17021718
),
17031719
)
1720+
17041721
query = """
17051722
INSERT INTO filechecksum (file, checksize, checktype, checksum)
17061723
VALUES (%s, %s, %s, %s)
@@ -1714,6 +1731,14 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17141731
checksum,
17151732
),
17161733
)
1734+
1735+
add_all_equal_checksums(
1736+
checksize,
1737+
checktype,
1738+
checksum,
1739+
candidate_files[filename.lower()][0],
1740+
conn,
1741+
)
17171742
seen_detection_files.add((filename.lower(), file["size"]))
17181743

17191744

@@ -1745,9 +1770,13 @@ def insert_new_fileset(
17451770
if fileset_id:
17461771
for file in fileset["rom"]:
17471772
insert_file(file, detection, src, conn)
1773+
file_id = None
1774+
with conn.cursor() as cursor:
1775+
cursor.execute("SELECT @file_last AS file_id")
1776+
file_id = cursor.fetchone()["file_id"]
17481777
for key, value in file.items():
17491778
if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
1750-
insert_filechecksum(file, key, conn)
1779+
insert_filechecksum(file, key, file_id, conn)
17511780
return (fileset_id, existing)
17521781

17531782

0 commit comments

Comments
 (0)