Skip to content

Commit 93d01e2

Browse files
committed
Add additional filtering logic for glk engines
1 parent c502ba3 commit 93d01e2

File tree

1 file changed

+106
-12
lines changed

1 file changed

+106
-12
lines changed

db_functions.py

Lines changed: 106 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -965,9 +965,17 @@ def set_process(
965965
if existing:
966966
continue
967967

968-
candidate_filesets = set_filter_candidate_filesets(
969-
fileset_id, fileset, transaction_id, conn
970-
)
968+
# Separating out the matching logic for glk engine
969+
engine_name = fileset["sourcefile"].split("-")[0]
970+
971+
if engine_name == "glk":
972+
candidate_filesets = set_glk_filter_candidate_filesets(
973+
fileset_id, fileset, transaction_id, engine_name, conn
974+
)
975+
else:
976+
candidate_filesets = set_filter_candidate_filesets(
977+
fileset_id, fileset, transaction_id, conn
978+
)
971979

972980
# Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them.
973981
if len(candidate_filesets) == 0:
@@ -1288,6 +1296,75 @@ def is_full_checksum_match(candidate_fileset, fileset, conn):
12881296
return (len(unmatched_files) == 0, unmatched_files)
12891297

12901298

1299+
def set_glk_filter_candidate_filesets(
    fileset_id, fileset, transaction_id, engine_name, conn
):
    """
    Returns a list of candidate filesets for glk engines that can be merged.

    Unlike the name-based matcher, this lookup never compares file names:
    detection files of the candidate filesets are paired with the files of
    the set fileset purely by size.

    Args:
        fileset_id: id of the set fileset being matched. It is excluded from
            the candidates and is also used to read the set fileset's own
            file sizes.
        fileset: mapping describing the set fileset. Only ``fileset["name"]``
            is read here; it is wrapped in ``%`` wildcards and compared
            against ``game.gameid`` with LIKE.
        transaction_id: rows joined from ``transactions`` carrying this
            transaction id are filtered out (``t.transaction != %s``).
        engine_name: value compared against ``engine.engineid``.
        conn: database connection whose ``cursor()`` works as a context
            manager and yields rows addressable by column name.

    Returns:
        A list of fileset ids tied for the highest match count, or an empty
        list when no fileset qualifies.
    """
    # What the query does, CTE by CTE:
    #   candidate_fileset             - detection files (f.detection = 1) of
    #                                   other filesets with the given engine id
    #                                   whose gameid contains the set name.
    #   total_detection_files         - number of such rows per fileset.
    #   set_fileset                   - sizes of the files in the set fileset.
    #   matched_detection_files       - per fileset, the number of
    #                                   (candidate file, set file) pairs whose
    #                                   sizes are equal; a candidate file of
    #                                   size 0 pairs with every set file.
    #   valid_matched_detection_files - keeps filesets whose pair count is at
    #                                   least their detection file count.
    #   max_match_count               - highest pair count among those.
    # The final SELECT returns every fileset that reaches that maximum.
    # NOTE(review): the final join to total_detection_files selects no column
    # from it and looks redundant - confirm before removing.
    query = """
        WITH candidate_fileset AS (
            SELECT fs.id AS fileset_id, f.size
            FROM file f
            JOIN fileset fs ON f.fileset = fs.id
            JOIN game g ON g.id = fs.game
            JOIN engine e ON e.id = g.engine
            JOIN transactions t ON t.fileset = fs.id
            WHERE fs.id != %s
            AND e.engineid = %s
            AND f.detection = 1
            AND t.transaction != %s
            AND g.gameid LIKE %s
        ),
        total_detection_files AS (
            SELECT cf.fileset_id, COUNT(*) AS detection_files_found
            FROM candidate_fileset cf
            GROUP BY fileset_id
        ),
        set_fileset AS (
            SELECT size FROM file
            WHERE fileset = %s
        ),
        matched_detection_files AS (
            SELECT cf.fileset_id, COUNT(*) AS match_files_count
            FROM candidate_fileset cf
            JOIN set_fileset sf ON
                cf.size = sf.size OR cf.size = 0
            GROUP BY cf.fileset_id
        ),
        valid_matched_detection_files AS (
            SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
            FROM matched_detection_files mdf
            JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
            WHERE tdf.detection_files_found <= mdf.match_files_count
        ),
        max_match_count AS (
            SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
        )
        SELECT vmdf.fileset_id
        FROM valid_matched_detection_files vmdf
        JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
        JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
    """

    # Substring match: any gameid that contains the set fileset's name.
    gameid_pattern = f"%{fileset['name']}%"

    with conn.cursor() as cursor:
        # Parameter order follows the order of the %s placeholders above;
        # fileset_id is bound twice (exclusion filter and set_fileset CTE).
        cursor.execute(
            query,
            (fileset_id, engine_name, transaction_id, gameid_pattern, fileset_id),
        )
        rows = cursor.fetchall()

    # `rows or ()` keeps the original tolerance for an empty/None result.
    return [row["fileset_id"] for row in rows or ()]
1366+
1367+
12911368
def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
12921369
"""
12931370
Returns a list of candidate filesets that can be merged
@@ -1715,6 +1792,13 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17151792
for target_file in target_files
17161793
}
17171794

1795+
# For glk engines
1796+
candidate_file_size = {
1797+
target_file["size"]: target_file["id"] for target_file in target_files
1798+
}
1799+
1800+
engine_name = fileset["sourcefile"].split("-")[0]
1801+
17181802
seen_detection_files = set()
17191803

17201804
for file in fileset["rom"]:
@@ -1724,13 +1808,16 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17241808

17251809
filename = os.path.basename(normalised_path(file["name"]))
17261810

1727-
if ((filename.lower(), file["size"]) in seen_detection_files) or (
1728-
filename.lower() not in candidate_files
1811+
if (engine_name == "glk" and file["size"] not in candidate_file_size) and (
1812+
(filename.lower(), file["size"]) in seen_detection_files
17291813
or (
1730-
filename.lower() in candidate_files
1731-
and (
1732-
candidate_files[filename.lower()][1] != -1
1733-
and candidate_files[filename.lower()][1] != file["size"]
1814+
filename.lower() not in candidate_files
1815+
or (
1816+
filename.lower() in candidate_files
1817+
and (
1818+
candidate_files[filename.lower()][1] != -1
1819+
and candidate_files[filename.lower()][1] != file["size"]
1820+
)
17341821
)
17351822
)
17361823
):
@@ -1764,13 +1851,16 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17641851
name = %s
17651852
WHERE id = %s
17661853
"""
1854+
17671855
# Filtering was by filename, but we are still updating the file with the original filepath.
17681856
cursor.execute(
17691857
query,
17701858
(
17711859
file["size"],
17721860
normalised_path(file["name"]),
1773-
candidate_files[filename.lower()][0],
1861+
candidate_files[filename.lower()][0]
1862+
if engine_name != "glk"
1863+
else candidate_file_size[file["size"]],
17741864
),
17751865
)
17761866

@@ -1781,7 +1871,9 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17811871
cursor.execute(
17821872
query,
17831873
(
1784-
candidate_files[filename.lower()][0],
1874+
candidate_files[filename.lower()][0]
1875+
if engine_name != "glk"
1876+
else candidate_file_size[file["size"]],
17851877
checksize,
17861878
checktype,
17871879
checksum,
@@ -1792,7 +1884,9 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17921884
checksize,
17931885
checktype,
17941886
checksum,
1795-
candidate_files[filename.lower()][0],
1887+
candidate_files[filename.lower()][0]
1888+
if engine_name != "glk"
1889+
else candidate_file_size[file["size"]],
17961890
conn,
17971891
)
17981892
seen_detection_files.add((filename.lower(), file["size"]))

0 commit comments

Comments
 (0)