Skip to content

Commit 1fee585

Browse files
committed
INTEGRITY: Add additional filtering logic for glk engines
1 parent c502ba3 commit 1fee585

File tree

1 file changed

+115
-12
lines changed

1 file changed

+115
-12
lines changed

db_functions.py

Lines changed: 115 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -965,9 +965,17 @@ def set_process(
965965
if existing:
966966
continue
967967

968-
candidate_filesets = set_filter_candidate_filesets(
969-
fileset_id, fileset, transaction_id, conn
970-
)
968+
# Separating out the matching logic for glk engine
969+
engine_name = fileset["sourcefile"].split("-")[0]
970+
971+
if engine_name == "glk":
972+
candidate_filesets = set_glk_filter_candidate_filesets(
973+
fileset_id, fileset, transaction_id, engine_name, conn
974+
)
975+
else:
976+
candidate_filesets = set_filter_candidate_filesets(
977+
fileset_id, fileset, transaction_id, conn
978+
)
971979

972980
# Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them.
973981
if len(candidate_filesets) == 0:
@@ -1288,6 +1296,84 @@ def is_full_checksum_match(candidate_fileset, fileset, conn):
12881296
return (len(unmatched_files) == 0, unmatched_files)
12891297

12901298

1299+
def set_glk_filter_candidate_filesets(
    fileset_id, fileset, transaction_id, engine_name, conn
):
    """
    Returns a list of candidate filesets for glk engines that can be merged.

    Candidates are chosen by file size rather than by file name: a detection
    file of another fileset counts as matched when its size equals the size
    of some file in the set fileset, or when its own size is 0.

    Args:
        fileset_id: id of the set fileset being matched. It is excluded from
            the candidates and used to read the set fileset's file sizes.
        fileset: mapping describing the set fileset; only fileset["name"] is
            read here. It is compared against game.gameid, either exactly or
            as a substring.
        transaction_id: rows of the transactions table carrying this
            transaction value are filtered out.
        engine_name: value compared against engine.engineid.
        conn: database connection. conn.cursor() must be usable as a context
            manager and return rows addressable by column name.

    Returns:
        A list of fileset ids (empty when nothing qualifies). Only filesets
        whose match count is at least their number of detection files are
        kept, and of those only the ones reaching the highest match count.
    """
    # CTE overview:
    #   candidate_fileset             - detection files of other filesets for
    #                                   this engine whose gameid equals or
    #                                   contains the set fileset's name.
    #   total_detection_files         - detection file count per candidate.
    #   set_fileset                   - file sizes of the set fileset.
    #   matched_detection_files       - per candidate, number of (detection
    #                                   file, set file) pairs that agree on
    #                                   size or where the detection size is 0.
    #   valid_matched_detection_files - candidates with at least as many
    #                                   matches as detection files.
    #   max_match_count               - highest match count among the valid
    #                                   candidates; only those are returned.
    query = """
        WITH candidate_fileset AS (
            SELECT fs.id AS fileset_id, f.size
            FROM file f
            JOIN fileset fs ON f.fileset = fs.id
            JOIN game g ON g.id = fs.game
            JOIN engine e ON e.id = g.engine
            JOIN transactions t ON t.fileset = fs.id
            WHERE fs.id != %s
            AND e.engineid = %s
            AND f.detection = 1
            AND t.transaction != %s
            AND (g.gameid = %s OR (g.gameid != %s AND g.gameid LIKE %s))
        ),
        total_detection_files AS (
            SELECT cf.fileset_id, COUNT(*) AS detection_files_found
            FROM candidate_fileset cf
            GROUP BY fileset_id
        ),
        set_fileset AS (
            SELECT size FROM file
            WHERE fileset = %s
        ),
        matched_detection_files AS (
            SELECT cf.fileset_id, COUNT(*) AS match_files_count
            FROM candidate_fileset cf
            JOIN set_fileset sf ON
            cf.size = sf.size OR cf.size = 0
            GROUP BY cf.fileset_id
        ),
        valid_matched_detection_files AS (
            SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
            FROM matched_detection_files mdf
            JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
            WHERE tdf.detection_files_found <= mdf.match_files_count
        ),
        max_match_count AS (
            SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
        )
        SELECT vmdf.fileset_id
        FROM valid_matched_detection_files vmdf
        JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
        JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
    """

    set_name = fileset["name"]
    # NOTE(review): the name is placed in the LIKE pattern unescaped, so any
    # "%" or "_" it contains acts as a wildcard - confirm this is acceptable.
    gameid_pattern = f"%{set_name}%"

    # Order must follow the %s placeholders in the query above.
    params = (
        fileset_id,
        engine_name,
        transaction_id,
        set_name,
        set_name,
        gameid_pattern,
        fileset_id,
    )

    with conn.cursor() as cursor:
        cursor.execute(query, params)
        rows = cursor.fetchall()

    # "or ()" keeps the original tolerance for a falsy fetchall() result.
    return [row["fileset_id"] for row in rows or ()]
1375+
1376+
12911377
def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
12921378
"""
12931379
Returns a list of candidate filesets that can be merged
@@ -1715,6 +1801,13 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17151801
for target_file in target_files
17161802
}
17171803

1804+
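# For glk engines: map each candidate file's size to its id, because glk files are looked up by size instead of by file name.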
1805+
candidate_file_size = {
1806+
target_file["size"]: target_file["id"] for target_file in target_files
1807+
}
1808+
1809+
engine_name = fileset["sourcefile"].split("-")[0]
1810+
17181811
seen_detection_files = set()
17191812

17201813
for file in fileset["rom"]:
@@ -1724,13 +1817,16 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17241817

17251818
filename = os.path.basename(normalised_path(file["name"]))
17261819

1727-
if ((filename.lower(), file["size"]) in seen_detection_files) or (
1728-
filename.lower() not in candidate_files
1820+
if (engine_name == "glk" and file["size"] not in candidate_file_size) and (
1821+
(filename.lower(), file["size"]) in seen_detection_files
17291822
or (
1730-
filename.lower() in candidate_files
1731-
and (
1732-
candidate_files[filename.lower()][1] != -1
1733-
and candidate_files[filename.lower()][1] != file["size"]
1823+
filename.lower() not in candidate_files
1824+
or (
1825+
filename.lower() in candidate_files
1826+
and (
1827+
candidate_files[filename.lower()][1] != -1
1828+
and candidate_files[filename.lower()][1] != file["size"]
1829+
)
17341830
)
17351831
)
17361832
):
@@ -1764,13 +1860,16 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17641860
name = %s
17651861
WHERE id = %s
17661862
"""
1863+
17671864
# Filtering was by filename, but we are still updating the file with the original filepath.
17681865
cursor.execute(
17691866
query,
17701867
(
17711868
file["size"],
17721869
normalised_path(file["name"]),
1773-
candidate_files[filename.lower()][0],
1870+
candidate_files[filename.lower()][0]
1871+
if engine_name != "glk"
1872+
else candidate_file_size[file["size"]],
17741873
),
17751874
)
17761875

@@ -1781,7 +1880,9 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17811880
cursor.execute(
17821881
query,
17831882
(
1784-
candidate_files[filename.lower()][0],
1883+
candidate_files[filename.lower()][0]
1884+
if engine_name != "glk"
1885+
else candidate_file_size[file["size"]],
17851886
checksize,
17861887
checktype,
17871888
checksum,
@@ -1792,7 +1893,9 @@ def set_populate_file(fileset, fileset_id, conn, detection):
17921893
checksize,
17931894
checktype,
17941895
checksum,
1795-
candidate_files[filename.lower()][0],
1896+
candidate_files[filename.lower()][0]
1897+
if engine_name != "glk"
1898+
else candidate_file_size[file["size"]],
17961899
conn,
17971900
)
17981901
seen_detection_files.add((filename.lower(), file["size"]))

0 commit comments

Comments
 (0)