Skip to content

Commit 57df340

Browse files
committed
INTEGRITY: Merge filtering logic for glk with existing set.dat filtering.
1 parent 4dd7e29 commit 57df340

File tree

1 file changed

+28
-91
lines changed

1 file changed

+28
-91
lines changed

db_functions.py

Lines changed: 28 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,14 +1659,9 @@ def set_process(
16591659
# Separating out the matching logic for glk engine
16601660
engine_name = fileset["sourcefile"].split("-")[0]
16611661

1662-
if engine_name == "glk":
1663-
(candidate_filesets, fileset_count) = set_glk_filter_candidate_filesets(
1664-
fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
1665-
)
1666-
else:
1667-
(candidate_filesets, fileset_count) = set_filter_candidate_filesets(
1668-
fileset_id, fileset, fileset_count, transaction_id, conn
1669-
)
1662+
(candidate_filesets, fileset_count) = set_filter_candidate_filesets(
1663+
fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
1664+
)
16701665

16711666
# Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them.
16721667
if len(candidate_filesets) == 0:
@@ -2071,93 +2066,16 @@ def is_full_checksum_match(candidate_fileset, fileset, conn):
20712066
return (len(unmatched_files) == 0, unmatched_files)
20722067

20732068

2074-
def set_glk_filter_candidate_filesets(
2075-
fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
2076-
):
2077-
"""
2078-
Returns a list of candidate filesets for glk engines that can be merged
2079-
"""
2080-
with conn.cursor() as cursor:
2081-
# Returns the filesets whose detection files all match files in the set fileset, filtered by engine, file name and file size (if not -1), sorted in descending order of matches.
2082-
fileset_count += 1
2083-
console_log_candidate_filtering(fileset_count)
2084-
query = """
2085-
WITH candidate_fileset AS (
2086-
SELECT fs.id AS fileset_id, f.size
2087-
FROM file f
2088-
JOIN fileset fs ON f.fileset = fs.id
2089-
JOIN game g ON g.id = fs.game
2090-
JOIN engine e ON e.id = g.engine
2091-
JOIN transactions t ON t.fileset = fs.id
2092-
WHERE fs.id != %s
2093-
AND e.engineid = %s
2094-
AND f.detection = 1
2095-
AND t.transaction != %s
2096-
AND (g.gameid = %s OR (g.gameid != %s AND g.gameid LIKE %s))
2097-
),
2098-
total_detection_files AS (
2099-
SELECT cf.fileset_id, COUNT(*) AS detection_files_found
2100-
FROM candidate_fileset cf
2101-
GROUP BY fileset_id
2102-
),
2103-
set_fileset AS (
2104-
SELECT size FROM file
2105-
WHERE fileset = %s
2106-
),
2107-
matched_detection_files AS (
2108-
SELECT cf.fileset_id, COUNT(*) AS match_files_count
2109-
FROM candidate_fileset cf
2110-
JOIN set_fileset sf ON
2111-
cf.size = sf.size OR cf.size = 0
2112-
GROUP BY cf.fileset_id
2113-
),
2114-
valid_matched_detection_files AS (
2115-
SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
2116-
FROM matched_detection_files mdf
2117-
JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
2118-
WHERE tdf.detection_files_found <= mdf.match_files_count
2119-
),
2120-
max_match_count AS (
2121-
SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
2122-
)
2123-
SELECT vmdf.fileset_id
2124-
FROM valid_matched_detection_files vmdf
2125-
JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
2126-
JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
2127-
"""
2128-
2129-
gameid_pattern = f"%{fileset['name']}%"
2130-
2131-
cursor.execute(
2132-
query,
2133-
(
2134-
fileset_id,
2135-
engine_name,
2136-
transaction_id,
2137-
fileset["name"],
2138-
fileset["name"],
2139-
gameid_pattern,
2140-
fileset_id,
2141-
),
2142-
)
2143-
rows = cursor.fetchall()
2144-
2145-
candidates = []
2146-
if rows:
2147-
for row in rows:
2148-
candidates.append(row["fileset_id"])
2149-
2150-
return (candidates, fileset_count)
2151-
2152-
21532069
def set_filter_candidate_filesets(
2154-
fileset_id, fileset, fileset_count, transaction_id, conn
2070+
fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
21552071
):
21562072
"""
21572073
Returns a list of candidate filesets that can be merged.
21582074
Performs early filtering in SQL (by engine, name, size) and then
21592075
applies checksum filtering and max-match filtering in Python.
2076+
For glk engines, candidates are filtered by gameid rather than by name.
21602077
"""
2078+
is_glk = engine_name == "glk"
21612079
with conn.cursor() as cursor:
21622080
fileset_count += 1
21632081
console_log_candidate_filtering(fileset_count)
@@ -2174,7 +2092,21 @@ def set_filter_candidate_filesets(
21742092
AND f.detection = 1
21752093
AND t.transaction != %s
21762094
"""
2177-
cursor.execute(query, (fileset["sourcefile"], transaction_id))
2095+
if is_glk:
2096+
query += " AND (g.gameid = %s OR (g.gameid != %s AND g.gameid LIKE %s))"
2097+
gameid_pattern = f"%{fileset['name']}%"
2098+
cursor.execute(
2099+
query,
2100+
(
2101+
engine_name,
2102+
transaction_id,
2103+
fileset["name"],
2104+
fileset["name"],
2105+
gameid_pattern,
2106+
),
2107+
)
2108+
else:
2109+
cursor.execute(query, (fileset["sourcefile"], transaction_id))
21782110
raw_candidates = cursor.fetchall()
21792111

21802112
# fileset id to detection files map
@@ -2184,7 +2116,7 @@ def set_filter_candidate_filesets(
21842116
candidate_map[row["fileset_id"]].append(
21852117
{
21862118
"file_id": row["file_id"],
2187-
"name": row["name"],
2119+
"name": os.path.basename(normalised_path(row["name"])).lower(),
21882120
"size": row["size"],
21892121
}
21902122
)
@@ -2193,14 +2125,17 @@ def set_filter_candidate_filesets(
21932125

21942126
set_checksums = set()
21952127
set_file_name_size = set()
2128+
set_glk_file_size = set()
21962129
for file in fileset["rom"]:
2130+
name = os.path.basename(normalised_path(file["name"]))
21972131
for key in file:
21982132
if key.startswith("md5"):
2199-
name = os.path.basename(normalised_path(file["name"]))
22002133
set_checksums.add((file[key], name.lower(), int(file["size"])))
22012134
set_checksums.add((file[key], name.lower(), -1))
22022135
set_file_name_size.add((name.lower(), -1))
22032136
set_file_name_size.add((name.lower(), int(file["size"])))
2137+
if is_glk:
2138+
set_glk_file_size.add(int(file["size"]))
22042139

22052140
# Filter candidates by detection filename and file size (including -1) and increase matched file count
22062141
# if filesize = -1,
@@ -2213,6 +2148,8 @@ def set_filter_candidate_filesets(
22132148
for f in files:
22142149
filename = os.path.basename(f["name"]).lower()
22152150
filesize = f["size"]
2151+
if is_glk and (filesize in set_glk_file_size or filesize == 0):
2152+
count += 1
22162153
if (filename, filesize) in set_file_name_size:
22172154
if filesize == -1:
22182155
count += 1

0 commit comments

Comments
 (0)