Skip to content

Commit 3ef16e8

Browse files
committed
INTEGRITY: Merge filtering logic for glk with existing set.dat filtering.
1 parent 4dd7e29 commit 3ef16e8

File tree

1 file changed

+25
-89
lines changed

1 file changed

+25
-89
lines changed

db_functions.py

Lines changed: 25 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -1659,14 +1659,9 @@ def set_process(
16591659
# Separating out the matching logic for glk engine
16601660
engine_name = fileset["sourcefile"].split("-")[0]
16611661

1662-
if engine_name == "glk":
1663-
(candidate_filesets, fileset_count) = set_glk_filter_candidate_filesets(
1664-
fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
1665-
)
1666-
else:
1667-
(candidate_filesets, fileset_count) = set_filter_candidate_filesets(
1668-
fileset_id, fileset, fileset_count, transaction_id, conn
1669-
)
1662+
(candidate_filesets, fileset_count) = set_filter_candidate_filesets(
1663+
fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
1664+
)
16701665

16711666
# Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them.
16721667
if len(candidate_filesets) == 0:
@@ -2071,93 +2066,15 @@ def is_full_checksum_match(candidate_fileset, fileset, conn):
20712066
return (len(unmatched_files) == 0, unmatched_files)
20722067

20732068

2074-
def set_glk_filter_candidate_filesets(
2075-
fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
2076-
):
2077-
"""
2078-
Returns a list of candidate filesets for glk engines that can be merged
2079-
"""
2080-
with conn.cursor() as cursor:
2081-
# Returns the filesets whose detection files all match files in the set fileset, filtered by engine, file name and file size (if not -1), sorted in descending order of matches
2082-
fileset_count += 1
2083-
console_log_candidate_filtering(fileset_count)
2084-
query = """
2085-
WITH candidate_fileset AS (
2086-
SELECT fs.id AS fileset_id, f.size
2087-
FROM file f
2088-
JOIN fileset fs ON f.fileset = fs.id
2089-
JOIN game g ON g.id = fs.game
2090-
JOIN engine e ON e.id = g.engine
2091-
JOIN transactions t ON t.fileset = fs.id
2092-
WHERE fs.id != %s
2093-
AND e.engineid = %s
2094-
AND f.detection = 1
2095-
AND t.transaction != %s
2096-
AND (g.gameid = %s OR (g.gameid != %s AND g.gameid LIKE %s))
2097-
),
2098-
total_detection_files AS (
2099-
SELECT cf.fileset_id, COUNT(*) AS detection_files_found
2100-
FROM candidate_fileset cf
2101-
GROUP BY fileset_id
2102-
),
2103-
set_fileset AS (
2104-
SELECT size FROM file
2105-
WHERE fileset = %s
2106-
),
2107-
matched_detection_files AS (
2108-
SELECT cf.fileset_id, COUNT(*) AS match_files_count
2109-
FROM candidate_fileset cf
2110-
JOIN set_fileset sf ON
2111-
cf.size = sf.size OR cf.size = 0
2112-
GROUP BY cf.fileset_id
2113-
),
2114-
valid_matched_detection_files AS (
2115-
SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
2116-
FROM matched_detection_files mdf
2117-
JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
2118-
WHERE tdf.detection_files_found <= mdf.match_files_count
2119-
),
2120-
max_match_count AS (
2121-
SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
2122-
)
2123-
SELECT vmdf.fileset_id
2124-
FROM valid_matched_detection_files vmdf
2125-
JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
2126-
JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
2127-
"""
2128-
2129-
gameid_pattern = f"%{fileset['name']}%"
2130-
2131-
cursor.execute(
2132-
query,
2133-
(
2134-
fileset_id,
2135-
engine_name,
2136-
transaction_id,
2137-
fileset["name"],
2138-
fileset["name"],
2139-
gameid_pattern,
2140-
fileset_id,
2141-
),
2142-
)
2143-
rows = cursor.fetchall()
2144-
2145-
candidates = []
2146-
if rows:
2147-
for row in rows:
2148-
candidates.append(row["fileset_id"])
2149-
2150-
return (candidates, fileset_count)
2151-
2152-
21532069
def set_filter_candidate_filesets(
2154-
fileset_id, fileset, fileset_count, transaction_id, conn
2070+
fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
21552071
):
21562072
"""
21572073
Returns a list of candidate filesets that can be merged.
21582074
Performs early filtering in SQL (by engine, name, size) and then
21592075
applies checksum filtering and max-match filtering in Python.
21602076
"""
2077+
is_glk = engine_name == "glk"
21612078
with conn.cursor() as cursor:
21622079
fileset_count += 1
21632080
console_log_candidate_filtering(fileset_count)
@@ -2174,7 +2091,21 @@ def set_filter_candidate_filesets(
21742091
AND f.detection = 1
21752092
AND t.transaction != %s
21762093
"""
2177-
cursor.execute(query, (fileset["sourcefile"], transaction_id))
2094+
if is_glk:
2095+
query += " AND (g.gameid = %s OR (g.gameid != %s AND g.gameid LIKE %s))"
2096+
gameid_pattern = f"%{fileset['name']}%"
2097+
cursor.execute(
2098+
query,
2099+
(
2100+
engine_name,
2101+
transaction_id,
2102+
fileset["name"],
2103+
fileset["name"],
2104+
gameid_pattern,
2105+
),
2106+
)
2107+
else:
2108+
cursor.execute(query, (fileset["sourcefile"], transaction_id))
21782109
raw_candidates = cursor.fetchall()
21792110

21802111
# fileset id to detection files map
@@ -2193,6 +2124,7 @@ def set_filter_candidate_filesets(
21932124

21942125
set_checksums = set()
21952126
set_file_name_size = set()
2127+
set_glk_file_size = set()
21962128
for file in fileset["rom"]:
21972129
for key in file:
21982130
if key.startswith("md5"):
@@ -2201,6 +2133,8 @@ def set_filter_candidate_filesets(
22012133
set_checksums.add((file[key], name.lower(), -1))
22022134
set_file_name_size.add((name.lower(), -1))
22032135
set_file_name_size.add((name.lower(), int(file["size"])))
2136+
if is_glk:
2137+
set_glk_file_size.add(int(file["size"]))
22042138

22052139
# Filter candidates by detection filename and file size (including -1) and increase matched file count
22062140
# if filesize = -1,
@@ -2213,6 +2147,8 @@ def set_filter_candidate_filesets(
22132147
for f in files:
22142148
filename = os.path.basename(f["name"]).lower()
22152149
filesize = f["size"]
2150+
if is_glk and (filesize in set_glk_file_size or filesize == 0):
2151+
count += 1
22162152
if (filename, filesize) in set_file_name_size:
22172153
if filesize == -1:
22182154
count += 1

0 commit comments

Comments
 (0)