Skip to content

Commit d335d91

Browse files
ShivangNagtasev-
authored and committed
INTEGRITY: Iteratively look for extra files if romof or cloneof field is present in the set.dat metadata. Filtering update.
1 parent 394c098 commit d335d91

File tree

1 file changed

+87
-18
lines changed

1 file changed

+87
-18
lines changed

db_functions.py

Lines changed: 87 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,6 @@ def calc_megakey(fileset):
467467
def db_insert(data_arr, username=None, skiplog=False):
468468
header = data_arr[0]
469469
game_data = data_arr[1]
470-
resources = data_arr[2]
471470
filepath = data_arr[3]
472471

473472
try:
@@ -533,9 +532,6 @@ def db_insert(data_arr, username=None, skiplog=False):
533532
insert_game(
534533
engine_name, engineid, title, gameid, extra, platform, lang, conn
535534
)
536-
elif src == "dat":
537-
if "romof" in fileset and fileset["romof"] in resources:
538-
fileset["rom"] = fileset["rom"] + resources[fileset["romof"]]["rom"]
539535

540536
log_text = f"size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}."
541537

@@ -854,6 +850,7 @@ def match_fileset(data_arr, username=None, skiplog=False):
854850
skiplog,
855851
)
856852
else:
853+
game_data_lookup = {fs["name"]: fs for fs in game_data}
857854
for fileset in game_data:
858855
process_fileset(
859856
fileset,
@@ -867,6 +864,7 @@ def match_fileset(data_arr, username=None, skiplog=False):
867864
version,
868865
source_status,
869866
user,
867+
game_data_lookup,
870868
)
871869
finalize_fileset_insertion(
872870
conn, transaction_id, src, filepath, author, version, source_status, user
@@ -905,9 +903,25 @@ def set_process(
905903
set_to_candidate_dict = defaultdict(list)
906904
id_to_fileset_dict = defaultdict(dict)
907905

906+
game_data_lookup = {fs["name"]: fs for fs in game_data}
907+
908908
for fileset in game_data:
909-
if "romof" in fileset and fileset["romof"] in resources:
910-
fileset["rom"] += resources[fileset["romof"]]["rom"]
909+
# Ideally romof should be enough, but adding in case of an edge case
910+
current_name = fileset.get("romof") or fileset.get("cloneof")
911+
912+
# Iteratively check for extra files if linked to multiple filesets
913+
while current_name:
914+
if current_name in resources:
915+
fileset["rom"] += resources[current_name]["rom"]
916+
break
917+
918+
elif current_name in game_data_lookup:
919+
linked = game_data_lookup[current_name]
920+
fileset["rom"] += linked.get("rom", [])
921+
current_name = linked.get("romof") or linked.get("cloneof")
922+
else:
923+
break
924+
911925
key = calc_key(fileset)
912926
megakey = ""
913927
log_text = f"State {source_status}."
@@ -938,7 +952,7 @@ def set_process(
938952
fileset_description = (
939953
fileset["description"] if "description" in fileset else ""
940954
)
941-
log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}"
955+
log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}."
942956
create_log(
943957
escape_string(category_text), user, escape_string(log_text), conn
944958
)
@@ -955,14 +969,31 @@ def set_process(
955969
value_to_keys[candidates[0]].append(set_fileset)
956970
for candidate, set_filesets in value_to_keys.items():
957971
if len(set_filesets) > 1:
972+
query = """
973+
SELECT e.engineid, g.gameid, g.platform, g.language
974+
FROM fileset fs
975+
JOIN game g ON fs.game = g.id
976+
JOIN engine e ON e.id = g.engine
977+
WHERE fs.id = %s
978+
"""
979+
result = None
980+
with conn.cursor() as cursor:
981+
cursor.execute(query, (candidate,))
982+
result = cursor.fetchone()
983+
984+
engine = result["engineid"]
985+
gameid = result["gameid"]
986+
platform = result["platform"]
987+
language = result["language"]
988+
958989
for set_fileset in set_filesets:
959990
fileset = id_to_fileset_dict[set_fileset]
960991
category_text = "Drop set fileset - B"
961992
fileset_name = fileset["name"] if "name" in fileset else ""
962993
fileset_description = (
963994
fileset["description"] if "description" in fileset else ""
964995
)
965-
log_text = f"Drop fileset, multiple filesets mapping to single detection. Name: {fileset_name}, Description: {fileset_description}"
996+
log_text = f"Drop fileset, multiple filesets mapping to single detection. Name: {fileset_name}, Description: {fileset_description}. Clashed with Fileset:{candidate} ({engine}:{gameid}-{platform}-{language})"
966997
create_log(
967998
escape_string(category_text), user, escape_string(log_text), conn
968999
)
@@ -996,7 +1027,8 @@ def set_process(
9961027
# Final log
9971028
with conn.cursor() as cursor:
9981029
cursor.execute(
999-
f"SELECT COUNT(fileset) from transactions WHERE `transaction` = {transaction_id}"
1030+
"SELECT COUNT(fileset) from transactions WHERE `transaction` = %s",
1031+
(transaction_id,),
10001032
)
10011033
fileset_insertion_count = cursor.fetchone()["COUNT(fileset)"]
10021034
category_text = f"Uploaded from {src}"
@@ -1037,7 +1069,7 @@ def set_perform_match(
10371069
set_populate_file(fileset, matched_fileset_id, conn, detection)
10381070
auto_merged_filesets += 1
10391071
if not skiplog:
1040-
log_matched_fileset(
1072+
set_log_matched_fileset(
10411073
src,
10421074
fileset_id,
10431075
matched_fileset_id,
@@ -1087,7 +1119,7 @@ def set_perform_match(
10871119
set_populate_file(fileset, candidate_fileset, conn, detection)
10881120
auto_merged_filesets += 1
10891121
if not skiplog:
1090-
log_matched_fileset(
1122+
set_log_matched_fileset(
10911123
src,
10921124
fileset_id,
10931125
candidate_fileset,
@@ -1185,17 +1217,28 @@ def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
11851217
FROM candidate_fileset cf
11861218
JOIN set_fileset sf ON cf.name = sf.name AND (cf.size = sf.size OR cf.size = -1)
11871219
GROUP BY cf.fileset_id
1188-
)
1189-
SELECT mdf.fileset_id
1220+
),
1221+
valid_matched_detection_files AS (
1222+
SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count
11901223
FROM matched_detection_files mdf
1191-
JOIN total_detection_files tdf ON mdf.fileset_id = tdf.fileset_id
1192-
WHERE mdf.match_files_count = tdf.detection_files_found
1193-
ORDER BY mdf.match_files_count DESC;
1224+
JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id
1225+
WHERE tdf.detection_files_found = mdf.match_files_count
1226+
),
1227+
max_match_count AS (
1228+
SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files
1229+
)
1230+
SELECT vmdf.fileset_id
1231+
FROM valid_matched_detection_files vmdf
1232+
JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id
1233+
JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
1234+
WHERE vmdf.valid_match_files_count = tdf.detection_files_found;
11941235
"""
1236+
11951237
cursor.execute(
11961238
query, (fileset_id, fileset["sourcefile"], transaction_id, fileset_id)
11971239
)
11981240
rows = cursor.fetchall()
1241+
11991242
candidates = []
12001243
if rows:
12011244
for row in rows:
@@ -1216,11 +1259,26 @@ def process_fileset(
12161259
version,
12171260
source_status,
12181261
user,
1262+
game_data_lookup,
12191263
):
12201264
if detection:
12211265
insert_game_data(fileset, conn)
1222-
elif src == "dat" and "romof" in fileset and fileset["romof"] in resources:
1223-
fileset["rom"] += resources[fileset["romof"]]["rom"]
1266+
1267+
# Ideally romof should be enough, but adding in case of an edge case
1268+
current_name = fileset.get("romof") or fileset.get("cloneof")
1269+
1270+
# Iteratively check for extra files if linked to multiple filesets
1271+
while current_name:
1272+
if current_name in resources:
1273+
fileset["rom"] += resources[current_name]["rom"]
1274+
break
1275+
1276+
elif current_name in game_data_lookup:
1277+
linked = game_data_lookup[current_name]
1278+
fileset["rom"] += linked.get("rom", [])
1279+
current_name = linked.get("romof") or linked.get("cloneof")
1280+
else:
1281+
break
12241282

12251283
key = calc_key(fileset) if not detection else ""
12261284
megakey = calc_megakey(fileset) if detection else ""
@@ -1639,6 +1697,17 @@ def log_matched_fileset(src, fileset_last, fileset_id, state, user, conn):
16391697
update_history(fileset_last, fileset_id, conn, log_last)
16401698

16411699

1700+
def set_log_matched_fileset(src, fileset_last, fileset_id, state, user, conn):
1701+
category_text = f"Matched from {src}"
1702+
log_text = (
1703+
f"Matched Fileset:{fileset_last} with Fileset:{fileset_id}. State {state}."
1704+
)
1705+
log_last = create_log(
1706+
escape_string(category_text), user, escape_string(log_text), conn
1707+
)
1708+
update_history(fileset_last, fileset_id, conn, log_last)
1709+
1710+
16421711
def finalize_fileset_insertion(
16431712
conn, transaction_id, src, filepath, author, version, source_status, user
16441713
):

0 commit comments

Comments
 (0)