@@ -977,7 +977,9 @@ def scan_process(
977
977
978
978
id_to_fileset_mapping = defaultdict (dict )
979
979
980
+ fileset_count = 0
980
981
for fileset in game_data :
982
+ console_log_file_update (fileset_count )
981
983
key = calc_key (fileset )
982
984
megakey = ""
983
985
log_text = f"State { source_status } ."
@@ -1003,9 +1005,12 @@ def scan_process(
1003
1005
filesets_check_for_full = set ()
1004
1006
1005
1007
for rom in fileset ["rom" ]:
1006
- scan_update_files (rom , filesets_check_for_full , transaction_id , conn )
1008
+ pre_update_files (rom , filesets_check_for_full , transaction_id , conn )
1009
+ fileset_count += 1
1007
1010
1011
+ fileset_count = 0
1008
1012
for fileset_id , fileset in id_to_fileset_mapping .items ():
1013
+ console_log_matching (fileset_count )
1009
1014
candidate_filesets = scan_filter_candidate_filesets (
1010
1015
fileset_id , fileset , transaction_id , conn
1011
1016
)
@@ -1047,6 +1052,7 @@ def scan_process(
1047
1052
conn ,
1048
1053
skiplog ,
1049
1054
)
1055
+ fileset_count += 1
1050
1056
1051
1057
# Final log
1052
1058
with conn .cursor () as cursor :
@@ -1063,7 +1069,7 @@ def scan_process(
1063
1069
create_log (escape_string (category_text ), user , escape_string (log_text ), conn )
1064
1070
1065
1071
1066
- def scan_update_files (rom , filesets_check_for_full , transaction_id , conn ):
1072
+ def pre_update_files (rom , filesets_check_for_full , transaction_id , conn ):
1067
1073
"""
1068
1074
Updates all the checksums for the files matching by a checksum and size.
1069
1075
"""
@@ -1074,6 +1080,9 @@ def scan_update_files(rom, filesets_check_for_full, transaction_id, conn):
1074
1080
checksums [key ] = rom [key ]
1075
1081
1076
1082
files_to_update = set ()
1083
+ size = rom ["size" ] if "size" in rom else 0
1084
+ size_r = rom ["size-r" ] if "size-r" in rom else 0
1085
+ size_rd = rom ["size-rd" ] if "size-rd" in rom else 0
1077
1086
1078
1087
for _ , checksum in checksums .items ():
1079
1088
query = """
@@ -1088,9 +1097,7 @@ def scan_update_files(rom, filesets_check_for_full, transaction_id, conn):
1088
1097
AND f.`size-rd` = %s
1089
1098
AND t.transaction != %s
1090
1099
"""
1091
- size = rom ["size" ] if "size" in rom else 0
1092
- size_r = rom ["size-r" ] if "size-r" in rom else 0
1093
- size_rd = rom ["size-rd" ] if "size-rd" in rom else 0
1100
+
1094
1101
cursor .execute (query , (checksum , size , size_r , size_rd , transaction_id ))
1095
1102
result = cursor .fetchall ()
1096
1103
if result :
@@ -1104,12 +1111,20 @@ def scan_update_files(rom, filesets_check_for_full, transaction_id, conn):
1104
1111
WHERE file = %s
1105
1112
"""
1106
1113
cursor .execute (query , (file_id ,))
1114
+ # Update checksums
1107
1115
for check , checksum in checksums .items ():
1108
1116
checksize , checktype , checksum = get_checksum_props (check , checksum )
1109
1117
query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)"
1110
1118
cursor .execute (query , (file_id , checksize , checktype , checksum ))
1111
-
1112
- conn .commit ()
1119
# Update sizes.
# Two fixes relative to the previous statement:
#   * no comma after the last SET assignment (a trailing comma before
#     WHERE is a SQL syntax error);
#   * the bound values are passed as ONE tuple, because DB-API
#     cursor.execute() takes (operation, parameters) and rejects extra
#     positional arguments.
query = """
    UPDATE file
    SET size = %s,
        `size-r` = %s,
        `size-rd` = %s
    WHERE id = %s
"""
cursor.execute(query, (size, size_r, size_rd, file_id))
1113
1128
1114
1129
1115
1130
def scan_perform_match (
@@ -1907,31 +1922,7 @@ def set_perform_match(
1907
1922
)
1908
1923
1909
1924
elif len (candidate_filesets ) > 1 :
1910
- found_match = False
1911
- for candidate_fileset in candidate_filesets :
1912
- (is_match , _ ) = is_full_checksum_match (candidate_fileset , fileset , conn )
1913
- if is_match :
1914
- update_fileset_status (cursor , candidate_fileset , "partial" )
1915
- set_populate_file (fileset , candidate_fileset , conn , detection )
1916
- auto_merged_filesets += 1
1917
- if not skiplog :
1918
- log_matched_fileset (
1919
- src ,
1920
- fileset_id ,
1921
- candidate_fileset ,
1922
- "partial" ,
1923
- user ,
1924
- conn ,
1925
- )
1926
- delete_original_fileset (fileset_id , conn )
1927
- remove_manual_merge_if_size_mismatch (
1928
- candidate_fileset , manual_merge_map , set_to_candidate_dict , conn
1929
- )
1930
- found_match = True
1931
- break
1932
-
1933
- if not found_match :
1934
- manual_merge_map [fileset_id ] = candidate_filesets
1925
+ manual_merge_map [fileset_id ] = candidate_filesets
1935
1926
1936
1927
return (
1937
1928
fully_matched_filesets ,
@@ -2160,8 +2151,7 @@ def set_filter_candidate_filesets(
2160
2151
JOIN game g ON g.id = fs.game
2161
2152
JOIN engine e ON e.id = g.engine
2162
2153
JOIN transactions t ON t.fileset = fs.id
2163
- WHERE fs.id != %s
2164
- AND e.engineid = %s
2154
+ WHERE e.engineid = %s
2165
2155
AND f.detection = 1
2166
2156
AND t.transaction != %s
2167
2157
),
@@ -2199,19 +2189,84 @@ def set_filter_candidate_filesets(
2199
2189
JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count
2200
2190
"""
2201
2191
2202
- cursor .execute (
2203
- query , (fileset_id , fileset ["sourcefile" ], transaction_id , fileset_id )
2204
- )
2192
+ cursor .execute (query , (fileset ["sourcefile" ], transaction_id , fileset_id ))
2205
2193
rows = cursor .fetchall ()
2206
2194
2207
2195
candidates = []
2208
2196
if rows :
2209
2197
for row in rows :
2210
2198
candidates .append (row ["fileset_id" ])
2211
2199
2200
+ matched_candidates = []
2201
+
2202
+ candidates = [
2203
+ candidate
2204
+ for candidate in candidates
2205
+ if is_candidate_by_checksize (candidate , fileset , conn )
2206
+ ]
2207
+
2208
+ for candidate in candidates :
2209
+ if is_full_detection_checksum_match (candidate , fileset , conn ):
2210
+ matched_candidates .append (candidate )
2211
+
2212
+ if len (matched_candidates ) != 0 :
2213
+ candidates = matched_candidates
2214
+
2212
2215
return (candidates , fileset_count )
2213
2216
2214
2217
2218
def is_candidate_by_checksize(candidate, fileset, conn):
    """
    Tell whether fileset `candidate` should stay a candidate for `fileset`.

    The detection files (detection=1) of `candidate` are read from the
    `file` table, and each of their stored checksums is inspected in turn.
    The function returns True as soon as one stored checksum either

    * appears, paired with the file's lowercase base name, among the
      ``md5*`` values of ``fileset["rom"]``, or
    * belongs to a file whose size is -1 or is larger than the checksum's
      checksize (the symbolic checksize "1M" counts as 1048576).

    Returns False when no stored checksum satisfies either condition,
    which includes the case of no detection files or no checksums.

    `conn` must provide ``cursor()`` usable as a context manager and
    returning rows addressable by column name.
    """
    with conn.cursor() as cursor:
        cursor.execute(
            "SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE detection=1 AND fileset = %s",
            (candidate,),
        )
        detection_files = {
            row["id"]: (row["name"], row["size"]) for row in cursor.fetchall()
        }

        # Every (checksum, lowercase base name) pair found in the scan data.
        scanned_pairs = {
            (rom[field], os.path.basename(normalised_path(rom["name"])).lower())
            for rom in fileset["rom"]
            for field in rom
            if field.startswith("md5")
        }

        checksum_query = """
            SELECT fc.checksum, fc.checksize, fc.checktype
            FROM filechecksum fc
            WHERE fc.file = %s
        """
        for file_id, (file_name, file_size) in detection_files.items():
            cursor.execute(checksum_query, (file_id,))
            for row in cursor.fetchall() or ():
                checksize = row["checksize"]
                if checksize == "1M":
                    checksize = 1048576

                pair = (row["checksum"], os.path.basename(file_name.lower()))
                if pair in scanned_pairs:
                    return True
                # A mismatch only rules the checksum out when the file is
                # fully covered by it (known size, not above checksize).
                if file_size > int(checksize) or file_size == -1:
                    return True

    return False
2268
+
2269
+
2215
2270
def process_fileset (
2216
2271
fileset ,
2217
2272
resources ,
@@ -2972,6 +3027,11 @@ def console_log_candidate_filtering(fileset_count):
2972
3027
sys .stdout .flush ()
2973
3028
2974
3029
3030
def console_log_file_update(fileset_count):
    """
    Write the file-update progress for `fileset_count` to stdout.

    The message ends with a carriage return and no newline, and stdout is
    flushed right after the write.
    """
    progress = f"Updating files - Fileset {fileset_count}\r"
    out = sys.stdout
    out.write(progress)
    out.flush()
3033
+
3034
+
2975
3035
def console_log_matching(fileset_count):
    """
    Write the matching progress for `fileset_count` to stdout.

    The message ends with a carriage return and no newline, and stdout is
    flushed right after the write.
    """
    progress = f"Performing Match - Fileset {fileset_count}\r"
    out = sys.stdout
    out.write(progress)
    out.flush()
0 commit comments