diff --git a/clear.py b/clear.py index 707914c8..acdae141 100644 --- a/clear.py +++ b/clear.py @@ -7,26 +7,30 @@ import json import os + def truncate_all_tables(conn): + # fmt: off tables = ["filechecksum", "queue", "history", "transactions", "file", "fileset", "game", "engine", "log"] cursor = conn.cursor() - + # fmt: on + # Disable foreign key checks cursor.execute("SET FOREIGN_KEY_CHECKS = 0") - + for table in tables: try: cursor.execute(f"TRUNCATE TABLE `{table}`") print(f"Table '{table}' truncated successfully") except pymysql.Error as err: print(f"Error truncating table '{table}': {err}") - + # Enable foreign key checks cursor.execute("SET FOREIGN_KEY_CHECKS = 1") + if __name__ == "__main__": base_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(base_dir, 'mysql_config.json') + config_path = os.path.join(base_dir, "mysql_config.json") with open(config_path) as f: mysql_cred = json.load(f) @@ -41,9 +45,9 @@ def truncate_all_tables(conn): user=username, password=password, db=dbname, # Specify the database to use - charset='utf8mb4', + charset="utf8mb4", cursorclass=pymysql.cursors.DictCursor, - autocommit=True + autocommit=True, ) # Check connection @@ -55,4 +59,4 @@ def truncate_all_tables(conn): truncate_all_tables(conn) # Close connection - conn.close() \ No newline at end of file + conn.close() diff --git a/compute_hash.py b/compute_hash.py index 0067cfdc..d63b22fb 100644 --- a/compute_hash.py +++ b/compute_hash.py @@ -4,6 +4,8 @@ import struct import sys from enum import Enum +from datetime import datetime, date, timedelta +from collections import defaultdict class FileType(Enum): NON_MAC = "non_mac" @@ -16,6 +18,8 @@ class FileType(Enum): script_version = "0.1" +SPECIAL_SYMBOLS = '/":*|\\?%<>\x7f' + # CRC table CRC16_XMODEM_TABLE = [ 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, @@ -73,9 +77,73 @@ def get_dirs_at_depth(directory, depth): if depth == num_sep_this - num_sep: yield root -def read_be_32(byte_stream): + +def my_escape_string(s: str) -> str: + """ + Escape strings + + Escape the following: + - escape char: \x81 + - unallowed filename chars: https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words + - control chars < 0x20 + """ + new_name = "" + for char in s: + if char == "\x81": + new_name += "\x81\x79" + elif char in SPECIAL_SYMBOLS or ord(char) < 0x20: + new_name += "\x81" + chr(0x80 + ord(char)) + else: + new_name += char + return new_name + + +def encode_punycode(orig): + """ + Punyencode strings + + - escape special characters and + - ensure filenames can't end in a space or dot + """ + s = my_escape_string(orig) + encoded = s.encode("punycode").decode("ascii") + # punyencoding adds an '-' at the end when there are no special chars + # don't use it for comparing + compare = encoded + if encoded.endswith("-"): + compare = encoded[:-1] + if orig != compare or compare[-1] in " .": + return "xn--" + encoded + return orig + + +def punycode_need_encode(orig): + """ + A filename needs to be punyencoded when it: + + - contains a char that should be escaped or + - ends with a dot or a space.
+ """ + if len(orig) > 4 and orig[:4] == "xn--": + return False + if not all((0x20 <= ord(c) < 0x80) and c not in SPECIAL_SYMBOLS for c in orig): + return True + if orig[-1] in " .": + return True + return False + +def encode_path_components(filepath): + """ + Puny encodes all separate components of filepath + """ + parts = [i for i in filepath.split(os.sep) if i ] + encoded_parts = [encode_punycode(p) if punycode_need_encode(p) else p for p in parts] + return os.path.join(*encoded_parts) + +def read_be_32(byte_stream, signed=False): """ Return unsigned integer of size_in_bits, assuming the data is big-endian """ - (uint,) = struct.unpack(">I", byte_stream[:32//8]) + format = ">i" if signed else ">I" + (uint,) = struct.unpack(format, byte_stream[:32//8]) return uint def read_be_16(byte_stream): @@ -154,7 +222,6 @@ def is_actual_resource_fork_mac(filepath): """ Returns boolean, checking the actual mac fork if it exists. """ resource_fork_path = os.path.join(filepath, "..namedfork", "rsrc") - print(resource_fork_path) return os.path.exists(resource_fork_path) def is_appledouble(file_byte_stream): @@ -178,7 +245,7 @@ def is_appledouble(file_byte_stream): return True def macbin_get_resfork_data(file_byte_stream): - """ Returns the resource fork's data section as bytes of a macbinary file as well as its size """ + """ Returns the resource fork's data section as bytes, data fork size (size), resource fork size (size-r) and data section of resource fork size (size-rd) of a macbinary file """ if not file_byte_stream: return file_byte_stream @@ -188,10 +255,10 @@ def macbin_get_resfork_data(file_byte_stream): (rsrclen,) = struct.unpack(">I", file_byte_stream[0x57:0x5B]) resoure_fork_offset = 128 + datalen_padded - data_offset = int.from_bytes(file_byte_stream[resoure_fork_offset+0 : resoure_fork_offset+4]) - data_length = int.from_bytes(file_byte_stream[resoure_fork_offset+8 : resoure_fork_offset+12]) + rd_offset = int.from_bytes(file_byte_stream[resoure_fork_offset+0 : resoure_fork_offset+4]) + rd_length = int.from_bytes(file_byte_stream[resoure_fork_offset+8 : resoure_fork_offset+12]) - return (file_byte_stream[resoure_fork_offset + data_offset: resoure_fork_offset + data_offset + data_length], data_length) + return (file_byte_stream[resoure_fork_offset + rd_offset: resoure_fork_offset + rd_offset + rd_length], datalen, rsrclen, rd_length) def macbin_get_datafork(file_byte_stream): if not file_byte_stream: @@ -200,28 +267,9 @@ def macbin_get_datafork(file_byte_stream): (datalen,) = struct.unpack(">I", file_byte_stream[0x53:0x57]) return file_byte_stream[0x80: 0x80 + datalen] -def is_appledouble(file_byte_stream): - """ - Appledouble Structure - - - Header: - +$00 / 4: signature (0x00 0x05 0x16 0x00) - +$04 / 4: version (0x00 0x01 0x00 0x00 (v1) -or- 0x00 0x02 0x00 0x00 (v2)) - +$08 /16: home file system string (v1) -or- zeroes (v2) - +$18 / 2: number of entries - - Entries: - +$00 / 4: entry ID (1-15) - +$04 / 4: offset to data from start of file - +$08 / 4: length of entry in bytes; may be zero - """ - if (not file_byte_stream or read_be_32(file_byte_stream) != 0x00051607): - return False - - return True def appledouble_get_resfork_data(file_byte_stream): - """ Returns the resource fork's data section as bytes of an appledouble file as well as its size """ + """ Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of an appledouble file""" entry_count = read_be_16(file_byte_stream[24:]) for entry in 
range(entry_count): @@ -232,13 +280,13 @@ def appledouble_get_resfork_data(file_byte_stream): if id == 2: resource_fork_stream = file_byte_stream[offset:offset+length] - data_offset = int.from_bytes(resource_fork_stream[0:4]) - data_length = int.from_bytes(resource_fork_stream[8:12]) + rd_offset = int.from_bytes(resource_fork_stream[0:4]) + rd_length = int.from_bytes(resource_fork_stream[8:12]) - return (resource_fork_stream[data_offset: data_offset+data_length], data_length) + return (resource_fork_stream[rd_offset: rd_offset+rd_length], length, rd_length) def appledouble_get_datafork(filepath, fileinfo): - """ Returns data fork's content as bytes of appledouble file if found, otherwise empty byte string """ + """ Returns data fork's content as bytes and size of data fork of an appledouble file.""" try: index = filepath.index("__MACOSX") except ValueError: @@ -252,50 +300,54 @@ def appledouble_get_datafork(filepath, fileinfo): try: with open(data_fork_path, "rb") as f: - return f.read() + data = f.read() + return (data, len(data)) except (FileNotFoundError, IsADirectoryError): return b'' def raw_rsrc_get_datafork(filepath): - """ Returns the data fork's content as bytes corresponding to raw rsrc file. """ + """ Returns the data fork's content as bytes and size of the data fork corresponding to raw rsrc file. """ try: with open(filepath[:-5]+".data", "rb") as f: - return f.read() + data = f.read() + return (data, len(data)) except (FileNotFoundError, IsADirectoryError): return b'' def raw_rsrc_get_resource_fork_data(filepath): - """ Returns the resource fork's data section as bytes of a raw rsrc file as well as its size """ + """ Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of a raw rsrc file.""" with open(filepath, "rb") as f: resource_fork_stream = f.read() - data_offset = int.from_bytes(resource_fork_stream[0:4]) - data_length = int.from_bytes(resource_fork_stream[8:12]) + resource_fork_len = len(resource_fork_stream) + rd_offset = int.from_bytes(resource_fork_stream[0:4]) + rd_length = int.from_bytes(resource_fork_stream[8:12]) - return (resource_fork_stream[data_offset: data_offset+data_length], data_length) + return (resource_fork_stream[rd_offset: rd_offset+rd_length], resource_fork_len, rd_length) def actual_mac_fork_get_data_fork(filepath): - """ Returns the data fork's content as bytes if the actual mac fork exists """ + """ Returns the data fork's content as bytes and its size if the actual mac fork exists """ try: with open(filepath, "rb") as f: - return f.read() + data = f.read() + return (data, len(data)) except (FileNotFoundError, IsADirectoryError): return b'' def actual_mac_fork_get_resource_fork_data(filepath): - """ Returns the resource fork's data section as bytes of the actual mac fork as well as its size """ + """ Returns the resource fork's data section as bytes, size of resource fork (size-r) and size of data section of resource fork (size-rd) of the actual mac fork.""" resource_fork_path = os.path.join(filepath, "..namedfork", "rsrc") with open(resource_fork_path, "rb") as f: resource_fork_stream = f.read() - data_offset = int.from_bytes(resource_fork_stream[0:4]) - data_length = int.from_bytes(resource_fork_stream[8:12]) + resource_fork_len = len(resource_fork_stream) + rd_offset = int.from_bytes(resource_fork_stream[0:4]) + rd_length = int.from_bytes(resource_fork_stream[8:12]) - return (resource_fork_stream[data_offset: data_offset+data_length], data_length) + return 
(resource_fork_stream[rd_offset: rd_offset+rd_length], resource_fork_len, rd_length) -def file_checksum(filepath, alg, size, file_info): - cur_file_size = 0 +def file_checksum(filepath, alg, custom_checksum_size, file_info): with open(filepath, "rb") as f: if file_info[0] == FileType.NON_MAC: - return (create_checksum_pairs(checksum(f, alg, size, filepath), alg, size), filesize(filepath)) + return (create_checksum_pairs(checksum(f, alg, custom_checksum_size, filepath), alg, custom_checksum_size), filesize(filepath), 0, 0) # Processing mac files res = [] @@ -303,29 +355,33 @@ def file_checksum(filepath, alg, size, file_info): datafork = b'' file_data = f.read() + size = 0 + size_r = 0 + size_rd = 0 + if file_info[0] == FileType.MAC_BINARY: - (resfork, cur_file_size) = macbin_get_resfork_data(file_data) + (resfork, size, size_r, size_rd) = macbin_get_resfork_data(file_data) datafork = macbin_get_datafork(file_data) elif file_info[0] in {FileType.APPLE_DOUBLE_DOT_, FileType.APPLE_DOUBLE_RSRC, FileType.APPLE_DOUBLE_MACOSX}: - (resfork, cur_file_size) = appledouble_get_resfork_data(file_data) - datafork = appledouble_get_datafork(filepath, file_info) + (resfork, size_r, size_rd) = appledouble_get_resfork_data(file_data) + (datafork, size) = appledouble_get_datafork(filepath, file_info) elif file_info[0] == FileType.RAW_RSRC: - (resfork, cur_file_size) = raw_rsrc_get_resource_fork_data(filepath) - datafork = raw_rsrc_get_datafork(filepath) + (resfork, size_r, size_rd) = raw_rsrc_get_resource_fork_data(filepath) + datafork, size = raw_rsrc_get_datafork(filepath) elif file_info[0] == FileType.ACTUAL_FORK_MAC: - (resfork, cur_file_size) = actual_mac_fork_get_resource_fork_data(filepath) - datafork = actual_mac_fork_get_data_fork(filepath) + (resfork, size_r, size_rd) = actual_mac_fork_get_resource_fork_data(filepath) + (datafork, size) = actual_mac_fork_get_data_fork(filepath) - hashes = checksum(resfork, alg, size, filepath) + hashes = checksum(resfork, alg, custom_checksum_size, filepath) prefix = 'r' if len(resfork): - res.extend(create_checksum_pairs(hashes, alg, size, prefix)) + res.extend(create_checksum_pairs(hashes, alg, custom_checksum_size, prefix)) - hashes = checksum(datafork, alg, size, filepath) + hashes = checksum(datafork, alg, custom_checksum_size, filepath) prefix = 'd' - res.extend(create_checksum_pairs(hashes, alg, size, prefix)) + res.extend(create_checksum_pairs(hashes, alg, custom_checksum_size, prefix)) - return (res, cur_file_size) + return (res, size, size_r, size_rd) def create_checksum_pairs(hashes, alg, size, prefix=None): res = [] @@ -505,7 +561,7 @@ def file_filter(files): for file in to_be_deleted: del files[file] -def compute_hash_of_dirs(root_directory, depth, size=0, alg="md5"): +def compute_hash_of_dirs(root_directory, depth, size=0, limit_timestamps_date=None, alg="md5"): """ Return dictionary containing checksums of all files in directory """ res = [] @@ -518,8 +574,11 @@ def compute_hash_of_dirs(root_directory, depth, size=0, alg="md5"): for root, _, contents in os.walk(directory): files.extend([os.path.join(root, f) for f in contents]) + # Filter out the files based on user input date - limit_timestamps_date + filtered_file_map = filter_files_by_timestamp(files, limit_timestamp_date) + # Produce filetype and filename(name to be used in game entry) for each file - for filepath in files: + for filepath in filtered_file_map: file_collection[filepath] = file_classification(filepath) # Remove extra entries of macfiles to avoid extra checksum calculation in form of 
non mac files @@ -538,11 +597,113 @@ def compute_hash_of_dirs(root_directory, depth, size=0, alg="md5"): relative_dir = os.path.dirname(os.path.dirname(relative_path)) relative_path = os.path.join(relative_dir, base_name) - hash_of_dir[relative_path] = file_checksum(file_path, alg, size, file_info) + hash_of_dir[relative_path] = file_checksum(file_path, alg, size, file_info) + (filtered_file_map[file_path],) res.append(hash_of_dir) return res + +def extract_macbin_mtime(file_byte_stream): + """ + Returns modification time of macbinary file from the header. + Doc - +$5f / 4: modification date/time. + Doc - Timestamps are unsigned 32-bit values indicating the time in seconds since midnight on Jan 1, 1904, in local time. + """ + macbin_epoch = datetime(1904, 1, 1) + header = file_byte_stream[:128] + macbin_seconds = read_be_32(header[0x5f:]) + return (macbin_epoch + timedelta(seconds=macbin_seconds)).date() + + +def extract_mtime_appledouble(file_byte_stream): + """ + Returns modification time of appledouble file. + Doc 1 - The File Dates Info entry (ID=8) consists of the file creation, modification, backup + and access times (see Figure 2-1), stored as a signed number of seconds before + or after 12:00 a.m. (midnight), January 1, 2000 Greenwich Mean Time (GMT) + + Doc 2 - + struct ASFileDates /* entry ID 8, file dates info */ + { + sint32 create; /* file creation date/time */ + sint32 modify; /* last modification date/time */ + sint32 backup; /* last backup date/time */ + sint32 access; /* last access date/time */ + }; /* ASFileDates */ + """ + entry_count = read_be_16(file_byte_stream[24:]) + for entry in range(entry_count): + start_index = 26 + entry*12 + id = read_be_32(file_byte_stream[start_index:]) + offset = read_be_32(file_byte_stream[start_index+4:]) + length = read_be_32(file_byte_stream[start_index+8:]) + + if id == 8: + date_info_data = file_byte_stream[offset:offset + length] + if len(date_info_data) < 16: + raise ValueError("FileDatesInfo block is too short.") + appledouble_epoch = datetime(2000, 1, 1) + modify_seconds = read_be_32(date_info_data[4:8], signed=True) + return (appledouble_epoch + timedelta(seconds=modify_seconds)).date() + + return None + + +def macfile_timestamp(filepath): + """ + Returns the modification times for the mac file from their finderinfo. + If the file is not a macfile, it returns None + """ + with open(filepath, "rb") as f: + data = f.read() + # Macbinary + if is_macbin(filepath): + return extract_macbin_mtime(data) + + # Appledouble + if is_appledouble_rsrc(filepath) or is_appledouble_in_dot_(filepath) or is_appledouble_in_macosx(filepath): + return extract_mtime_appledouble(data) + + return None + + +def validate_date(date_str): + """ + Confirms if the user provided timestamp is in a valid format. + Returns the date as a datetime object. + """ + formats = ["%Y-%m-%d", "%Y-%m", "%Y"] + for fmt in formats: + try: + return datetime.strptime(date_str, fmt).date() + except ValueError: + continue + raise ValueError("Invalid date format. Use YYYY, YYYY-MM, or YYYY-MM-DD") + + +def filter_files_by_timestamp(files, limit_timestamps_date): + """ + Removes the files those were modified after a certain timestamp provided by the user. + The files those were modified today are kept. 
+ Returns filtered map with filepath and its modification time + """ + + filtered_file_map = defaultdict(str) + + if limit_timestamps_date is not None: + user_date = validate_date(limit_timestamps_date) + today = date.today() + + for filepath in files: + mtime = macfile_timestamp(filepath) + if mtime is None: + mtime = datetime.fromtimestamp(os.path.getmtime(filepath)).date() + if limit_timestamps_date is None or (limit_timestamps_date is not None and (mtime <= user_date or mtime == today)): + filtered_file_map[filepath] = str(mtime) + + return filtered_file_map + + def create_dat_file(hash_of_dirs, path, checksum_size=0): with open(f"{os.path.basename(path)}.dat", "w") as file: # Header @@ -556,8 +717,9 @@ def create_dat_file(hash_of_dirs, path, checksum_size=0): # Game files for hash_of_dir in hash_of_dirs: file.write("game (\n") - for filename, (hashes, filesize) in hash_of_dir.items(): - data = f"name \"{filename}\" size {filesize}" + for filename, (hashes, size, size_r, size_rd, timestamp) in hash_of_dir.items(): + filename = encode_path_components(filename) + data = f"name '{filename}' size {size} size-r {size_r} size-rd {size_rd} modification-time {timestamp}" for key, value in hashes: data += f" {key} {value}" @@ -579,10 +741,13 @@ def error(self, message): help="Depth from root to game directories") parser.add_argument("--size", help="Use first n bytes of file to calculate checksum") +parser.add_argument("--limit-timestamps", + help="Format - YYYY-MM-DD or YYYY-MM or YYYY. Filters out files that were modified after the given timestamp. Note that files modified today are never filtered out.") args = parser.parse_args() path = os.path.abspath(args.directory) if args.directory else os.getcwd() depth = int(args.depth) if args.depth else 0 checksum_size = int(args.size) if args.size else 0 +limit_timestamp_date = str(args.limit_timestamps) if args.limit_timestamps else None create_dat_file(compute_hash_of_dirs( - path, depth, checksum_size), path, checksum_size) + path, depth, checksum_size, limit_timestamp_date), path, checksum_size) diff --git a/dat_parser.py b/dat_parser.py index b3ce12ef..a76480b2 100644 --- a/dat_parser.py +++ b/dat_parser.py @@ -33,6 +33,9 @@ def map_checksum_data(content_string): elif tokens[i] == "size-rd": current_rom["size-rd"] = int(tokens[i + 1]) i += 2 + elif tokens[i] == "modification-time": + current_rom["modification-time"] = tokens[i + 1] + i += 2 else: checksum_key = tokens[i] checksum_value = tokens[i + 1] if len(tokens) >= 6 else "0" diff --git a/db_functions.py b/db_functions.py index 45adc5de..490bcebb 100644 --- a/db_functions.py +++ b/db_functions.py @@ -9,11 +9,13 @@ from collections import defaultdict import re import copy +import sys SPECIAL_SYMBOLS = '/":*|\\?%<>\x7f' def db_connect(): + console_log("Connecting to the Database.") base_dir = os.path.dirname(os.path.abspath(__file__)) config_path = os.path.join(base_dir, "mysql_config.json") with open(config_path) as f: mysql_cred = json.load(f) @@ -28,10 +30,28 @@ def db_connect(): cursorclass=pymysql.cursors.DictCursor, autocommit=False, ) - + console_log(f"Connected to Database - {mysql_cred['dbname']}") return conn +def db_connect_root(): + base_dir = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join(base_dir, "mysql_config.json") + with open(config_path) as f: + mysql_cred = json.load(f) + + conn = pymysql.connect( + host=mysql_cred["servername"], + user=mysql_cred["username"], + password=mysql_cred["password"], + charset="utf8mb4", +
cursorclass=pymysql.cursors.DictCursor, + autocommit=True, + ) + + return (conn, mysql_cred["dbname"]) + + def get_checksum_props(checkcode, checksum): checksize = 0 checktype = checkcode @@ -59,24 +79,26 @@ def insert_game(engine_name, engineid, title, gameid, extra, platform, lang, con # Set @engine_last if engine already present in table exists = False with conn.cursor() as cursor: - cursor.execute(f"SELECT id FROM engine WHERE engineid = '{engineid}'") + cursor.execute("SELECT id FROM engine WHERE engineid = %s", (engineid,)) res = cursor.fetchone() if res is not None: exists = True - cursor.execute(f"SET @engine_last = '{res['id']}'") + cursor.execute("SET @engine_last = %s", (res["id"],)) # Insert into table if not present if not exists: with conn.cursor() as cursor: cursor.execute( - f"INSERT INTO engine (name, engineid) VALUES ('{escape_string(engine_name)}', '{engineid}')" + "INSERT INTO engine (name, engineid) VALUES (%s, %s)", + (engine_name, engineid), ) cursor.execute("SET @engine_last = LAST_INSERT_ID()") # Insert into game with conn.cursor() as cursor: cursor.execute( - f"INSERT INTO game (name, engine, gameid, extra, platform, language) VALUES ('{escape_string(title)}', @engine_last, '{gameid}', '{escape_string(extra)}', '{platform}', '{lang}')" + "INSERT INTO game (name, engine, gameid, extra, platform, language) VALUES (%s, @engine_last, %s, %s, %s, %s)", + (title, gameid, extra, platform, lang), ) cursor.execute("SET @game_last = LAST_INSERT_ID()") @@ -89,6 +111,7 @@ def insert_fileset( transaction, log_text, conn, + set_dat_metadata="", ip="", username=None, skiplog=None, @@ -108,27 +131,27 @@ def insert_fileset( # Check if key/megakey already exists, if so, skip insertion (no quotes on purpose) if detection: with conn.cursor() as cursor: - cursor.execute(f"SELECT id FROM fileset WHERE megakey = {megakey}") + cursor.execute("SELECT id FROM fileset WHERE megakey = %s", (megakey,)) existing_entry = cursor.fetchone() else: with conn.cursor() as cursor: - cursor.execute(f"SELECT id FROM fileset WHERE `key` = {key}") + cursor.execute("SELECT id FROM fileset WHERE `key` = %s", (key,)) existing_entry = cursor.fetchone() if existing_entry is not None: existing_entry = existing_entry["id"] with conn.cursor() as cursor: - cursor.execute(f"SET @fileset_last = {existing_entry}") - cursor.execute(f"DELETE FROM file WHERE fileset = {existing_entry}") + cursor.execute("SET @fileset_last = %s", (existing_entry,)) + cursor.execute("DELETE FROM file WHERE fileset = %s", (existing_entry,)) cursor.execute( - f"UPDATE fileset SET `timestamp` = FROM_UNIXTIME(@fileset_time_last) WHERE id = {existing_entry}" + "UPDATE fileset SET `timestamp` = FROM_UNIXTIME(@fileset_time_last) WHERE id = %s", + (existing_entry,), ) cursor.execute( - f"UPDATE fileset SET status = 'detection' WHERE id = {existing_entry} AND status = 'obsolete'" + "SELECT status FROM fileset WHERE id = %s", (existing_entry,) ) - cursor.execute(f"SELECT status FROM fileset WHERE id = {existing_entry}") status = cursor.fetchone()["status"] if status == "user": add_usercount(existing_entry, conn) @@ -144,10 +167,10 @@ def insert_fileset( return (existing_entry, True) # $game and $key should not be parsed as a mysql string, hence no quotes - query = f"INSERT INTO fileset (game, status, src, `key`, megakey, `timestamp`) VALUES ({game}, '{status}', '{src}', {key}, {megakey}, FROM_UNIXTIME(@fileset_time_last))" + query = "INSERT INTO fileset (game, status, src, `key`, megakey, `timestamp`, set_dat_metadata) VALUES (%s, %s, %s, %s, %s, 
FROM_UNIXTIME(@fileset_time_last), %s)" fileset_id = -1 with conn.cursor() as cursor: - cursor.execute(query) + cursor.execute(query, (game, status, src, key, megakey, set_dat_metadata)) fileset_id = cursor.lastrowid cursor.execute("SET @fileset_last = LAST_INSERT_ID()") @@ -170,7 +193,8 @@ def insert_fileset( update_history(0, fileset_last, conn) with conn.cursor() as cursor: cursor.execute( - f"INSERT INTO transactions (`transaction`, fileset) VALUES ({transaction}, {fileset_last})" + "INSERT INTO transactions (`transaction`, fileset) VALUES (%s, %s)", + (transaction, fileset_last), ) return (fileset_id, False) @@ -212,14 +236,11 @@ def insert_file(file, detection, src, conn): values.append(file["size"] if "size" in file else "0") values.append(file["size-r"] if "size-r" in file else "0") values.append(file["size-rd"] if "size-rd" in file else "0") - + values.append(file["modification-time"] if "modification-time" in file else "") values.extend([checksum, detection, detection_type]) # Parameterised Query - placeholders = ( - ["%s"] * (len(values[:5])) + ["@fileset_last"] + ["%s"] * 2 + ["NOW()"] - ) - query = f"INSERT INTO file ( name, size, `size-r`, `size-rd`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES ({', '.join(placeholders)})" + query = "INSERT INTO file ( name, size, `size-r`, `size-rd`, `modification-time`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES (%s, %s, %s, %s, %s, %s, @fileset_last, %s, %s, NOW())" with conn.cursor() as cursor: cursor.execute(query, values) @@ -227,32 +248,39 @@ def insert_file(file, detection, src, conn): if detection: with conn.cursor() as cursor: cursor.execute( - f"UPDATE fileset SET detection_size = {checksize} WHERE id = @fileset_last AND detection_size IS NULL" + "UPDATE fileset SET detection_size = %s WHERE id = @fileset_last AND detection_size IS NULL", + (checksize,), ) with conn.cursor() as cursor: cursor.execute("SET @file_last = LAST_INSERT_ID()") -def insert_filechecksum(file, checktype, conn): +def insert_filechecksum(file, checktype, file_id, conn): if checktype not in file: return checksum = file[checktype] checksize, checktype, checksum = get_checksum_props(checktype, checksum) - query = f"INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (@file_last, '{checksize}', '{checktype}', '{checksum}')" + query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)" + with conn.cursor() as cursor: + cursor.execute(query, (file_id, checksize, checktype, checksum)) + + add_all_equal_checksums(checksize, checktype, checksum, file_id, conn) + + +def add_all_equal_checksums(checksize, checktype, checksum, file_id, conn): + """ + We can update all the checksums when file size is less than the checksum size type, as all checksums are equal in that case. 
+ """ with conn.cursor() as cursor: - cursor.execute(query) if "md5" not in checktype: return - size_name = "size" if checktype[-1] == "r": size_name += "-rd" - if checktype[-1] == "s": - size_name += "-d" - cursor.execute(f"SELECT `{size_name}` FROM file WHERE id = @file_last") + cursor.execute(f"SELECT `{size_name}` FROM file WHERE id = %s", (file_id,)) result = cursor.fetchone() if not result: return @@ -281,9 +309,10 @@ def insert_filechecksum(file, checktype, conn): checksum_size = exploded.pop() checksum_type = "-".join(exploded) - query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (@file_last, %s, %s, %s)" - with conn.cursor() as cursor: - cursor.execute(query, (checksum_size, checksum_type, checksum)) + query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)" + cursor.execute( + query, (file_id, checksum_size, checksum_type, checksum) + ) def delete_filesets(conn): @@ -347,9 +376,10 @@ def punycode_need_encode(orig): def create_log(category, user, text, conn): query = f"INSERT INTO log (`timestamp`, category, user, `text`) VALUES (FROM_UNIXTIME({int(time.time())}), '{escape_string(category)}', '{escape_string(user)}', '{escape_string(text)}')" + query = "INSERT INTO log (`timestamp`, category, user, `text`) VALUES (FROM_UNIXTIME(%s), %s, %s, %s)" with conn.cursor() as cursor: try: - cursor.execute(query) + cursor.execute(query, (int(time.time()), category, user, text)) conn.commit() except Exception as e: conn.rollback() @@ -362,10 +392,12 @@ def create_log(category, user, text, conn): def update_history(source_id, target_id, conn, log_last=None): - query = f"INSERT INTO history (`timestamp`, fileset, oldfileset, log) VALUES (NOW(), {target_id}, {source_id}, {log_last if log_last is not None else 0})" + query = "INSERT INTO history (`timestamp`, fileset, oldfileset, log) VALUES (NOW(), %s, %s, %s)" with conn.cursor() as cursor: try: - cursor.execute(query) + cursor.execute( + query, (target_id, source_id, log_last if log_last is not None else 0) + ) conn.commit() except Exception as e: conn.rollback() @@ -390,7 +422,8 @@ def get_all_related_filesets(fileset_id, conn, visited=None): try: with conn.cursor() as cursor: cursor.execute( - f"SELECT fileset, oldfileset FROM history WHERE fileset = {fileset_id} OR oldfileset = {fileset_id}" + "SELECT fileset, oldfileset FROM history WHERE fileset = %s OR oldfileset = %s", + (fileset_id, fileset_id), ) history_records = cursor.fetchall() @@ -488,7 +521,7 @@ def db_insert(data_arr, username=None, skiplog=False): detection = src == "scummvm" status = "detection" if detection else src - conn.cursor().execute(f"SET @fileset_time_last = {int(time.time())}") + conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),)) with conn.cursor() as cursor: cursor.execute("SELECT MAX(`transaction`) FROM transactions") @@ -498,23 +531,34 @@ def db_insert(data_arr, username=None, skiplog=False): transaction_id = temp + 1 category_text = f"Uploaded from {src}" - log_text = f"Started loading DAT file, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}" + log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. 
Transaction: {transaction_id}" user = f"cli:{getpass.getuser()}" if username is None else username create_log(escape_string(category_text), user, escape_string(log_text), conn) + console_log(log_text) + console_log_total_filesets(filepath) + + fileset_count = 1 for fileset in game_data: + console_log_detection(fileset_count) key = calc_key(fileset) megakey = calc_megakey(fileset) if detection: - engine_name = fileset["engine"] - engineid = fileset["sourcefile"] - gameid = fileset["name"] - title = fileset["title"] - extra = fileset["extra"] - platform = fileset["platform"] - lang = fileset["language"] + try: + engine_name = fileset.get("engine", "") + engineid = fileset["sourcefile"] + gameid = fileset["name"] + title = fileset.get("title", "") + extra = fileset.get("extra", "") + platform = fileset.get("platform", "") + lang = fileset.get("language", "") + except KeyError as e: + print( + f"Missing key in header: {e} for {fileset.get('name', '')}-{fileset.get('language', '')}-{fileset.get('platform', '')}" + ) + return with conn.cursor() as cursor: query = """ @@ -527,7 +571,7 @@ def db_insert(data_arr, username=None, skiplog=False): if existing_entry is not None: log_text = f"Skipping Entry as similar entry already exsits - Fileset:{existing_entry['id']}. Skpped entry details - engineid = {engineid}, gameid = {gameid}, platform = {platform}, language = {lang}" create_log("Warning", user, escape_string(log_text), conn) - print(log_text) + console_log(log_text) continue insert_game( @@ -558,23 +602,27 @@ def db_insert(data_arr, username=None, skiplog=False): for file in unique_files: insert_file(file, detection, src, conn) + file_id = None + with conn.cursor() as cursor: + cursor.execute("SELECT @file_last AS file_id") + file_id = cursor.fetchone()["file_id"] for key, value in file.items(): if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]: - insert_filechecksum(file, key, conn) + insert_filechecksum(file, key, file_id, conn) + + fileset_count += 1 - if detection: - conn.cursor().execute( - "UPDATE fileset SET status = 'obsolete' WHERE `timestamp` != FROM_UNIXTIME(@fileset_time_last) AND status = 'detection'" - ) cur = conn.cursor() try: cur.execute( - f"SELECT COUNT(fileset) from transactions WHERE `transaction` = {transaction_id}" + "SELECT COUNT(fileset) from transactions WHERE `transaction` = %s", + (transaction_id,), ) fileset_insertion_count = cur.fetchone()["COUNT(fileset)"] category_text = f"Uploaded from {src}" log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Number of filesets: {fileset_insertion_count}. 
Transaction: {transaction_id}" + console_log(log_text) except Exception as e: print("Inserting failed:", e) else: @@ -585,11 +633,13 @@ def db_insert(data_arr, username=None, skiplog=False): def compare_filesets(id1, id2, conn): with conn.cursor() as cursor: cursor.execute( - f"SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = '{id1}'" + "SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = %s", + (id1,), ) fileset1 = cursor.fetchall() cursor.execute( - f"SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = '{id2}'" + "SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = %s", + (id2,), ) fileset2 = cursor.fetchall() @@ -623,9 +673,9 @@ def find_matching_game(game_files): for file in game_files: checksum = file[1] - query = f"SELECT file.fileset as file_fileset FROM filechecksum JOIN file ON filechecksum.file = file.id WHERE filechecksum.checksum = '{checksum}' AND file.detection = TRUE" + query = "SELECT file.fileset as file_fileset FROM filechecksum JOIN file ON filechecksum.file = file.id WHERE filechecksum.checksum = %s AND file.detection = TRUE" with conn.cursor() as cursor: - cursor.execute(query) + cursor.execute(query, (checksum,)) records = cursor.fetchall() # If file is not part of detection entries, skip it @@ -640,7 +690,8 @@ def find_matching_game(game_files): for key, value in Counter(matching_filesets).items(): with conn.cursor() as cursor: cursor.execute( - f"SELECT COUNT(file.id) FROM file JOIN fileset ON file.fileset = fileset.id WHERE fileset.id = '{key}'" + "SELECT COUNT(file.id) FROM file JOIN fileset ON file.fileset = fileset.id WHERE fileset.id = %s", + (key,), ) count_files_in_fileset = cursor.fetchone()["COUNT(file.id)"] @@ -651,7 +702,8 @@ def find_matching_game(game_files): with conn.cursor() as cursor: cursor.execute( - f"SELECT engineid, game.id, gameid, platform, language, `key`, src, fileset.id as fileset FROM game JOIN fileset ON fileset.game = game.id JOIN engine ON engine.id = game.engine WHERE fileset.id = '{key}'" + "SELECT engineid, game.id, gameid, platform, language, `key`, src, fileset.id as fileset FROM game JOIN fileset ON fileset.game = game.id JOIN engine ON engine.id = game.engine WHERE fileset.id = %s", + (key,), ) records = cursor.fetchall() @@ -675,7 +727,7 @@ def find_matching_game(game_files): if compare_filesets(matching_games[0]["fileset"], game_files[0][0], conn): with conn.cursor() as cursor: cursor.execute( - f"UPDATE fileset SET `delete` = TRUE WHERE id = {game_files[0][0]}" + "UPDATE fileset SET `delete` = TRUE WHERE id = %s", (game_files[0][0],) ) return [] @@ -688,7 +740,8 @@ def merge_filesets(detection_id, dat_id): try: with conn.cursor() as cursor: cursor.execute( - f"SELECT DISTINCT(filechecksum.checksum), checksize, checktype FROM filechecksum JOIN file on file.id = filechecksum.file WHERE fileset = '{detection_id}'" + "SELECT DISTINCT(filechecksum.checksum), checksize, checktype FROM filechecksum JOIN file on file.id = filechecksum.file WHERE fileset = %s", + (detection_id,), ) detection_files = cursor.fetchall() @@ -698,22 +751,26 @@ def merge_filesets(detection_id, dat_id): checktype = file[2] cursor.execute( - f"DELETE FROM file WHERE checksum = '{checksum}' AND fileset = {detection_id} LIMIT 1" + "DELETE FROM file WHERE checksum = %s AND fileset = %s LIMIT 1", + (checksum, detection_id), ) cursor.execute( - f"UPDATE file JOIN filechecksum ON filechecksum.file = file.id SET detection = TRUE, checksize = {checksize}, checktype =
'{checktype}' WHERE fileset = '{dat_id}' AND filechecksum.checksum = '{checksum}'" + "UPDATE file JOIN filechecksum ON filechecksum.file = file.id SET detection = TRUE, checksize = %s, checktype = %s WHERE fileset = %s AND filechecksum.checksum = %s", + (checksize, checktype, dat_id, checksum), ) cursor.execute( - f"INSERT INTO history (`timestamp`, fileset, oldfileset) VALUES (FROM_UNIXTIME({int(time.time())}), {dat_id}, {detection_id})" + "INSERT INTO history (`timestamp`, fileset, oldfileset) VALUES (FROM_UNIXTIME(%s), %s, %s)", + (int(time.time()), dat_id, detection_id), ) cursor.execute("SELECT LAST_INSERT_ID()") history_last = cursor.fetchone()["LAST_INSERT_ID()"] cursor.execute( - f"UPDATE history SET fileset = {dat_id} WHERE fileset = {detection_id}" + "UPDATE history SET fileset = %s WHERE fileset = %s", + (dat_id, detection_id), ) - cursor.execute(f"DELETE FROM fileset WHERE id = {detection_id}") + cursor.execute("DELETE FROM fileset WHERE id = %s", (detection_id,)) conn.commit() except Exception as e: @@ -770,11 +827,13 @@ def populate_matching_games(): log_text = f"Matched game {matched_game['engineid']}:\n{matched_game['gameid']}-{matched_game['platform']}-{matched_game['language']}\nvariant {matched_game['key']}. State {status}. Fileset:{fileset[0][0]}." # Updating the fileset.game value to be $matched_game["id"] - query = f"UPDATE fileset SET game = {matched_game['id']}, status = '{status}', `key` = '{matched_game['key']}' WHERE id = {fileset[0][0]}" + query = "UPDATE fileset SET game = %s, status = %s, `key` = %s WHERE id = %s" history_last = merge_filesets(matched_game["fileset"], fileset[0][0]) - if cursor.execute(query): + if cursor.execute( + query, (matched_game["id"], status, matched_game["key"], fileset[0][0]) + ): user = f"cli:{getpass.getuser()}" create_log( @@ -793,7 +852,7 @@ def populate_matching_games(): # Add log id to the history table cursor.execute( - f"UPDATE history SET log = {log_last} WHERE id = {history_last}" + "UPDATE history SET log = %s WHERE id = %s", (log_last, history_last) ) try: @@ -831,7 +890,7 @@ def match_fileset(data_arr, username=None, skiplog=False): detection = src == "scummvm" source_status = "detection" if detection else src - conn.cursor().execute(f"SET @fileset_time_last = {int(time.time())}") + conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),)) with conn.cursor() as cursor: cursor.execute("SELECT MAX(`transaction`) FROM transactions") @@ -839,8 +898,9 @@ def match_fileset(data_arr, username=None, skiplog=False): transaction_id = transaction_id + 1 if transaction_id else 1 category_text = f"Uploaded from {src}" - log_text = f"Started loading DAT file, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. Transaction: {transaction_id}" - + log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. 
Transaction: {transaction_id}" + console_log(log_text) + console_log_total_filesets(filepath) user = f"cli:{getpass.getuser()}" if username is None else username create_log(escape_string(category_text), user, escape_string(log_text), conn) @@ -859,6 +919,21 @@ def match_fileset(data_arr, username=None, skiplog=False): user, skiplog, ) + elif src == "scan": + scan_process( + game_data, + resources, + detection, + src, + conn, + transaction_id, + filepath, + author, + version, + source_status, + user, + skiplog, + ) else: game_data_lookup = {fs["name"]: fs for fs in game_data} for fileset in game_data: @@ -881,6 +956,720 @@ def match_fileset(data_arr, username=None, skiplog=False): ) +def scan_process( + game_data, + resources, + detection, + src, + conn, + transaction_id, + filepath, + author, + version, + source_status, + user, + skiplog, +): + """ + Entry point for processing logic for scan.dat. + First Pass - Update all files with matching checksum and file size. + Second Pass - Filter candidate with matching with filename, filesize and filechecksum + - Perform matching. + """ + + manual_merged_filesets = 0 + automatic_merged_filesets = 0 + match_with_full_fileset = 0 + mismatch_with_full_fileset = 0 + dropped_early_no_candidate = 0 + manual_merged_with_detection = 0 + filesets_with_missing_files = 0 + + id_to_fileset_mapping = defaultdict(dict) + + fileset_count = 0 + for fileset in game_data: + console_log_file_update(fileset_count) + key = calc_key(fileset) + megakey = "" + log_text = f"State {source_status}." + + (fileset_id, existing) = insert_new_fileset( + fileset, + conn, + detection, + src, + key, + megakey, + transaction_id, + log_text, + user, + skiplog=skiplog, + ) + if existing: + continue + + id_to_fileset_mapping[fileset_id] = fileset + + # set of filesets whose files got updated + filesets_check_for_full = set() + + for rom in fileset["rom"]: + pre_update_files(rom, filesets_check_for_full, transaction_id, conn) + fileset_count += 1 + + fileset_count = 0 + for fileset_id, fileset in id_to_fileset_mapping.items(): + console_log_matching(fileset_count) + candidate_filesets = scan_filter_candidate_filesets( + fileset_id, fileset, transaction_id, conn + ) + + if len(candidate_filesets) == 0: + category_text = "Drop fileset - No Candidates" + fileset_name = fileset["name"] if "name" in fileset else "" + fileset_description = ( + fileset["description"] if "description" in fileset else "" + ) + log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." 
+ create_log( + escape_string(category_text), user, escape_string(log_text), conn + ) + dropped_early_no_candidate += 1 + delete_original_fileset(fileset_id, conn) + continue + + ( + automatic_merged_filesets, + manual_merged_filesets, + match_with_full_fileset, + mismatch_with_full_fileset, + manual_merged_with_detection, + filesets_with_missing_files, + ) = scan_perform_match( + fileset, + src, + user, + fileset_id, + detection, + candidate_filesets, + automatic_merged_filesets, + manual_merged_filesets, + match_with_full_fileset, + mismatch_with_full_fileset, + manual_merged_with_detection, + filesets_with_missing_files, + conn, + skiplog, + ) + fileset_count += 1 + + # Final log + with conn.cursor() as cursor: + cursor.execute( + "SELECT COUNT(fileset) from transactions WHERE `transaction` = %s", + (transaction_id,), + ) + fileset_insertion_count = cursor.fetchone()["COUNT(fileset)"] + category_text = f"Uploaded from {src}" + log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}. State {source_status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}" + create_log(escape_string(category_text), user, escape_string(log_text), conn) + category_text = "Upload information" + log_text = f"Number of filesets: {fileset_insertion_count}. Filesets automatically merged: {automatic_merged_filesets}. Filesets requiring manual merge (multiple candidates): {manual_merged_filesets}. Filesets requiring manual merge (matched with detection): {manual_merged_with_detection}. Filesets dropped, no candidate: {dropped_early_no_candidate}. Filesets matched with existing Full fileset: {match_with_full_fileset}. Filesets with mismatched files with Full fileset: {mismatch_with_full_fileset}. Filesets missing files compared to partial fileset candidate: {filesets_with_missing_files}." + console_log(log_text) + create_log(escape_string(category_text), user, escape_string(log_text), conn) + + +def pre_update_files(rom, filesets_check_for_full, transaction_id, conn): + """ + Updates all the checksums for the files matching by a checksum and size. 
+ """ + with conn.cursor() as cursor: + checksums = defaultdict(str) + for key in rom: + if key not in ["name", "size", "size-r", "size-rd", "modification-time"]: + checksums[key] = rom[key] + + files_to_update = set() + size = rom["size"] if "size" in rom else 0 + size_r = rom["size-r"] if "size-r" in rom else 0 + size_rd = rom["size-rd"] if "size-rd" in rom else 0 + + for _, checksum in checksums.items(): + query = """ + SELECT f.id as file_id, fs.id as fileset_id + FROM file f + JOIN filechecksum fc ON fc.file = f.id + JOIN fileset fs ON fs.id = f.fileset + JOIN transactions t ON t.fileset = fs.id + WHERE fc.checksum = %s + AND f.size = %s + AND f.`size-r` = %s + AND f.`size-rd` = %s + AND t.transaction != %s + """ + + cursor.execute(query, (checksum, size, size_r, size_rd, transaction_id)) + result = cursor.fetchall() + if result: + for file in result: + filesets_check_for_full.add(file["fileset_id"]) + files_to_update.add(file["file_id"]) + + for file_id in files_to_update: + query = """ + DELETE FROM filechecksum + WHERE file = %s + """ + cursor.execute(query, (file_id,)) + # Update checksums + for check, checksum in checksums.items(): + checksize, checktype, checksum = get_checksum_props(check, checksum) + query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)" + cursor.execute(query, (file_id, checksize, checktype, checksum)) + # Update sizes + query = """ + UPDATE file + SET size = %s, + `size-r` = %s, + `size-rd` = %s, + name = %s + WHERE id = %s + """ + cursor.execute( + query, (size, size_r, size_rd, normalised_path(rom["name"]), file_id) + ) + + +def scan_perform_match( + fileset, + src, + user, + fileset_id, + detection, + candidate_filesets, + automatic_merged_filesets, + manual_merged_filesets, + match_with_full_fileset, + mismatch_with_full_fileset, + manual_merged_with_detection, + filesets_with_missing_files, + conn, + skiplog, +): + """ + Performs matching for scan.dat. + If single candidate for match: + detection -> Copy all the files and checksums from scan. + partial -> Copy all the files and checksums from scan. + full -> Drop the scan fileset. But show the differences in file if any. + If more than one candidate for match: + Put them for manual merge. + """ + with conn.cursor() as cursor: + if len(candidate_filesets) == 1: + matched_fileset_id = candidate_filesets[0] + cursor.execute( + "SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,) + ) + status = cursor.fetchone()["status"] + # Partial filesets can be turned full directly, as the files have already been updated. + # But the files that had missing size were not updated, so we need to check. + if status == "partial": + # Partial filesets contain all the files, so does the scanned filesets, so this case should not ideally happen. + if total_files(matched_fileset_id, conn) > total_fileset_files(fileset): + category_text = "Missing files" + log_text = f"Missing files in Fileset:{fileset_id}. Try manual merge with Fileset:{matched_fileset_id}." 
+ add_manual_merge( + candidate_filesets, + fileset_id, + category_text, + log_text, + user, + conn, + log_text, + ) + filesets_with_missing_files += 1 + + else: + update_all_files(fileset, matched_fileset_id, False, conn) + update_fileset_status(cursor, matched_fileset_id, "full") + if not skiplog: + log_matched_fileset( + src, + fileset_id, + matched_fileset_id, + "full", + user, + conn, + ) + delete_original_fileset(fileset_id, conn) + automatic_merged_filesets += 1 + + # Detection filests can be turned full if the number of files are equal, + # otherwise we do manual merge to remove extra files. + elif status == "detection": + if total_fileset_files(fileset) == total_files( + matched_fileset_id, conn, detection_only=True + ): + update_all_files(fileset, matched_fileset_id, True, conn) + update_fileset_status(cursor, matched_fileset_id, "full") + if not skiplog: + log_matched_fileset( + src, + fileset_id, + matched_fileset_id, + "full", + user, + conn, + ) + delete_original_fileset(fileset_id, conn) + automatic_merged_filesets += 1 + + else: + category_text = "Manual Merge - Detection found" + log_text = f"Matched with detection. Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}." + add_manual_merge( + candidate_filesets, + fileset_id, + category_text, + log_text, + user, + conn, + log_text, + ) + manual_merged_with_detection += 1 + + # Drop the fileset, note down the file differences + elif status == "full": + (unmatched_candidate_files, unmatched_scan_files) = get_unmatched_files( + matched_fileset_id, fileset, conn + ) + fully_matched = ( + True + if len(unmatched_candidate_files) == 0 + and len(unmatched_scan_files) == 0 + else False + ) + if fully_matched: + match_with_full_fileset += 1 + else: + mismatch_with_full_fileset += 1 + log_scan_match_with_full( + fileset_id, + matched_fileset_id, + unmatched_candidate_files, + unmatched_scan_files, + fully_matched, + user, + conn, + ) + delete_original_fileset(fileset_id, conn) + + elif len(candidate_filesets) > 1: + category_text = "Manual Merge - Multiple Candidates" + log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidate_filesets)}." + manual_merged_filesets += 1 + add_manual_merge( + candidate_filesets, + fileset_id, + category_text, + log_text, + user, + conn, + log_text, + ) + + return ( + automatic_merged_filesets, + manual_merged_filesets, + match_with_full_fileset, + mismatch_with_full_fileset, + manual_merged_with_detection, + filesets_with_missing_files, + ) + + +def update_all_files(fileset, candidate_fileset_id, is_candidate_detection, conn): + """ + Updates all the files, if they were missed out earlier due to missing size. + """ + with conn.cursor() as cursor: + # Extracting the filename from the filepath. 
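+        # Matching strategy used below: candidate files are paired with scan entries by base filename when that name occurs only once in the scan, with a checksum lookup as the fallback for duplicate names. +        # A paired file then has its old checksums replaced and its sizes updated; its stored name is only rewritten when the candidate fileset is in detection state.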
+ cursor.execute( + "SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE fileset = %s", + (candidate_fileset_id,), + ) + target_files = cursor.fetchall() + candidate_files = { + target_file["id"]: target_file["name"].lower() + for target_file in target_files + } + + scan_checksums = set() + scan_names_by_checksum = defaultdict(str) + same_filename_count = defaultdict(int) + + filename_to_filepath_map = defaultdict(str) + filepath_to_checksum_map = defaultdict(dict) + filepath_to_sizes_map = defaultdict(dict) + + for file in fileset["rom"]: + base_name = os.path.basename(normalised_path(file["name"])).lower() + checksums = defaultdict(str) + sizes = defaultdict(int) + for key in file: + if key.startswith("md5"): + scan_checksums.add((file[key], base_name)) + scan_names_by_checksum[(file[key], base_name)] = file["name"] + checksums[key] = file[key] + if key.startswith("size"): + sizes[key] = file[key] + + filepath_to_sizes_map[file["name"]] = sizes + filepath_to_checksum_map[file["name"]] = checksums + same_filename_count[base_name] += 1 + filename_to_filepath_map[base_name] = file["name"] + + checksums = defaultdict(dict) + filepath = "" + + for file_id, file_name in candidate_files.items(): + file_name = file_name.lower() + # Match by filename + if same_filename_count[file_name] == 1: + filepath = filename_to_filepath_map[file_name] + checksums = filepath_to_checksum_map[filepath] + + # If same filename occurs multiple times, fallback to checksum based match + else: + cursor.execute( + "SELECT checksum FROM filechecksum WHERE file = %s", (file_id,) + ) + checksum_rows = cursor.fetchall() + for row in checksum_rows: + checksum = row["checksum"] + if (checksum, file_name) in scan_checksums: + filepath = scan_names_by_checksum[(checksum, file_name)] + checksums = filepath_to_checksum_map[filepath] + + # Delete older checksums + query = """ + DELETE FROM filechecksum + WHERE file = %s + """ + cursor.execute(query, (file_id,)) + # Update the checksums + for key, checksum in checksums.items(): + checksize, checktype, checksum = get_checksum_props(key, checksum) + query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)" + cursor.execute(query, (file_id, checksize, checktype, checksum)) + + # Also updates the sizes, do not update the name if fileset not in detection state + query = """ + UPDATE file + SET size = %s, + `size-r` = %s, + `size-rd` = %s + """ + sizes = filepath_to_sizes_map[filepath] + if is_candidate_detection: + query += ",name = %s WHERE id = %s" + params = ( + sizes["size"], + sizes["size-r"], + sizes["size-rd"], + normalised_path(filepath), + file_id, + ) + else: + query += "WHERE id = %s" + params = (sizes["size"], sizes["size-r"], sizes["size-rd"], file_id) + cursor.execute(query, params) + + +def total_files(fileset_id, conn, detection_only=False): + """ + Returns the total number of files (only detection files if detection_only set to true) present in the given fileset from the database. 
+ """ + with conn.cursor() as cursor: + query = """ + SELECT COUNT(*) AS count + FROM file f + JOIN fileset fs ON fs.id = f.fileset + """ + if detection_only: + query += """ + WHERE f.detection = 1 + AND fs.id = %s + """ + else: + query += "WHERE fs.id = %s" + cursor.execute(query, (fileset_id,)) + return cursor.fetchone()["count"] + + +def total_fileset_files(fileset): + """ + Returns the number of files present in the fileset + """ + return len(fileset["rom"]) + + +def scan_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn): + """ + Returns a list of candidate filesets that can be merged. + Performs early filtering in SQL (by name, size) and then + applies checksum filtering and max-match filtering in Python. + """ + with conn.cursor() as cursor: + # Fetching detection filename and all sizes (size, size-r, size-rd) from database + query = """ + SELECT fs.id AS fileset_id, f.id as file_id, f.name, f.size, + f.`size-r` AS size_r, f.`size-rd` AS size_rd + FROM file f + JOIN fileset fs ON f.fileset = fs.id + JOIN game g ON g.id = fs.game + JOIN engine e ON e.id = g.engine + JOIN transactions t ON t.fileset = fs.id + WHERE f.detection = 1 + AND t.transaction != %s + """ + cursor.execute(query, (transaction_id,)) + raw_candidates = cursor.fetchall() + + # fileset id to detection files map + candidate_map = defaultdict(list) + total_detection_files_map = defaultdict(int) + for row in raw_candidates: + candidate_map[row["fileset_id"]].append( + { + "file_id": row["file_id"], + "name": os.path.basename(normalised_path(row["name"])).lower(), + "size": row["size"], + "size-r": row["size_r"], + "size-rd": row["size_rd"], + } + ) + for id, files in candidate_map.items(): + total_detection_files_map[id] = len(files) + + set_checksums = set() + set_file_name_size = set() + for file in fileset["rom"]: + name = os.path.basename(normalised_path(file["name"])) + for key in file: + if key.startswith("md5"): + set_checksums.add( + ( + file[key], + name.lower(), + int(file["size"]), + int(file["size-r"]), + int(file["size-rd"]), + ) + ) + set_checksums.add( + ( + file[key], + name.lower(), + -1, + int(file["size-r"]), + int(file["size-rd"]), + ) + ) + set_file_name_size.add( + (name.lower(), -1, int(file["size-r"]), int(file["size-rd"])) + ) + set_file_name_size.add( + (name.lower(), int(file["size"]), int(file["size-r"]), int(file["size-rd"])) + ) + + # Filter candidates by detection filename and file size (including -1) and increase matched file count + # if filesize = -1, + # elif filesize <= checksize and checksum matches, + # elif filesize > checksize. 
+ match_counts = {} + for fileset_id, files in candidate_map.items(): + count = 0 + with conn.cursor() as cursor: + for f in files: + filename = os.path.basename(f["name"]).lower() + size = f["size"] + size_r = f["size-r"] + size_rd = f["size-rd"] + if (filename, size, size_r, size_rd) in set_file_name_size: + if size == -1: + count += 1 + else: + cursor.execute( + """ + SELECT checksum, checksize, checktype + FROM filechecksum + WHERE file = %s + """, + (f["file_id"],), + ) + checksums = cursor.fetchall() + not_inc_count = False + for c in checksums: + filesize = size + checksum = c["checksum"] + checksize = c["checksize"] + checktype = c["checktype"] + # Macfiles handling + if checktype in ["md5-r", "md5-rt"]: + filesize = size_rd + + if checksize == "1M": + checksize = 1048576 + elif checksize == "0": + checksize = filesize + if filesize <= int(checksize): + if ( + checksum, + filename, + size, + size_r, + size_rd, + ) in set_checksums: + count += 1 + not_inc_count = True + # if it was a true match, checksum should be present + break + if not not_inc_count: + count += 1 + if count > 0 and total_detection_files_map[fileset_id] <= count: + match_counts[fileset_id] = count + + # Filter only entries with maximum number of matched files + if not match_counts: + return [] + + max_match = max(match_counts.values()) + candidates = [fid for fid, count in match_counts.items() if count == max_match] + + matched_candidates = [] + for candidate in candidates: + if is_full_detection_checksum_match(candidate, fileset, conn): + matched_candidates.append(candidate) + + if len(matched_candidates) != 0: + candidates = matched_candidates + + return candidates + + +def get_unmatched_files(candidate_fileset, fileset, conn): + """ + Checks if all checksums from candidate_fileset match dat file checksums. + Returns: + unmatched_candidate_files: candidate files whose checksums weren't found in scan + unmatched_dat_files: dat files whose checksums weren't matched by candidate + """ + with conn.cursor() as cursor: + cursor.execute( + "SELECT id, name FROM file WHERE fileset = %s", (candidate_fileset,) + ) + candidate_file_rows = cursor.fetchall() + candidate_files = {row["id"]: row["name"] for row in candidate_file_rows} + + dat_checksums = set() + dat_names_by_checksum = {} + + for file in fileset["rom"]: + base_name = os.path.basename(normalised_path(file["name"])).lower() + for key in file: + if key.startswith("md5"): + dat_checksums.add((file[key], base_name)) + dat_names_by_checksum[(file[key], base_name)] = file["name"] + + unmatched_candidate_files = [] + matched_dat_pairs = set() + + for file_id, file_name in candidate_files.items(): + cursor.execute( + "SELECT checksum FROM filechecksum WHERE file = %s", (file_id,) + ) + checksum_rows = cursor.fetchall() + + base_name = os.path.basename(file_name).lower() + match_found = False + + for row in checksum_rows: + checksum = row["checksum"] + if (checksum, base_name) in dat_checksums: + matched_dat_pairs.add((checksum, base_name)) + match_found = True + + if not match_found: + unmatched_candidate_files.append(file_name) + + unmatched_dat_files = { + dat_names_by_checksum[key] + for key in dat_checksums + if key not in matched_dat_pairs + } + unmatched_dat_files = list(unmatched_dat_files) + + return (unmatched_candidate_files, unmatched_dat_files) + + +def is_full_detection_checksum_match(candidate_fileset, fileset, conn): + """ + Return type - Boolean + Checks if all the detection files in the candidate fileset have corresponding checksums matching with scan. 
+ + scan - rom ( name "AFM Read Me!_2" size 8576 size-r 1 size-rd 0 modification-time 1993-05-12 md5 dsd16ccea050db521a678a1cdc33794c md5-5000 008e76ec3ae58d0add637ea7aa299a2a md5-t-5000 118e76ec3ae58d0add637ea7aa299a2c md5-1048576 37d16ccea050db521a678a1cdc33794c) + """ + with conn.cursor() as cursor: + cursor.execute( + "SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name FROM file WHERE detection=1 AND fileset = %s", + (candidate_fileset,), + ) + target_files = cursor.fetchall() + candidate_files = { + target_file["id"]: target_file["name"] for target_file in target_files + } + + # set of (checksum, filename) + scan_checksums = set() + for file in fileset["rom"]: + for key in file: + if key.startswith("md5"): + name = os.path.basename(normalised_path(file["name"])) + scan_checksums.add((file[key], name.lower())) + + for detection_file_id, detection_file_name in candidate_files.items(): + query = """ + SELECT fc.checksum, fc.checksize, fc.checktype + FROM filechecksum fc + WHERE fc.file = %s + """ + cursor.execute(query, (detection_file_id,)) + checksums_info = cursor.fetchall() + match_found = False + if checksums_info: + for checksum_info in checksums_info: + checksum = checksum_info["checksum"] + if ( + checksum, + os.path.basename(detection_file_name.lower()), + ) not in scan_checksums: + match_found = True + break + + if match_found: + return False + + return True + + +# ------------------------------------------------------------------------------------------------------- +# Set.dat processing below +# ------------------------------------------------------------------------------------------------------- + + def set_process( game_data, resources, @@ -909,6 +1698,9 @@ def set_process( mismatch_filesets = 0 dropped_early_no_candidate = 0 dropped_early_single_candidate_multiple_sets = 0 + + fileset_count = 0 + # A mapping from set filesets to candidate filesets list set_to_candidate_dict = defaultdict(list) id_to_fileset_dict = defaultdict(dict) @@ -937,6 +1729,11 @@ def set_process( megakey = "" log_text = f"State {source_status}." + set_dat_metadata = "" + for meta in fileset: + if meta != "rom": + set_dat_metadata += meta + " = " + fileset[meta] + " , " + (fileset_id, existing) = insert_new_fileset( fileset, conn, @@ -947,13 +1744,18 @@ def set_process( transaction_id, log_text, user, + set_dat_metadata=set_dat_metadata, skiplog=skiplog, ) + if existing: continue - candidate_filesets = set_filter_candidate_filesets( - fileset_id, fileset, transaction_id, conn + # Separating out the matching logic for glk engine + engine_name = fileset["sourcefile"].split("-")[0] + + (candidate_filesets, fileset_count) = set_filter_candidate_filesets( + fileset_id, fileset, fileset_count, transaction_id, engine_name, conn ) # Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them. @@ -969,11 +1771,19 @@ def set_process( ) dropped_early_no_candidate += 1 delete_original_fileset(fileset_id, conn) - id_to_fileset_dict[fileset_id] = fileset set_to_candidate_dict[fileset_id].extend(candidate_filesets) - # Remove all such filesets, which have many to one mapping with a single candidate, those are extra variants. + console_message = "Candidate filtering finished." + console_log(console_message) + console_message = ( + f"{dropped_early_no_candidate} Filesets Dropped for No candidates." + ) + console_log(console_message) + console_message = "Looking for duplicates..." 
+ console_log(console_message) + + # Remove all such filesets, which have many to one mapping with a single candidate, just merge one of them. value_to_keys = defaultdict(list) for set_fileset, candidates in set_to_candidate_dict.items(): if len(candidates) == 1: @@ -997,7 +1807,12 @@ def set_process( platform = result["platform"] language = result["language"] + # Skip the first entry, let it merge and drop others + skip = True for set_fileset in set_filesets: + if skip: + skip = False + continue fileset = id_to_fileset_dict[set_fileset] category_text = "Drop fileset - Duplicates" fileset_name = fileset["name"] if "name" in fileset else "" @@ -1005,6 +1820,7 @@ def set_process( fileset["description"] if "description" in fileset else "" ) log_text = f"Drop fileset, multiple filesets mapping to single detection. Name: {fileset_name}, Description: {fileset_description}. Clashed with Fileset:{candidate} ({engine}:{gameid}-{platform}-{language})" + console_log(log_text) create_log( escape_string(category_text), user, escape_string(log_text), conn ) @@ -1013,8 +1829,18 @@ def set_process( del set_to_candidate_dict[set_fileset] del id_to_fileset_dict[set_fileset] + manual_merge_map = defaultdict(list) + + match_count = 1 for fileset_id, candidate_filesets in set_to_candidate_dict.items(): + console_log_matching(match_count) fileset = id_to_fileset_dict[fileset_id] + + # Filter by platform to reduce manual merge + # candidate_filesets = set_filter_by_platform( + # fileset["name"], candidate_filesets, conn + # ) + ( fully_matched_filesets, auto_merged_filesets, @@ -1031,12 +1857,44 @@ def set_process( auto_merged_filesets, manual_merged_filesets, mismatch_filesets, + manual_merge_map, + set_to_candidate_dict, conn, skiplog, ) - # Final log + match_count += 1 + console_log("Matching performed.") + with conn.cursor() as cursor: + for fileset_id, candidates in manual_merge_map.items(): + if len(candidates) == 0: + category_text = "Drop fileset - No Candidates" + fileset = id_to_fileset_dict[fileset_id] + fileset_name = fileset["name"] if "name" in fileset else "" + fileset_description = ( + fileset["description"] if "description" in fileset else "" + ) + log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." + create_log( + escape_string(category_text), user, escape_string(log_text), conn + ) + dropped_early_no_candidate += 1 + delete_original_fileset(fileset_id, conn) + else: + category_text = "Manual Merge Required" + log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}." + manual_merged_filesets += 1 + add_manual_merge( + candidates, + fileset_id, + category_text, + log_text, + user, + conn, + log_text, + ) + cursor.execute( "SELECT COUNT(fileset) from transactions WHERE `transaction` = %s", (transaction_id,), @@ -1047,9 +1905,48 @@ def set_process( create_log(escape_string(category_text), user, escape_string(log_text), conn) category_text = "Upload information" log_text = f"Number of filesets: {fileset_insertion_count}. Filesets automatically merged: {auto_merged_filesets}. Filesets dropped early (no candidate) - {dropped_early_no_candidate}. Filesets dropped early (mapping to single detection) - {dropped_early_single_candidate_multiple_sets}. Filesets requiring manual merge: {manual_merged_filesets}. Partial/Full filesets already present: {fully_matched_filesets}. Partial/Full filesets with mismatch {mismatch_filesets}." 
+ console_log(log_text) create_log(escape_string(category_text), user, escape_string(log_text), conn) +def set_filter_by_platform(gameid, candidate_filesets, conn): + """ + Return - list(number) : list of fileset ids of filtered candidates. + The number of manual merges in case the file size is not present (equal to -1) are too high. So we try to filter by platform extracted from the gameId of the set.dat fileset. We may disable this feature later or keep it optional with a command line argument. + """ + with conn.cursor() as cursor: + # e.g. sq2-coco3-1 + possible_platform_names = gameid.split("-")[1:] + + # Align platform names in set.dat and detection entries + for i, platform in enumerate(possible_platform_names): + if platform == "win": + possible_platform_names[i] = "windows" + elif platform == "mac": + possible_platform_names[i] = "macintosh" + + filtered_candidate_fileset = [] + + for candidate_fileset_id in candidate_filesets: + query = """ + SELECT g.platform + FROM fileset fs + JOIN game g ON g.id = fs.game + WHERE fs.id = %s + """ + cursor.execute(query, (candidate_fileset_id,)) + candidate_platform = cursor.fetchone()["platform"] + if candidate_platform in possible_platform_names: + filtered_candidate_fileset.append(candidate_fileset_id) + + # If nothing was filtred, then it is likely, that platform information was not present, so we fallback to original list of candidates. + return ( + candidate_filesets + if len(filtered_candidate_fileset) == 0 + else filtered_candidate_fileset + ) + + def set_perform_match( fileset, src, @@ -1061,16 +1958,17 @@ def set_perform_match( auto_merged_filesets, manual_merged_filesets, mismatch_filesets, + manual_merge_map, + set_to_candidate_dict, conn, skiplog, ): """ - TODO + "Performs matching for set.dat" """ with conn.cursor() as cursor: if len(candidate_filesets) == 1: matched_fileset_id = candidate_filesets[0] - cursor.execute( "SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,) ) @@ -1080,7 +1978,7 @@ def set_perform_match( set_populate_file(fileset, matched_fileset_id, conn, detection) auto_merged_filesets += 1 if not skiplog: - set_log_matched_fileset( + log_matched_fileset( src, fileset_id, matched_fileset_id, @@ -1089,10 +1987,19 @@ def set_perform_match( conn, ) delete_original_fileset(fileset_id, conn) + remove_manual_merge_if_size_mismatch( + matched_fileset_id, manual_merge_map, set_to_candidate_dict, conn + ) elif status == "partial" or status == "full": - (is_match, unmatched_files) = is_full_checksum_match( + (unmatched_candidate_files, unmatched_dat_files) = get_unmatched_files( matched_fileset_id, fileset, conn ) + is_match = ( + True + if len(unmatched_candidate_files) == 0 + and len(unmatched_dat_files) == 0 + else False + ) if is_match: category_text = "Already present" log_text = f"Already present as - Fileset:{matched_fileset_id}. Deleting Fileset:{fileset_id}" @@ -1108,53 +2015,21 @@ def set_perform_match( else: category_text = "Mismatch" - log_text = f"Fileset:{fileset_id} mismatched with Fileset:{matched_fileset_id} with status:{status}. Try manual merge." - print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}." + log_text = f"Fileset:{fileset_id} mismatched with Fileset:{matched_fileset_id} with status:{status}. Try manual merge. Unmatched Files in set.dat fileset = {len(unmatched_dat_files)} Unmatched Files in candidate fileset = {len(unmatched_candidate_files)}. 
List of unmatched files scan.dat : {', '.join(scan_file for scan_file in unmatched_dat_files)}, List of unmatched files full fileset : {', '.join(scan_file for scan_file in unmatched_candidate_files)}" + console_log(log_text) + # print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}." mismatch_filesets += 1 add_manual_merge( [matched_fileset_id], fileset_id, category_text, log_text, - print_text, user, conn, ) elif len(candidate_filesets) > 1: - found_match = False - for candidate_fileset in candidate_filesets: - (is_match, _) = is_full_checksum_match(candidate_fileset, fileset, conn) - if is_match: - update_fileset_status(cursor, candidate_fileset, "partial") - set_populate_file(fileset, candidate_fileset, conn, detection) - auto_merged_filesets += 1 - if not skiplog: - set_log_matched_fileset( - src, - fileset_id, - candidate_fileset, - "partial", - user, - conn, - ) - delete_original_fileset(fileset_id, conn) - found_match = True - break - - if not found_match: - category_text = "Manual Merge Required" - log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidate_filesets)}." - manual_merged_filesets += 1 - add_manual_merge( - candidate_filesets, - fileset_id, - category_text, - log_text, - log_text, - user, - conn, - ) + manual_merge_map[fileset_id] = candidate_filesets return ( fully_matched_filesets, @@ -1164,8 +2039,74 @@ def set_perform_match( ) +def remove_manual_merge_if_size_mismatch( + child_fileset, manual_merge_map, set_to_candidate_dict, conn +): + with conn.cursor() as cursor: + query = """ + SELECT f.name, f.size + FROM fileset fs + JOIN file f ON f.fileset = fs.id + WHERE fs.id = %s + AND f.detection = 1 + """ + cursor.execute(query, (child_fileset,)) + files = cursor.fetchall() + + for possible_removals in [manual_merge_map, set_to_candidate_dict]: + for parent_fileset, child_list in possible_removals.items(): + if child_fileset not in child_list: + continue + + for file in files: + if file["size"] == -1: + continue + + query = """ + SELECT fs.id + FROM fileset fs + JOIN file f ON f.fileset = fs.id + WHERE fs.id = %s + AND REGEXP_REPLACE(f.name, '^.*[\\\\/]', '') = %s + AND f.size = %s + LIMIT 1 + """ + filename = os.path.basename(normalised_path(file["name"])) + cursor.execute(query, (parent_fileset, filename, file["size"])) + result = cursor.fetchall() + + if not result: + remove_manual_merge( + child_fileset, + parent_fileset, + manual_merge_map, + set_to_candidate_dict, + conn, + ) + break + + +def remove_manual_merge( + child_fileset, parent_fileset, manual_merge_map, set_to_candidate_dict, conn +): + if parent_fileset in manual_merge_map: + if child_fileset in manual_merge_map[parent_fileset]: + manual_merge_map[parent_fileset].remove(child_fileset) + if parent_fileset in set_to_candidate_dict: + if child_fileset in set_to_candidate_dict[parent_fileset]: + set_to_candidate_dict[parent_fileset].remove(child_fileset) + + with conn.cursor() as cursor: + query = """ + DELETE FROM possible_merges + WHERE child_fileset = %s + AND parent_fileset = %s + """ + cursor.execute(query, (child_fileset, parent_fileset)) + + def add_manual_merge( - child_filesets, parent_fileset, category_text, log_text, print_text, user, conn + child_filesets, parent_fileset, category_text, log_text, user, conn, print_text=None ): """ Adds the manual merge entries to a table called possible_merges. 
@@ -1181,7 +2122,8 @@ def add_manual_merge( cursor.execute(query, (child_fileset, parent_fileset)) create_log(escape_string(category_text), user, escape_string(log_text), conn) - print(print_text) + if print_text: + print(print_text) def is_full_checksum_match(candidate_fileset, fileset, conn): @@ -1219,71 +2161,190 @@ def is_full_checksum_match(candidate_fileset, fileset, conn): return (len(unmatched_files) == 0, unmatched_files) -def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn): +def set_filter_candidate_filesets( + fileset_id, fileset, fileset_count, transaction_id, engine_name, conn +): """ - Returns a list of candidate filesets that can be merged + Returns a list of candidate filesets that can be merged. + Performs early filtering in SQL (by engine, name, size) and then + applies checksum filtering and max-match filtering in Python. + In case of glk engines, filtering is not by name, rather gameid is used. """ + is_glk = engine_name == "glk" with conn.cursor() as cursor: - # Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches + fileset_count += 1 + console_log_candidate_filtering(fileset_count) + # Early filter candidates using enginename, filename and size query = """ - WITH candidate_fileset AS ( - SELECT fs.id AS fileset_id, f.name, f.size + SELECT fs.id AS fileset_id, f.id AS file_id, f.name, f.size FROM file f JOIN fileset fs ON f.fileset = fs.id JOIN game g ON g.id = fs.game JOIN engine e ON e.id = g.engine JOIN transactions t ON t.fileset = fs.id - WHERE fs.id != %s - AND e.engineid = %s + WHERE e.engineid = %s AND f.detection = 1 AND t.transaction != %s - ), - total_detection_files AS ( - SELECT cf.fileset_id, COUNT(*) AS detection_files_found - FROM candidate_fileset cf - GROUP BY fileset_id - ), - set_fileset AS ( - SELECT name, size FROM file - WHERE fileset = %s - ), - matched_detection_files AS ( - SELECT cf.fileset_id, COUNT(*) AS match_files_count - FROM candidate_fileset cf - JOIN set_fileset sf ON ( ( - cf.name = sf.name - OR - REGEXP_REPLACE(cf.name, '^.*[\\\\/]', '') = REGEXP_REPLACE(sf.name, '^.*[\\\\/]', '') - ) AND (cf.size = sf.size OR cf.size = -1) ) - GROUP BY cf.fileset_id - ), - valid_matched_detection_files AS ( - SELECT mdf.fileset_id, mdf.match_files_count AS valid_match_files_count - FROM matched_detection_files mdf - JOIN total_detection_files tdf ON tdf.fileset_id = mdf.fileset_id - WHERE tdf.detection_files_found <= mdf.match_files_count - ), - max_match_count AS ( - SELECT MAX(valid_match_files_count) AS max_count FROM valid_matched_detection_files - ) - SELECT vmdf.fileset_id - FROM valid_matched_detection_files vmdf - JOIN total_detection_files tdf ON vmdf.fileset_id = tdf.fileset_id - JOIN max_match_count mmc ON vmdf.valid_match_files_count = mmc.max_count """ - + if is_glk: + query += " AND (g.gameid = %s OR (g.gameid != %s AND g.gameid LIKE %s))" + gameid_pattern = f"%{fileset['name']}%" + cursor.execute( + query, + ( + engine_name, + transaction_id, + fileset["name"], + fileset["name"], + gameid_pattern, + ), + ) + else: + cursor.execute(query, (fileset["sourcefile"], transaction_id)) + raw_candidates = cursor.fetchall() + + # fileset id to detection files map + candidate_map = defaultdict(list) + total_detection_files_map = defaultdict(int) + for row in raw_candidates: + candidate_map[row["fileset_id"]].append( + { + "file_id": row["file_id"], + "name": 
os.path.basename(normalised_path(row["name"])).lower(), + "size": row["size"], + } + ) + for id, files in candidate_map.items(): + total_detection_files_map[id] = len(files) + + set_checksums = set() + set_file_name_size = set() + set_glk_file_size = set() + for file in fileset["rom"]: + name = os.path.basename(normalised_path(file["name"])) + for key in file: + if key.startswith("md5"): + set_checksums.add((file[key], name.lower(), int(file["size"]))) + set_checksums.add((file[key], name.lower(), -1)) + set_file_name_size.add((name.lower(), -1)) + set_file_name_size.add((name.lower(), int(file["size"]))) + if is_glk: + set_glk_file_size.add(int(file["size"])) + + # Filter candidates by detection filename and file size (including -1) and increase matched file count + # if filesize = -1, + # elif filesize <= checksize and checksum matches, + # elif filesize > checksize. + match_counts = {} + for fileset_id, files in candidate_map.items(): + count = 0 + with conn.cursor() as cursor: + for f in files: + filename = os.path.basename(f["name"]).lower() + filesize = f["size"] + if is_glk and (filesize in set_glk_file_size or filesize == 0): + count += 1 + if (filename, filesize) in set_file_name_size: + if filesize == -1: + count += 1 + else: + cursor.execute( + """ + SELECT checksum, checksize, checktype + FROM filechecksum + WHERE file = %s + """, + (f["file_id"],), + ) + checksums = cursor.fetchall() + not_inc_count = False + for c in checksums: + checksum = c["checksum"] + checksize = c["checksize"] + if checksize == "1M": + checksize = 1048576 + elif checksize == "0": + checksize = filesize + if filesize <= int(checksize): + if (checksum, filename, filesize) in set_checksums: + count += 1 + not_inc_count = True + # if it was a true match, checksum should be present + break + if not not_inc_count: + count += 1 + if count > 0 and total_detection_files_map[fileset_id] <= count: + match_counts[fileset_id] = count + + # Filter only entries with maximum number of matched files + if not match_counts: + return ([], fileset_count) + + max_match = max(match_counts.values()) + candidates = [fid for fid, count in match_counts.items() if count == max_match] + + matched_candidates = [] + for candidate in candidates: + if is_full_detection_checksum_match(candidate, fileset, conn): + matched_candidates.append(candidate) + + if len(matched_candidates) != 0: + candidates = matched_candidates + + return (candidates, fileset_count) + + +def is_candidate_by_checksize(candidate, fileset, conn): + with conn.cursor() as cursor: cursor.execute( - query, (fileset_id, fileset["sourcefile"], transaction_id, fileset_id) + "SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE detection=1 AND fileset = %s", + (candidate,), ) - rows = cursor.fetchall() - - candidates = [] - if rows: - for row in rows: - candidates.append(row["fileset_id"]) + target_files = cursor.fetchall() + candidate_files = { + target_file["id"]: [target_file["name"], target_file["size"]] + for target_file in target_files + } - return candidates + # set of (checksum, filename) + scan_checksums = set() + for file in fileset["rom"]: + for key in file: + if key.startswith("md5"): + name = os.path.basename(normalised_path(file["name"])) + scan_checksums.add((file[key], name.lower())) + + for detection_file_id, [ + detection_file_name, + detection_file_size, + ] in candidate_files.items(): + query = """ + SELECT fc.checksum, fc.checksize, fc.checktype + FROM filechecksum fc + WHERE fc.file = %s + """ + cursor.execute(query, 
(detection_file_id,)) + checksums_info = cursor.fetchall() + if checksums_info: + for checksum_info in checksums_info: + checksum = checksum_info["checksum"] + checksize = checksum_info["checksize"] + if checksize == "1M": + checksize = 1048576 + if ( + ( + checksum, + os.path.basename(detection_file_name.lower()), + ) + not in scan_checksums + and detection_file_size <= int(checksize) + and detection_file_size != -1 + ): + continue + else: + return True + return False def process_fileset( @@ -1374,13 +2435,13 @@ def find_matching_filesets(fileset, conn, status): checksize, checktype, checksum = get_checksum_props( checktype, checksum ) - query = f"""SELECT DISTINCT fs.id AS fileset_id + query = """SELECT DISTINCT fs.id AS fileset_id FROM fileset fs JOIN file f ON fs.id = f.fileset JOIN filechecksum fc ON f.id = fc.file - WHERE fc.checksum = '{checksum}' AND fc.checktype = '{checktype}' - AND fs.status IN ({state})""" - cursor.execute(query) + WHERE fc.checksum = %s AND fc.checktype = %s + AND fs.status IN (%s)""" + cursor.execute(query, (checksum, checktype, state)) records = cursor.fetchall() if records: for record in records: @@ -1403,16 +2464,16 @@ def matching_set(fileset, conn): checksum = checksum.split(":")[1] size = file["size"] - query = f""" + query = """ SELECT DISTINCT fs.id AS fileset_id FROM fileset fs JOIN file f ON fs.id = f.fileset JOIN filechecksum fc ON f.id = fc.file - WHERE fc.checksum = '{checksum}' AND fc.checktype LIKE 'md5%' - AND fc.checksize > {size} + WHERE fc.checksum = %s AND fc.checktype LIKE 'md5%' + AND fc.checksize > %s AND fs.status = 'detection' """ - cursor.execute(query) + cursor.execute(query, (checksum, size)) records = cursor.fetchall() if records: for record in records: @@ -1442,11 +2503,12 @@ def handle_matched_filesets( if is_full_matched: break cursor.execute( - f"SELECT status FROM fileset WHERE id = {matched_fileset_id}" + "SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,) ) status = cursor.fetchone()["status"] cursor.execute( - f"SELECT COUNT(file.id) FROM file WHERE fileset = {matched_fileset_id}" + "SELECT COUNT(file.id) FROM file WHERE fileset = %s", + (matched_fileset_id,), ) count = cursor.fetchone()["COUNT(file.id)"] @@ -1492,28 +2554,31 @@ def handle_matched_filesets( def delete_original_fileset(fileset_id, conn): with conn.cursor() as cursor: - cursor.execute(f"DELETE FROM file WHERE fileset = {fileset_id}") - cursor.execute(f"DELETE FROM fileset WHERE id = {fileset_id}") + cursor.execute("DELETE FROM file WHERE fileset = %s", (fileset_id,)) + cursor.execute("DELETE FROM fileset WHERE id = %s", (fileset_id,)) conn.commit() def update_fileset_status(cursor, fileset_id, status): - cursor.execute(f""" + cursor.execute( + """ UPDATE fileset SET - status = '{status}', - `timestamp` = FROM_UNIXTIME({int(time.time())}) - WHERE id = {fileset_id} - """) + status = %s, + `timestamp` = FROM_UNIXTIME(%s) + WHERE id = %s + """, + (status, int(time.time()), fileset_id), + ) def populate_file(fileset, fileset_id, conn, detection): with conn.cursor() as cursor: - cursor.execute(f"SELECT * FROM file WHERE fileset = {fileset_id}") + cursor.execute("SELECT * FROM file WHERE fileset = %s", (fileset_id,)) target_files = cursor.fetchall() target_files_dict = {} for target_file in target_files: cursor.execute( - f"SELECT * FROM filechecksum WHERE file = {target_file['id']}" + "SELECT * FROM filechecksum WHERE file = %s", (target_file["id"],) ) target_checksums = cursor.fetchall() for checksum in target_checksums: @@ -1579,7 +2644,7 @@ def 
populate_file(fileset, fileset_id, conn, detection): for key, value in file.items(): if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]: - insert_filechecksum(file, key, conn) + insert_filechecksum(file, key, file_id, conn) if value in target_files_dict and not file_exists: cursor.execute( f"SELECT detection_type FROM file WHERE id = {target_files_dict[value]['id']}" @@ -1638,7 +2703,8 @@ def set_populate_file(fileset, fileset_id, conn, detection): with conn.cursor() as cursor: # Extracting the filename from the filepath. cursor.execute( - f"SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE fileset = {fileset_id}" + "SELECT id, REGEXP_REPLACE(name, '^.*[\\\\/]', '') AS name, size FROM file WHERE fileset = %s", + (fileset_id,), ) target_files = cursor.fetchall() candidate_files = { @@ -1646,6 +2712,13 @@ def set_populate_file(fileset, fileset_id, conn, detection): for target_file in target_files } + # For glk engines + candidate_file_size = { + target_file["size"]: target_file["id"] for target_file in target_files + } + + engine_name = fileset["sourcefile"].split("-")[0] + seen_detection_files = set() for file in fileset["rom"]: @@ -1655,35 +2728,38 @@ def set_populate_file(fileset, fileset_id, conn, detection): filename = os.path.basename(normalised_path(file["name"])) - if ((filename.lower(), file["size"]) in seen_detection_files) or ( - filename.lower() not in candidate_files - or ( - filename.lower() in candidate_files - and ( - candidate_files[filename.lower()][1] != -1 - and candidate_files[filename.lower()][1] != file["size"] + if (engine_name == "glk" and file["size"] not in candidate_file_size) or ( + engine_name != "glk" + and ( + (filename.lower(), file["size"]) in seen_detection_files + or ( + filename.lower() not in candidate_files + or ( + filename.lower() in candidate_files + and ( + candidate_files[filename.lower()][1] != -1 + and candidate_files[filename.lower()][1] != file["size"] + ) + ) ) ) ): name = normalised_path(file["name"]) values = [name] - values.append(file["size"] if "size" in file else "0") values.append(file["size-r"] if "size-r" in file else "0") values.append(file["size-rd"] if "size-rd" in file else "0") - values.extend([checksum, fileset_id, detection, "None"]) - placeholders = ( - ["%s"] * (len(values[:5])) + ["%s"] + ["%s"] * 2 + ["NOW()"] - ) - query = f"INSERT INTO file ( name, size, `size-r`, `size-rd`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES ({', '.join(placeholders)})" + query = "INSERT INTO file ( name, size, `size-r`, `size-rd`, checksum, fileset, detection, detection_type, `timestamp` ) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, NOW())" cursor.execute(query, values) cursor.execute("SET @file_last = LAST_INSERT_ID()") cursor.execute("SELECT @file_last AS file_id") - insert_filechecksum(file, "md5", conn) + file_id = cursor.fetchone()["file_id"] + + insert_filechecksum(file, "md5", file_id, conn) else: query = """ @@ -1692,15 +2768,19 @@ def set_populate_file(fileset, fileset_id, conn, detection): name = %s WHERE id = %s """ + # Filtering was by filename, but we are still updating the file with the original filepath. 
cursor.execute( query, ( file["size"], normalised_path(file["name"]), - candidate_files[filename.lower()][0], + candidate_files[filename.lower()][0] + if engine_name != "glk" + else candidate_file_size[file["size"]], ), ) + query = """ INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s) @@ -1708,12 +2788,24 @@ def set_populate_file(fileset, fileset_id, conn, detection): cursor.execute( query, ( - candidate_files[filename.lower()][0], + candidate_files[filename.lower()][0] + if engine_name != "glk" + else candidate_file_size[file["size"]], checksize, checktype, checksum, ), ) + + add_all_equal_checksums( + checksize, + checktype, + checksum, + candidate_files[filename.lower()][0] + if engine_name != "glk" + else candidate_file_size[file["size"]], + conn, + ) seen_detection_files.add((filename.lower(), file["size"])) @@ -1727,6 +2819,7 @@ def insert_new_fileset( transaction_id, log_text, user, + set_dat_metadata="", ip="", skiplog=False, ): @@ -1739,28 +2832,32 @@ def insert_new_fileset( log_text, conn, username=user, + set_dat_metadata=set_dat_metadata, ip=ip, skiplog=skiplog, ) if fileset_id: for file in fileset["rom"]: insert_file(file, detection, src, conn) + file_id = None + with conn.cursor() as cursor: + cursor.execute("SELECT @file_last AS file_id") + file_id = cursor.fetchone()["file_id"] for key, value in file.items(): - if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]: - insert_filechecksum(file, key, conn) + if key not in [ + "name", + "size", + "size-r", + "size-rd", + "sha1", + "crc", + "modification-time", + ]: + insert_filechecksum(file, key, file_id, conn) return (fileset_id, existing) def log_matched_fileset(src, fileset_last, fileset_id, state, user, conn): - category_text = f"Matched from {src}" - log_text = f"Matched Fileset:{fileset_id}. State {state}." - log_last = create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) - update_history(fileset_last, fileset_id, conn, log_last) - - -def set_log_matched_fileset(src, fileset_last, fileset_id, state, user, conn): category_text = f"Matched from {src}" log_text = ( f"Matched Fileset:{fileset_last} with Fileset:{fileset_id}. State {state}." @@ -1771,12 +2868,34 @@ def set_log_matched_fileset(src, fileset_last, fileset_id, state, user, conn): update_history(fileset_last, fileset_id, conn, log_last) +def log_scan_match_with_full( + fileset_last, + candidate_id, + unmatched_candidate_files, + unmatched_scan_files, + fully_matched, + user, + conn, +): + category_text = "Mismatch with Full set" + if fully_matched: + category_text = "Existing as Full set." + log_text = f"""Files mismatched with Full Fileset:{candidate_id}. Unmatched Files in scan fileset = {len(unmatched_scan_files)}. Unmatched Files in full fileset = {len(unmatched_candidate_files)}. List of unmatched files scan.dat : {", ".join(scan_file for scan_file in unmatched_scan_files)}, List of unmatched files full fileset : {", ".join(scan_file for scan_file in unmatched_candidate_files)}""" + if fully_matched: + log_text = ( + f"Fileset matched completely with Full Fileset:{candidate_id}. Dropping." 
+ ) + print(log_text) + create_log(escape_string(category_text), user, escape_string(log_text), conn) + + def finalize_fileset_insertion( conn, transaction_id, src, filepath, author, version, source_status, user ): with conn.cursor() as cursor: cursor.execute( - f"SELECT COUNT(fileset) from transactions WHERE `transaction` = {transaction_id}" + "SELECT COUNT(fileset) from transactions WHERE `transaction` = %s", + (transaction_id,), ) fileset_insertion_count = cursor.fetchone()["COUNT(fileset)"] category_text = f"Uploaded from {src}" @@ -1814,7 +2933,7 @@ def user_integrity_check(data, ip, game_metadata=None): print(f"Failed to connect to database: {e}") return - conn.cursor().execute(f"SET @fileset_time_last = {int(time.time())}") + conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),)) try: with conn.cursor() as cursor: @@ -1839,12 +2958,13 @@ def user_integrity_check(data, ip, game_metadata=None): missing_set = set() for fileset_id in matched_map.keys(): - cursor.execute(f"SELECT * FROM file WHERE fileset = {fileset_id}") + cursor.execute("SELECT * FROM file WHERE fileset = %s", (fileset_id,)) target_files = cursor.fetchall() target_files_dict = {} for target_file in target_files: cursor.execute( - f"SELECT * FROM filechecksum WHERE file = {target_file['id']}" + "SELECT * FROM filechecksum WHERE file = %s", + (target_file["id"],), ) target_checksums = cursor.fetchall() for checksum in target_checksums: @@ -1924,12 +3044,13 @@ def user_integrity_check(data, ip, game_metadata=None): most_matched = matched_list[0] matched_fileset_id, matched_count = most_matched[0], most_matched[1] cursor.execute( - f"SELECT status FROM fileset WHERE id = {matched_fileset_id}" + "SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,) ) status = cursor.fetchone()["status"] cursor.execute( - f"SELECT COUNT(file.id) FROM file WHERE fileset = {matched_fileset_id}" + "SELECT COUNT(file.id) FROM file WHERE fileset = %s", + (matched_fileset_id,), ) count = cursor.fetchone()["COUNT(file.id)"] if status == "full" and count == matched_count: @@ -1967,11 +3088,47 @@ def user_integrity_check(data, ip, game_metadata=None): def add_usercount(fileset, conn): with conn.cursor() as cursor: cursor.execute( - f"UPDATE fileset SET user_count = COALESCE(user_count, 0) + 1 WHERE id = {fileset}" + "UPDATE fileset SET user_count = COALESCE(user_count, 0) + 1 WHERE id = %s", + (fileset,), ) - cursor.execute(f"SELECT user_count from fileset WHERE id = {fileset}") + cursor.execute("SELECT user_count from fileset WHERE id = %s", (fileset,)) count = cursor.fetchone()["user_count"] if count >= 3: cursor.execute( - f"UPDATE fileset SET status = 'ReadyForReview' WHERE id = {fileset}" + "UPDATE fileset SET status = 'ReadyForReview' WHERE id = %s", (fileset,) ) + + +def console_log(message): + sys.stdout.write(" " * 50 + "\r") + sys.stdout.flush() + print(message) + + +def console_log_candidate_filtering(fileset_count): + sys.stdout.write(f"Filtering Candidates - Fileset {fileset_count}\r") + sys.stdout.flush() + + +def console_log_file_update(fileset_count): + sys.stdout.write(f"Updating files - Fileset {fileset_count}\r") + sys.stdout.flush() + + +def console_log_matching(fileset_count): + sys.stdout.write(f"Performing Match - Fileset {fileset_count}\r") + sys.stdout.flush() + + +def console_log_detection(fileset_count): + sys.stdout.write(f"Processing - Fileset {fileset_count}\r") + sys.stdout.flush() + + +def console_log_total_filesets(file_path): + count = 0 + with open(file_path, "r") as f: + for line in 
f: + if line.strip().startswith("game ("): + count += 1 + print(f"Total filesets present - {count}.") diff --git a/fileset.py b/fileset.py index 9b9dc935..605d831f 100644 --- a/fileset.py +++ b/fileset.py @@ -15,7 +15,6 @@ ) from pagination import create_page import difflib -from pymysql.converters import escape_string from db_functions import ( find_matching_filesets, get_all_related_filesets, @@ -23,28 +22,15 @@ user_integrity_check, db_connect, create_log, + db_connect_root, ) from collections import defaultdict +from schema import init_database app = Flask(__name__) secret_key = os.urandom(24) -base_dir = os.path.dirname(os.path.abspath(__file__)) -config_path = os.path.join(base_dir, "mysql_config.json") -with open(config_path) as f: - mysql_cred = json.load(f) - -conn = pymysql.connect( - host=mysql_cred["servername"], - user=mysql_cred["username"], - password=mysql_cred["password"], - db=mysql_cred["dbname"], - charset="utf8mb4", - cursorclass=pymysql.cursors.DictCursor, - autocommit=False, -) - @app.route("/") def index(): @@ -55,7 +41,12 @@ def index(): -

[The remainder of the index() hunk contained HTML template markup that was garbled in extraction; only the page strings "Fileset Database" and "Fileset Actions" survive from the old and new markup.]
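A recurring change in this patch is replacing f-string SQL with parameterized queries so that pymysql escapes values itself. Below is a minimal sketch of that pattern, assuming a DictCursor connection like the ones these scripts open; the helper name and connection parameters are illustrative only, not part of the patch.

import pymysql
import pymysql.cursors


def fetch_fileset_status(conn, fileset_id):
    # Hypothetical helper mirroring the queries in db_functions.py:
    # %s placeholders let the driver escape the value instead of
    # interpolating it into the SQL string with an f-string.
    with conn.cursor() as cursor:
        cursor.execute("SELECT status FROM fileset WHERE id = %s", (fileset_id,))
        row = cursor.fetchone()
        # DictCursor returns rows as dicts keyed by column name.
        return row["status"] if row else None


if __name__ == "__main__":
    # Connection parameters here are placeholders.
    conn = pymysql.connect(
        host="localhost",
        user="user",
        password="password",
        db="integrity",
        charset="utf8mb4",
        cursorclass=pymysql.cursors.DictCursor,
        autocommit=True,
    )
    print(fetch_fileset_status(conn, 1))
    conn.close()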