Commit e86f982

INTEGRITY: Improve console logging with progress update.

1 parent: c38881c


1 file changed: db_functions.py (83 additions, 29 deletions)
@@ -9,11 +9,13 @@
 from collections import defaultdict
 import re
 import copy
+import sys

 SPECIAL_SYMBOLS = '/":*|\\?%<>\x7f'


 def db_connect():
+    console_log("Connecting to the Database.")
     base_dir = os.path.dirname(os.path.abspath(__file__))
     config_path = os.path.join(base_dir, "mysql_config.json")
     with open(config_path) as f:
@@ -28,7 +30,7 @@ def db_connect():
         cursorclass=pymysql.cursors.DictCursor,
         autocommit=False,
     )
-
+    console_log(f"Connected to Database - {mysql_cred['dbname']}")
     return conn

@@ -526,12 +528,17 @@ def db_insert(data_arr, username=None, skiplog=False):
         transaction_id = temp + 1

         category_text = f"Uploaded from {src}"
-        log_text = f"Started loading DAT file, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}"
+        log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}"

         user = f"cli:{getpass.getuser()}" if username is None else username
         create_log(escape_string(category_text), user, escape_string(log_text), conn)

+        console_log(log_text)
+        console_log_total_filesets(filepath)
+
+        fileset_count = 1
         for fileset in game_data:
+            console_log_detection(fileset_count)
             key = calc_key(fileset)
             megakey = calc_megakey(fileset)

@@ -555,7 +562,7 @@ def db_insert(data_arr, username=None, skiplog=False):
             if existing_entry is not None:
                 log_text = f"Skipping Entry as similar entry already exsits - Fileset:{existing_entry['id']}. Skpped entry details - engineid = {engineid}, gameid = {gameid}, platform = {platform}, language = {lang}"
                 create_log("Warning", user, escape_string(log_text), conn)
-                print(log_text)
+                console_log(log_text)
                 continue

             insert_game(
@@ -594,6 +601,8 @@
                 if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
                     insert_filechecksum(file, key, file_id, conn)

+            fileset_count += 1
+
         if detection:
             conn.cursor().execute(
                 "UPDATE fileset SET status = 'obsolete' WHERE `timestamp` != FROM_UNIXTIME(@fileset_time_last) AND status = 'detection'"
@@ -607,6 +616,7 @@
             fileset_insertion_count = cur.fetchone()["COUNT(fileset)"]
             category_text = f"Uploaded from {src}"
             log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}"
+            console_log(log_text)
     except Exception as e:
         print("Inserting failed:", e)
     else:
@@ -871,8 +881,9 @@ def match_fileset(data_arr, username=None, skiplog=False):
     transaction_id = transaction_id + 1 if transaction_id else 1

     category_text = f"Uploaded from {src}"
-    log_text = f"Started loading DAT file, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. Transaction: {transaction_id}"
-
+    log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. Transaction: {transaction_id}"
+    console_log(log_text)
+    console_log_total_filesets(filepath)
     user = f"cli:{getpass.getuser()}" if username is None else username
     create_log(escape_string(category_text), user, escape_string(log_text), conn)

@@ -941,6 +952,9 @@ def set_process(
     mismatch_filesets = 0
     dropped_early_no_candidate = 0
     dropped_early_single_candidate_multiple_sets = 0
+
+    fileset_count = 0
+
     # A mapping from set filesets to candidate filesets list
     set_to_candidate_dict = defaultdict(list)
     id_to_fileset_dict = defaultdict(dict)
@@ -995,12 +1009,12 @@
         engine_name = fileset["sourcefile"].split("-")[0]

         if engine_name == "glk":
-            candidate_filesets = set_glk_filter_candidate_filesets(
-                fileset_id, fileset, transaction_id, engine_name, conn
+            (candidate_filesets, fileset_count) = set_glk_filter_candidate_filesets(
+                fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
             )
         else:
-            candidate_filesets = set_filter_candidate_filesets(
-                fileset_id, fileset, transaction_id, conn
+            (candidate_filesets, fileset_count) = set_filter_candidate_filesets(
+                fileset_id, fileset, fileset_count, transaction_id, conn
             )

         # Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them.
@@ -1016,10 +1030,18 @@
             )
             dropped_early_no_candidate += 1
             delete_original_fileset(fileset_id, conn)
-
         id_to_fileset_dict[fileset_id] = fileset
         set_to_candidate_dict[fileset_id].extend(candidate_filesets)

+    console_message = "Candidate filtering finished."
+    console_log(console_message)
+    console_message = (
+        f"{dropped_early_no_candidate} Filesets Dropped - No candidates found."
+    )
+    console_log(console_message)
+    console_message = "Looking for duplicates..."
+    console_log(console_message)
+
     # Remove all such filesets, which have many to one mapping with a single candidate, those are extra variants.
     value_to_keys = defaultdict(list)
     for set_fileset, candidates in set_to_candidate_dict.items():
@@ -1052,6 +1074,7 @@
                 fileset["description"] if "description" in fileset else ""
             )
             log_text = f"Drop fileset, multiple filesets mapping to single detection. Name: {fileset_name}, Description: {fileset_description}. Clashed with Fileset:{candidate} ({engine}:{gameid}-{platform}-{language})"
+            console_log(log_text)
             create_log(
                 escape_string(category_text), user, escape_string(log_text), conn
             )
@@ -1062,7 +1085,9 @@

     manual_merge_map = defaultdict(list)

+    match_count = 1
     for fileset_id, candidate_filesets in set_to_candidate_dict.items():
+        console_log_matching(match_count)
         fileset = id_to_fileset_dict[fileset_id]

         # Filter by platform to reduce manual merge
@@ -1092,21 +1117,15 @@
                 skiplog,
             )

-    # print(manual_merge_map)
+        match_count += 1
+    console_log("Matching performed.")

     for fileset_id, candidates in manual_merge_map.items():
         category_text = "Manual Merge Required"
         log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}."
         manual_merged_filesets += 1
-        # print(candidates)
         add_manual_merge(
-            candidates,
-            fileset_id,
-            category_text,
-            log_text,
-            log_text,
-            user,
-            conn,
+            candidates, fileset_id, category_text, log_text, user, conn, log_text
         )

     # Final log
@@ -1121,6 +1140,7 @@
     create_log(escape_string(category_text), user, escape_string(log_text), conn)
     category_text = "Upload information"
     log_text = f"Number of filesets: {fileset_insertion_count}. Filesets automatically merged: {auto_merged_filesets}. Filesets dropped early (no candidate) - {dropped_early_no_candidate}. Filesets dropped early (mapping to single detection) - {dropped_early_single_candidate_multiple_sets}. Filesets requiring manual merge: {manual_merged_filesets}. Partial/Full filesets already present: {fully_matched_filesets}. Partial/Full filesets with mismatch {mismatch_filesets}."
+    console_log(log_text)
     create_log(escape_string(category_text), user, escape_string(log_text), conn)

@@ -1225,14 +1245,13 @@ def set_perform_match(
     else:
         category_text = "Mismatch"
         log_text = f"Fileset:{fileset_id} mismatched with Fileset:{matched_fileset_id} with status:{status}. Try manual merge."
-        print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}."
+        # print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}."
         mismatch_filesets += 1
         add_manual_merge(
             [matched_fileset_id],
             fileset_id,
             category_text,
             log_text,
-            print_text,
             user,
             conn,
         )
@@ -1340,7 +1359,7 @@ def remove_manual_merge(


 def add_manual_merge(
-    child_filesets, parent_fileset, category_text, log_text, print_text, user, conn
+    child_filesets, parent_fileset, category_text, log_text, user, conn, print_text=None
 ):
     """
     Adds the manual merge entries to a table called possible_merges.
@@ -1356,7 +1375,8 @@
         cursor.execute(query, (child_fileset, parent_fileset))

     create_log(escape_string(category_text), user, escape_string(log_text), conn)
-    print(print_text)
+    if print_text:
+        print(print_text)


 def is_full_checksum_match(candidate_fileset, fileset, conn):
@@ -1395,14 +1415,15 @@ def is_full_checksum_match(candidate_fileset, fileset, conn):


 def set_glk_filter_candidate_filesets(
-    fileset_id, fileset, transaction_id, engine_name, conn
+    fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
 ):
     """
     Returns a list of candidate filesets for glk engines that can be merged
     """
     with conn.cursor() as cursor:
         # Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches
-
+        fileset_count += 1
+        console_log_candidate_filtering(fileset_count)
         query = """
             WITH candidate_fileset AS (
                 SELECT fs.id AS fileset_id, f.size
@@ -1469,16 +1490,19 @@
         for row in rows:
             candidates.append(row["fileset_id"])

-    return candidates
+    return (candidates, fileset_count)


-def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
+def set_filter_candidate_filesets(
+    fileset_id, fileset, fileset_count, transaction_id, conn
+):
     """
     Returns a list of candidate filesets that can be merged
     """
     with conn.cursor() as cursor:
         # Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches
-
+        fileset_count += 1
+        console_log_candidate_filtering(fileset_count)
         query = """
             WITH candidate_fileset AS (
                 SELECT fs.id AS fileset_id, f.name, f.size
@@ -1536,7 +1560,7 @@ def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
         for row in rows:
             candidates.append(row["fileset_id"])

-    return candidates
+    return (candidates, fileset_count)


 def process_fileset(
@@ -2265,3 +2289,33 @@ def add_usercount(fileset, conn):
         cursor.execute(
             f"UPDATE fileset SET status = 'ReadyForReview' WHERE id = {fileset}"
         )
+
+
+def console_log(message):
+    sys.stdout.write(" " * 50 + "\r")
+    sys.stdout.flush()
+    print(message)
+
+
+def console_log_candidate_filtering(fileset_count):
+    sys.stdout.write(f"Filtering Candidates - Fileset {fileset_count}\r")
+    sys.stdout.flush()
+
+
+def console_log_matching(fileset_count):
+    sys.stdout.write(f"Performing Match - Fileset {fileset_count}\r")
+    sys.stdout.flush()
+
+
+def console_log_detection(fileset_count):
+    sys.stdout.write(f"Processing - Fileset {fileset_count}\r")
+    sys.stdout.flush()
+
+
+def console_log_total_filesets(file_path):
+    count = 0
+    with open(file_path, "r") as f:
+        for line in f:
+            if line.strip().startswith("game ("):
+                count += 1
+    print(f"Total filesets present - {count}.")
