from collections import defaultdict
import re
import copy
+import sys

SPECIAL_SYMBOLS = '/":*|\\ ?%<>\x7f '


def db_connect():
+    console_log("Connecting to the Database.")
    base_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(base_dir, "mysql_config.json")
    with open(config_path) as f:
@@ -28,7 +30,7 @@ def db_connect():
        cursorclass=pymysql.cursors.DictCursor,
        autocommit=False,
    )
-
+    console_log(f"Connected to Database - {mysql_cred['dbname']}")
    return conn

@@ -526,12 +528,17 @@ def db_insert(data_arr, username=None, skiplog=False):
        transaction_id = temp + 1

        category_text = f"Uploaded from {src}"
-        log_text = f"Started loading DAT file, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}"
+        log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}"

        user = f"cli:{getpass.getuser()}" if username is None else username
        create_log(escape_string(category_text), user, escape_string(log_text), conn)

+        console_log(log_text)
+        console_log_total_filesets(filepath)
+
+        fileset_count = 1
        for fileset in game_data:
+            console_log_detection(fileset_count)
            key = calc_key(fileset)
            megakey = calc_megakey(fileset)

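With these calls wired in, loading a DAT now prints the "Started loading DAT file" log line and the total fileset count up front, then keeps a single self-overwriting progress line going while the insertion loop runs. Roughly (the fileset numbers below are illustrative, not captured from a real run):

    Total filesets present - 1532.
    Processing - Fileset 37

where the "Processing" line repeatedly overwrites itself in place via console_log_detection.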
@@ -555,7 +562,7 @@ def db_insert(data_arr, username=None, skiplog=False):
            if existing_entry is not None:
                log_text = f"Skipping Entry as similar entry already exists - Fileset:{existing_entry['id']}. Skipped entry details - engineid = {engineid}, gameid = {gameid}, platform = {platform}, language = {lang}"
                create_log("Warning", user, escape_string(log_text), conn)
-                print(log_text)
+                console_log(log_text)
                continue

            insert_game(
@@ -594,6 +601,8 @@ def db_insert(data_arr, username=None, skiplog=False):
                if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]:
                    insert_filechecksum(file, key, file_id, conn)

+            fileset_count += 1
+
        if detection:
            conn.cursor().execute(
                "UPDATE fileset SET status = 'obsolete' WHERE `timestamp` != FROM_UNIXTIME(@fileset_time_last) AND status = 'detection'"
@@ -607,6 +616,7 @@ def db_insert(data_arr, username=None, skiplog=False):
        fileset_insertion_count = cur.fetchone()["COUNT(fileset)"]
        category_text = f"Uploaded from {src}"
        log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}"
+        console_log(log_text)
    except Exception as e:
        print("Inserting failed:", e)
    else:
@@ -871,8 +881,9 @@ def match_fileset(data_arr, username=None, skiplog=False):
    transaction_id = transaction_id + 1 if transaction_id else 1

    category_text = f"Uploaded from {src}"
-    log_text = f"Started loading DAT file, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. Transaction: {transaction_id}"
-
+    log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. Transaction: {transaction_id}"
+    console_log(log_text)
+    console_log_total_filesets(filepath)
    user = f"cli:{getpass.getuser()}" if username is None else username
    create_log(escape_string(category_text), user, escape_string(log_text), conn)

@@ -941,6 +952,9 @@ def set_process(
    mismatch_filesets = 0
    dropped_early_no_candidate = 0
    dropped_early_single_candidate_multiple_sets = 0
+
+    fileset_count = 0
+
    # A mapping from set filesets to candidate filesets list
    set_to_candidate_dict = defaultdict(list)
    id_to_fileset_dict = defaultdict(dict)
@@ -995,12 +1009,12 @@ def set_process(
        engine_name = fileset["sourcefile"].split("-")[0]

        if engine_name == "glk":
-            candidate_filesets = set_glk_filter_candidate_filesets(
-                fileset_id, fileset, transaction_id, engine_name, conn
+            (candidate_filesets, fileset_count) = set_glk_filter_candidate_filesets(
+                fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
            )
        else:
-            candidate_filesets = set_filter_candidate_filesets(
-                fileset_id, fileset, transaction_id, conn
+            (candidate_filesets, fileset_count) = set_filter_candidate_filesets(
+                fileset_id, fileset, fileset_count, transaction_id, conn
            )

        # Mac files in set.dat are not represented properly and they won't find a candidate fileset for a match, so we can drop them.
@@ -1016,10 +1030,18 @@ def set_process(
            )
            dropped_early_no_candidate += 1
            delete_original_fileset(fileset_id, conn)
-
        id_to_fileset_dict[fileset_id] = fileset
        set_to_candidate_dict[fileset_id].extend(candidate_filesets)

+    console_message = "Candidate filtering finished."
+    console_log(console_message)
+    console_message = (
+        f"{dropped_early_no_candidate} Filesets Dropped - No candidates found."
+    )
+    console_log(console_message)
+    console_message = "Looking for duplicates..."
+    console_log(console_message)
+
    # Remove all such filesets, which have many to one mapping with a single candidate, those are extra variants.
    value_to_keys = defaultdict(list)
    for set_fileset, candidates in set_to_candidate_dict.items():
@@ -1052,6 +1074,7 @@ def set_process(
                fileset["description"] if "description" in fileset else ""
            )
            log_text = f"Drop fileset, multiple filesets mapping to single detection. Name: {fileset_name}, Description: {fileset_description}. Clashed with Fileset:{candidate} ({engine}:{gameid}-{platform}-{language})"
+            console_log(log_text)
            create_log(
                escape_string(category_text), user, escape_string(log_text), conn
            )
@@ -1062,7 +1085,9 @@ def set_process(

    manual_merge_map = defaultdict(list)

+    match_count = 1
    for fileset_id, candidate_filesets in set_to_candidate_dict.items():
+        console_log_matching(match_count)
        fileset = id_to_fileset_dict[fileset_id]

        # Filter by platform to reduce manual merge
@@ -1092,21 +1117,15 @@ def set_process(
            skiplog,
        )

-    # print(manual_merge_map)
+        match_count += 1
+    console_log("Matching performed.")

    for fileset_id, candidates in manual_merge_map.items():
        category_text = "Manual Merge Required"
        log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}."
        manual_merged_filesets += 1
-        # print(candidates)
        add_manual_merge(
-            candidates,
-            fileset_id,
-            category_text,
-            log_text,
-            log_text,
-            user,
-            conn,
+            candidates, fileset_id, category_text, log_text, user, conn, log_text
        )

    # Final log
@@ -1121,6 +1140,7 @@ def set_process(
    create_log(escape_string(category_text), user, escape_string(log_text), conn)
    category_text = "Upload information"
    log_text = f"Number of filesets: {fileset_insertion_count}. Filesets automatically merged: {auto_merged_filesets}. Filesets dropped early (no candidate) - {dropped_early_no_candidate}. Filesets dropped early (mapping to single detection) - {dropped_early_single_candidate_multiple_sets}. Filesets requiring manual merge: {manual_merged_filesets}. Partial/Full filesets already present: {fully_matched_filesets}. Partial/Full filesets with mismatch {mismatch_filesets}."
+    console_log(log_text)
    create_log(escape_string(category_text), user, escape_string(log_text), conn)

@@ -1225,14 +1245,13 @@ def set_perform_match(
        else:
            category_text = "Mismatch"
            log_text = f"Fileset:{fileset_id} mismatched with Fileset:{matched_fileset_id} with status:{status}. Try manual merge."
-            print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}."
+            # print_text = f"Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}. Unmatched files: {len(unmatched_files)}."
            mismatch_filesets += 1
            add_manual_merge(
                [matched_fileset_id],
                fileset_id,
                category_text,
                log_text,
-                print_text,
                user,
                conn,
            )
@@ -1340,7 +1359,7 @@ def remove_manual_merge(


def add_manual_merge(
-    child_filesets, parent_fileset, category_text, log_text, print_text, user, conn
+    child_filesets, parent_fileset, category_text, log_text, user, conn, print_text=None
):
    """
    Adds the manual merge entries to a table called possible_merges.
@@ -1356,7 +1375,8 @@ def add_manual_merge(
            cursor.execute(query, (child_fileset, parent_fileset))

    create_log(escape_string(category_text), user, escape_string(log_text), conn)
-    print(print_text)
+    if print_text:
+        print(print_text)


def is_full_checksum_match(candidate_fileset, fileset, conn):
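Making print_text an optional trailing parameter lets call sites that only need the database log skip the console echo, while the mismatch path can still pass an explicit message. A minimal sketch of the two call styles now supported (the fileset ids below are made up for illustration):

    # Log only - nothing extra is printed to the console.
    add_manual_merge([1501, 1502], 1400, "Manual Merge Required", log_text, user, conn)

    # Log and also echo a console message.
    add_manual_merge([1501], 1400, "Mismatch", log_text, user, conn, print_text=log_text)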
@@ -1395,14 +1415,15 @@ def is_full_checksum_match(candidate_fileset, fileset, conn):


def set_glk_filter_candidate_filesets(
-    fileset_id, fileset, transaction_id, engine_name, conn
+    fileset_id, fileset, fileset_count, transaction_id, engine_name, conn
):
    """
    Returns a list of candidate filesets for glk engines that can be merged
    """
    with conn.cursor() as cursor:
        # Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches
-
+        fileset_count += 1
+        console_log_candidate_filtering(fileset_count)
        query = """
            WITH candidate_fileset AS (
                SELECT fs.id AS fileset_id, f.size
@@ -1469,16 +1490,19 @@ def set_glk_filter_candidate_filesets(
        for row in rows:
            candidates.append(row["fileset_id"])

-    return candidates
+    return (candidates, fileset_count)


-def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
+def set_filter_candidate_filesets(
+    fileset_id, fileset, fileset_count, transaction_id, conn
+):
    """
    Returns a list of candidate filesets that can be merged
    """
    with conn.cursor() as cursor:
        # Returns those filesets which have all detection files matching in the set fileset filtered by engine, file name and file size(if not -1) sorted in descending order of matches
-
+        fileset_count += 1
+        console_log_candidate_filtering(fileset_count)
        query = """
            WITH candidate_fileset AS (
                SELECT fs.id AS fileset_id, f.name, f.size
@@ -1536,7 +1560,7 @@ def set_filter_candidate_filesets(fileset_id, fileset, transaction_id, conn):
        for row in rows:
            candidates.append(row["fileset_id"])

-    return candidates
+    return (candidates, fileset_count)


def process_fileset(
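Returning the counter alongside the candidates keeps the progress display accurate without a global: the caller passes fileset_count in, the helper bumps it once per fileset it filters, and the caller stores the updated value for the next iteration. A minimal sketch of the calling pattern as used in set_process:

    candidate_filesets, fileset_count = set_filter_candidate_filesets(
        fileset_id, fileset, fileset_count, transaction_id, conn
    )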
@@ -2265,3 +2289,33 @@ def add_usercount(fileset, conn):
        cursor.execute(
            f"UPDATE fileset SET status = 'ReadyForReview' WHERE id = {fileset}"
        )
+
+
+def console_log(message):
+    sys.stdout.write(" " * 50 + "\r")
+    sys.stdout.flush()
+    print(message)
+
+
+def console_log_candidate_filtering(fileset_count):
+    sys.stdout.write(f"Filtering Candidates - Fileset {fileset_count}\r")
+    sys.stdout.flush()
+
+
+def console_log_matching(fileset_count):
+    sys.stdout.write(f"Performing Match - Fileset {fileset_count}\r")
+    sys.stdout.flush()
+
+
+def console_log_detection(fileset_count):
+    sys.stdout.write(f"Processing - Fileset {fileset_count}\r")
+    sys.stdout.flush()
+
+
+def console_log_total_filesets(file_path):
+    count = 0
+    with open(file_path, "r") as f:
+        for line in f:
+            if line.strip().startswith("game ("):
+                count += 1
+    print(f"Total filesets present - {count}.")