77from  pathlib  import  Path 
88from  random  import  randint 
99from  statistics  import  mean , stdev 
10- from  typing  import  Generator 
10+ from  typing  import  Generator ,  TextIO 
1111from  uuid  import  uuid4 
1212
1313import  pycrfsuite 
1414from  sklearn .model_selection  import  train_test_split 
15- from  tqdm  import  tqdm 
15+ from  tabulate  import  tabulate 
1616
1717from  .test_results_to_detailed_results  import  test_results_to_detailed_results 
1818from  .test_results_to_html  import  test_results_to_html 
2121    DataVectors ,
2222    Stats ,
2323    confusion_matrix ,
24+     convert_num_ordinal ,
2425    evaluate ,
2526    load_datasets ,
2627)
2930
3031
3132@contextmanager  
32- def  change_log_level (level : int ) ->  Generator [None ]:
33+ def  change_log_level (level : int ) ->  Generator [None ,  None ,  None ]:
3334    """Context manager to temporarily change logging level within the context. 
3435
3536    On exiting the context, the original level is restored. 
@@ -50,6 +51,30 @@ def change_log_level(level: int) -> Generator[None]:
5051    logger .setLevel (original_level )
5152
5253
@contextmanager
def set_redirect_log_stream(io_stream: TextIO) -> Generator[None, None, None]:
    """Context manager to redirect log output to io_stream, used by the web app.

    Required by the web app as it bypasses train.py, where logging is configured.

    Within the context, a handler writing to io_stream is attached to the root
    logger and the root logger level is set to INFO. On exiting the context,
    the handler is removed and the original root logger level is restored.

    Parameters
    ----------
    io_stream : TextIO
        Text stream that receives the log output, e.g. io.StringIO()

    Yields
    ------
    Generator[None, None, None]
        Generator, yielding None
    """
    root_logger = logging.getLogger()
    original_level = root_logger.level

    # logging.basicConfig() silently does nothing if the root logger already has
    # handlers, so the handler is attached explicitly to guarantee the redirect
    # takes effect on every call.
    handler = logging.StreamHandler(io_stream)
    handler.setFormatter(
        logging.Formatter("[%(levelname)s] (%(module)s) %(message)s")
    )
    root_logger.addHandler(handler)
    root_logger.setLevel(logging.INFO)

    try:
        yield
    finally:
        # Detach the handler so io_stream stops receiving records and is not kept
        # alive by the logging module. The stream itself is left open for the
        # caller to read.
        root_logger.removeHandler(handler)
        handler.close()
        root_logger.setLevel(original_level)
77+ 
5378def  train_parser_model (
5479    vectors : DataVectors ,
5580    split : float ,
@@ -189,6 +214,15 @@ def train_parser_model(
189214    return  stats 
190215
191216
def train_parser_model_bypass_logging(*kargs) -> Stats:
    """Call train_parser_model with logging below WARNING temporarily disabled.

    Thin wrapper intended for multi-processing: the log level is changed for
    the duration of the call only, using the change_log_level context manager.

    Parameters
    ----------
    *kargs
        Positional arguments, passed through unchanged to train_parser_model.

    Returns
    -------
    Stats
        The value returned by train_parser_model.
    """
    # Temporarily stop logging below WARNING for multi-processing
    with change_log_level(logging.WARNING):
        return train_parser_model(*kargs)
224+ 
225+ 
192226def  train_single (args : argparse .Namespace ) ->  None :
193227    """Train CRF model once. 
194228
@@ -222,15 +256,31 @@ def train_single(args: argparse.Namespace) -> None:
222256        combine_name_labels = args .combine_name_labels ,
223257    )
224258
225-     print ("Sentence-level results:" )
226-     print (f"\t Accuracy: { 100  *  stats .sentence .accuracy :.2f}  %" )
259+     headers  =  ["Sentence-level results" , "Word-level results" ]
260+     table  =  []
261+ 
262+     table .append (
263+         [
264+             f"Accuracy: { 100  *  stats .sentence .accuracy :.2f}  %" ,
265+             f"Accuracy: { 100  *  stats .token .accuracy :.2f}  %\n " 
266+             f"Precision (micro) { 100  *  stats .token .weighted_avg .precision :.2f}  %\n " 
267+             f"Recall (micro) { 100  *  stats .token .weighted_avg .recall :.2f}  %\n " 
268+             f"F1 score (micro) { 100  *  stats .token .weighted_avg .f1_score :.2f}  %" ,
269+         ]
270+     )
227271
228-     print ()
229-     print ("Word-level results:" )
230-     print (f"\t Accuracy { 100  *  stats .token .accuracy :.2f}  %" )
231-     print (f"\t Precision (micro) { 100  *  stats .token .weighted_avg .precision :.2f}  %" )
232-     print (f"\t Recall (micro) { 100  *  stats .token .weighted_avg .recall :.2f}  %" )
233-     print (f"\t F1 score (micro) { 100  *  stats .token .weighted_avg .f1_score :.2f}  %" )
272+     print (
273+         "\n " 
274+         +  tabulate (
275+             table ,
276+             headers = headers ,
277+             tablefmt = "fancy_grid" ,
278+             maxcolwidths = [None , None ],
279+             stralign = "left" ,
280+             numalign = "right" ,
281+         )
282+         +  "\n " 
283+     )
234284
235285
236286def  train_multiple (args : argparse .Namespace ) ->  None :
@@ -272,13 +322,15 @@ def train_multiple(args: argparse.Namespace) -> None:
272322        for  _  in  range (args .runs )
273323    ]
274324
275-     with  change_log_level (logging .WARNING ):  # Temporarily stop logging below WARNING 
276-         with  cf .ProcessPoolExecutor (max_workers = args .processes ) as  executor :
277-             futures  =  [executor .submit (train_parser_model , * a ) for  a  in  arguments ]
278-             eval_results  =  [
279-                 future .result ()
280-                 for  future  in  tqdm (cf .as_completed (futures ), total = len (futures ))
281-             ]
325+     word_accuracies , sentence_accuracies , seeds , eval_results  =  [], [], [], []
326+     with  cf .ProcessPoolExecutor (max_workers = args .processes ) as  executor :
327+         futures  =  [
328+             executor .submit (train_parser_model_bypass_logging , * a ) for  a  in  arguments 
329+         ]
330+         logger .info (f"Queued for { args .runs }   separate runs" )
331+         for  idx , future  in  enumerate (cf .as_completed (futures )):
332+             logger .info (f"{ convert_num_ordinal (idx  +  1 )}   run completed" )
333+             eval_results .append (future .result ())
282334
283335    word_accuracies , sentence_accuracies , seeds  =  [], [], []
284336    for  result  in  eval_results :
@@ -288,15 +340,9 @@ def train_multiple(args: argparse.Namespace) -> None:
288340
289341    sentence_mean  =  100  *  mean (sentence_accuracies )
290342    sentence_uncertainty  =  3  *  100  *  stdev (sentence_accuracies )
291-     print ()
292-     print ("Average sentence-level accuracy:" )
293-     print (f"\t -> { sentence_mean :.2f}  % ± { sentence_uncertainty :.2f}  %" )
294343
295344    word_mean  =  100  *  mean (word_accuracies )
296345    word_uncertainty  =  3  *  100  *  stdev (word_accuracies )
297-     print ()
298-     print ("Average word-level accuracy:" )
299-     print (f"\t -> { word_mean :.2f}  % ± { word_uncertainty :.2f}  %" )
300346
301347    index_best  =  max (
302348        range (len (sentence_accuracies )), key = sentence_accuracies .__getitem__ 
@@ -310,6 +356,42 @@ def train_multiple(args: argparse.Namespace) -> None:
310356    min_sent  =  100  *  sentence_accuracies [index_worst ]
311357    min_word  =  100  *  word_accuracies [index_worst ]
312358    min_seed  =  seeds [index_worst ]
313-     print ()
314-     print (f"Best:  Sentence { max_sent :.2f}  % / Word { max_word :.2f}  % (Seed: { max_seed }  )" )
315-     print (f"Worst: Sentence { min_sent :.2f}  % / Word { min_word :.2f}  % (Seed: { min_seed }  )" )
359+ 
360+     headers  =  ["Run" , "Word/Token accuracy" , "Sentence accuracy" , "Seed" ]
361+ 
362+     table  =  []
363+     for  idx , result  in  enumerate (eval_results ):
364+         table .append (
365+             [
366+                 convert_num_ordinal (idx  +  1 ),
367+                 f"{ 100  *  result .token .accuracy :.2f}  %" ,
368+                 f"{ 100  *  result .sentence .accuracy :.2f}  %" ,
369+                 f"{ result .seed }  " ,
370+             ]
371+         )
372+ 
373+     table .append (["-" ] *  len (headers ))
374+     table .append (
375+         [
376+             "Average" ,
377+             f"{ word_mean :.2f}  % ± { word_uncertainty :.2f}  %" ,
378+             f"{ sentence_mean :.2f}  % ± { sentence_uncertainty :.2f}  %" ,
379+             f"{ max_seed }  " ,
380+         ]
381+     )
382+     table .append (["-" ] *  len (headers ))
383+     table .append (["Best" , f"{ max_word :.2f}  %" , f"{ max_sent :.2f}  %" , f"{ max_seed }  " ])
384+     table .append (["Worst" , f"{ min_word :.2f}  %" , f"{ min_sent :.2f}  %" , f"{ min_seed }  " ])
385+ 
386+     print (
387+         "\n " 
388+         +  tabulate (
389+             table ,
390+             headers = headers ,
391+             tablefmt = "fancy_grid" ,
392+             maxcolwidths = [None , None , None , None ],
393+             stralign = "left" ,
394+             numalign = "right" ,
395+         )
396+         +  "\n " 
397+     )
0 commit comments