File tree Expand file tree Collapse file tree 1 file changed +13
-5
lines changed Expand file tree Collapse file tree 1 file changed +13
-5
lines changed Original file line number Diff line number Diff line change @@ -436,12 +436,13 @@ def _initialize_writers(self, only_initialize_if_missing=False) -> None:
436436 self .first_process = True
437437 self .logger .info (f"Hook is writing from the hook with pid: { os .getpid ()} \n " )
438438 else :
439+ if self .first_process is None :
440+ self .logger .warn (
441+ f"Unsupported Distributed Training Strategy Detected. \
442+ Sagemaker-Debugger will only write from one process. \
443+ The process with pid: { os .getpid ()} will not be writing any data. \n "
444+ )
439445 self .first_process = False
440- self .logger .warn (
441- f"Unsupported Distributed Training Strategy Detected.\n \
442- Sagemaker-Debugger will only write from one process.\n \
443- The process with pid: { os .getpid ()} will not be writing any data. \n "
444- )
445446 return
446447
447448 if self .save_all_workers is False :
@@ -546,6 +547,13 @@ def set_mode(self, mode):
546547
547548 def export_collections (self ):
548549 num_workers = self ._get_num_workers ()
550+ if num_workers == 1 and self .first_process is False :
551+ self .logger .warn (
552+ f"Unsupported Distributed Training Strategy Detected. \
553+ Sagemaker-Debugger will only write from one process. \
554+ The process with pid: { os .getpid ()} will not be writing any data. \n "
555+ )
556+ return
549557 if self .save_all_workers is False :
550558 if self .chief_worker != self .worker :
551559 return
You can’t perform that action at this time.
0 commit comments