File tree Expand file tree Collapse file tree 1 file changed +7
-0
lines changed Expand file tree Collapse file tree 1 file changed +7
-0
lines changed Original file line number Diff line number Diff line change @@ -397,6 +397,7 @@ def run_command(
397
397
shell = True ,
398
398
)
399
399
# Stream the outputs
400
+ logger .debug ("Streaming command output from subprocess %s" , process .pid )
400
401
while True :
401
402
output = process .stdout .readline ()
402
403
if process .poll () is not None and output == b"" :
@@ -411,9 +412,15 @@ def run_command(
411
412
# logging will add line break
412
413
msg = msg .rstrip ("\n " )
413
414
logger .log (level = level , msg = msg )
415
+ if "pdsh@" in msg and "ssh exited with exit code 1" in msg :
416
+ print ("DeepSpeed Failed." )
417
+ sys .exit (1 )
414
418
# Add a small delay so that
415
419
# outputs from the subsequent code will have different timestamp for oci logging
416
420
time .sleep (0.02 )
421
+ logger .debug (
422
+ "subprocess %s returned exit code %s" , process .pid , process .returncode
423
+ )
417
424
if check and process .returncode != 0 :
418
425
# If there is an error, exit the main process with the same return code.
419
426
sys .exit (process .returncode )
You can’t perform that action at this time.
0 commit comments