Skip to content

Commit bbd9054

Browse files
committed
Update env var RANK to NODE_RANK.
1 parent 435a44f commit bbd9054

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

ads/jobs/builders/runtimes/pytorch_runtime.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -205,7 +205,8 @@ def run(self, dsc_job, **kwargs):
205 205
if not envs:
206 206
envs = {}
207 207
# Huggingface accelerate requires machine rank
208-
envs["RANK"] = str(i)
208+
# Here we use NODE_RANK to store the machine rank
209+
envs["NODE_RANK"] = str(i)
209 210
envs["WORLD_SIZE"] = str(replicas)
210 211
if main_run:
211 212
envs["MAIN_JOB_RUN_OCID"] = main_run.id

ads/jobs/templates/driver_pytorch.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -694,7 +694,7 @@ def __init__(self, code_dir: str = driver_utils.DEFAULT_CODE_DIR) -> None:
694 694
# --multi_gpu will be set automatically if there is more than 1 GPU
695 695
# self.multi_gpu = bool(self.node_count > 1 or self.gpu_count > 1)
696 696
self.num_machines = self.node_count
697-
self.machine_rank = os.environ["RANK"]
697+
self.machine_rank = os.environ["NODE_RANK"]
698 698
# Total number of processes across all nodes
699 699
# Here we assume all nodes are having the same shape
700 700
self.num_processes = (self.gpu_count if self.gpu_count else 1) * self.node_count

0 commit comments

Comments
 (0)