@@ -18,13 +18,16 @@ Scheduling:
18
18
Scheduler : slurm
19
19
SlurmQueues :
20
20
- Name : queue-trn32
21
+ CapacityType : CAPACITY_BLOCK
21
22
ComputeResources :
22
23
- Name : compute-resource-trn32
23
- Instances :
24
- - InstanceType : {{instance}}
24
+ InstanceType : {{instance}}
25
25
MinCount : 2
26
+ MaxCount : 2
26
27
Efa :
27
28
Enabled : true
29
+ CapacityReservationTarget :
30
+ CapacityReservationId : cr-05b0c099ce2534ce3
28
31
Networking :
29
32
SubnetIds :
30
33
- {{ private_subnet_id }}
@@ -42,24 +45,24 @@ Scheduling:
42
45
- BucketName : {{ bucket_name }}
43
46
# Needed to download neuronx packages and neff file --> FIXME to be removed once packages are publicly available
44
47
- BucketName : aws-parallelcluster-beta
45
- - Name : queue-trn2
46
- ComputeResources :
47
- - Name : compute-resource-trn2
48
- Instances :
49
- - InstanceType : trn1.2xlarge
50
- MinCount : 0 # TODO change to 1 once allreduce test is passing
51
- Networking :
52
- SubnetIds :
53
- - {{ private_subnet_id }}
54
- CustomActions :
55
- OnNodeConfigured :
56
- Script : s3://{{ bucket_name }}/neuron-installation.sh
57
- Iam :
58
- # Policy to access to Trainium beta repository info
59
- AdditionalIamPolicies :
60
- - Policy : arn:aws:iam::447714826191:policy/TrainiumPreviewPolicy
61
- S3Access :
62
- # Needed to download post install script
63
- - BucketName : {{ bucket_name }}
64
- # Needed to download neuronx packages and neff file --> FIXME to be removed once packages are public available
65
- - BucketName : aws-parallelcluster-beta
48
+ # - Name: queue-trn2
49
+ # ComputeResources:
50
+ # - Name: compute-resource-trn2
51
+ # Instances:
52
+ # - InstanceType: trn1.2xlarge
53
+ # MinCount: 0 # TODO change to 1 once allreduce test is passing
54
+ # Networking:
55
+ # SubnetIds:
56
+ # - {{ private_subnet_id }}
57
+ # CustomActions:
58
+ # OnNodeConfigured:
59
+ # Script: s3://{{ bucket_name }}/neuron-installation.sh
60
+ # Iam:
61
+ # # Policy to access Trainium beta repository info
62
+ # AdditionalIamPolicies:
63
+ # - Policy: arn:aws:iam::447714826191:policy/TrainiumPreviewPolicy
64
+ # S3Access:
65
+ # # Needed to download post install script
66
+ # - BucketName: {{ bucket_name }}
67
+ # # Needed to download neuronx packages and neff file --> FIXME to be removed once packages are publicly available
68
+ # - BucketName: aws-parallelcluster-beta
0 commit comments