Skip to content

Commit a6d5d52

Browse files
sjawhar0x2b3bfa0
andauthored
Use region field as comma-delimited node selector for k8s (#657)
* Use `region` field as comma-delimited node selector for k8s * Update docs for k8s region/nodeSelector * Apply suggestions from code review Co-authored-by: Helio Machado <0x2b3bfa0+git@googlemail.com>
1 parent 6f10bfe commit a6d5d52

File tree

2 files changed

+39
-13
lines changed

2 files changed

+39
-13
lines changed

docs/resources/task.md

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ resource "iterative_task" "example" {
5454

5555
### Optional
5656

57-
- `region` - (Optional) [Cloud region/zone](#cloud-region) to run the task on.
57+
- `region` - (Optional) [Cloud region/zone](#cloud-region) to run the task on, or node selector labels for Kubernetes.
5858
- `machine` - (Optional) See [Machine Types](#machine-type) below.
5959
- `disk_size` - (Optional) Size of the ephemeral machine storage in GB. `-1`: automatic based on `image`.
6060
- `spot` - (Optional) Spot instance price. `-1`: disabled, `0`: automatic price, any other positive number: maximum bidding price in USD per hour (above which the instance is terminated until the price drops).
@@ -257,7 +257,25 @@ In addition to generic regions, it's possible to specify any cloud region suppor
257257

258258
### Kubernetes
259259

260-
The `region` attribute is ignored.
260+
For Kubernetes, the `region` attribute can be used to set the [`nodeSelector`](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) field of the job specification. The value of `region` should be a comma-delimited list of `key=value` node label pairs.
261+
262+
For example:
263+
```tf
264+
resource "iterative_task" "example" {
265+
cloud = "k8s"
266+
region = "foo=bar,goo=baz"
267+
}
268+
```
269+
270+
This will create a pod with a `nodeSelector` value like the following:
271+
```yaml
272+
apiVersion: v1
273+
kind: Pod
274+
spec:
275+
nodeSelector:
276+
foo: bar
277+
goo: baz
278+
```
261279
262280
## Permission Set
263281
@@ -285,10 +303,6 @@ A comma-separated list of [user-assigned identity](https://docs.microsoft.com/en
285303

286304
### Kubernetes
287305

288-
#### Region attribute
289-
290-
Setting the `region` attribute results in undefined behaviour.
291-
292306
#### Directory storage
293307

294308
Unlike public cloud providers, Kubernetes does not offer any portable way of persisting and sharing storage between pods. When specified, the `storage.workdir` attribute will create a `PersistentVolumeClaim` of the default `StorageClass`, with the same lifecycle as the task and the specified `disk_size`.

task/k8s/resources/resource_job.go

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,26 @@ func NewJob(client *client.Client, identifier common.Identifier, persistentVolum
3636
j.Dependencies.PermissionSet = permissionSet
3737
j.Attributes.Task = task
3838
j.Attributes.Parallelism = task.Parallelism
39+
j.Attributes.NodeSelector = map[string]string{}
40+
for _, selector := range strings.Split(string(client.Cloud.Region), ",") {
41+
key, value, is_found := strings.Cut(selector, "=")
42+
if is_found && len(value) > 0 {
43+
j.Attributes.NodeSelector[key] = value
44+
}
45+
}
3946
return j
4047
}
4148

4249
type Job struct {
4350
Client *client.Client
4451
Identifier string
4552
Attributes struct {
46-
Task common.Task
47-
Parallelism uint16
48-
Addresses []net.IP
49-
Status common.Status
50-
Events []common.Event
53+
Task common.Task
54+
Parallelism uint16
55+
NodeSelector map[string]string
56+
Addresses []net.IP
57+
Status common.Status
58+
Events []common.Event
5159
}
5260
Dependencies struct {
5361
*PersistentVolumeClaim
@@ -90,8 +98,12 @@ func (j *Job) Create(ctx context.Context) error {
9098
return common.NotFoundError
9199
}
92100

93-
// Define the accelerator settings (i.e. GPU type, model, ...)
94101
jobNodeSelector := map[string]string{}
102+
for selector, value := range j.Attributes.NodeSelector {
103+
jobNodeSelector[selector] = value
104+
}
105+
106+
// Define the accelerator settings (i.e. GPU type, model, ...)
95107
jobAccelerator := match[3]
96108
jobGPUType := "nvidia.com/gpu"
97109
jobGPUCount := match[4]
@@ -108,7 +120,7 @@ func (j *Job) Create(ctx context.Context) error {
108120
if jobGPUCount > "0" {
109121
jobLimits[kubernetes_core.ResourceName(jobGPUType)] = kubernetes_resource.MustParse(jobGPUCount)
110122
if jobAccelerator != "" {
111-
jobNodeSelector = map[string]string{"accelerator": jobAccelerator}
123+
jobNodeSelector["accelerator"] = jobAccelerator
112124
}
113125
}
114126

0 commit comments

Comments
 (0)