diff --git a/docs/.values-table.md b/docs/.values-table.md index 0847bf8..4d7427c 100644 --- a/docs/.values-table.md +++ b/docs/.values-table.md @@ -10,6 +10,9 @@ | triton.command | list | `["/bin/sh","-c"]` | Command and arguments to run in Triton container | | triton.args[0] | string | `"/opt/tritonserver/bin/tritonserver \\\n--model-repository=/tmp/ \\\n--log-verbose=0 \\\n--exit-timeout-secs=60\n"` | | | triton.resources | object | `{"limits":{"cpu":1,"memory":"2G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource limits and requests for each Triton instance. You can add necessary GPU request here. | +| triton.annotations | object | `{}` | Annotations for Triton pods | +| triton.nodeSelector | object | `{}` | Node selector for Triton pods | +| triton.tolerations | list | `[]` | Tolerations for Triton pods | | triton.affinity | object | `{}` | Affinity rules for Triton pods - another way to request GPUs | | triton.modelRepository | object | `{"enabled":false,"mountPath":""}` | Model repository configuration | | triton.modelRepository.mountPath | string | `""` | Model repository mount path | @@ -25,6 +28,9 @@ | envoy.image | string | `"envoyproxy/envoy:v1.30.9"` | Envoy Proxy Docker image | | envoy.args | list | `["--config-path","/etc/envoy/envoy.yaml","--log-level","info","--log-path","/dev/stdout"]` | Arguments for Envoy | | envoy.resources | object | `{"limits":{"cpu":8,"memory":"4G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource requests and limits for Envoy Proxy. Note: an Envoy Proxy with too many connections might run out of CPU | +| envoy.annotations | object | `{}` | Annotations for Envoy pods | +| envoy.nodeSelector | object | `{}` | Node selector for Envoy pods | +| envoy.tolerations | list | `[]` | Tolerations for Envoy pods | | envoy.service.type | string | `"ClusterIP"` | This is the client-facing endpoint. In order to be able to connect to it, either enable ingress, or use type: LoadBalancer. | | envoy.service.ports | list | `[{"name":"grpc","port":8001,"targetPort":8001},{"name":"admin","port":9901,"targetPort":9901}]` | Envoy Service ports | | envoy.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""}` | Ingress configuration for Envoy | @@ -53,8 +59,6 @@ | autoscaler.scaleDown.stabilizationWindowSeconds | int | `600` | | | autoscaler.scaleDown.periodSeconds | int | `120` | | | autoscaler.scaleDown.stepsize | int | `1` | | -| nodeSelector | object | `{}` | Node selector for all pods (Triton and Envoy) | -| tolerations | list | `[]` | Tolerations for all pods (Triton and Envoy) | | prometheus.external.enabled | bool | `false` | Enable external Prometheus instance. If true, Prometheus parameters outside of prometheus.external will be ignored. | | prometheus.external.url | string | `""` | External Prometheus server url | | prometheus.external.port | int | `443` | External Prometheus server port number | diff --git a/helm/supersonic/templates/envoy/deployment.yaml b/helm/supersonic/templates/envoy/deployment.yaml index 7b2666c..daf5bd8 100644 --- a/helm/supersonic/templates/envoy/deployment.yaml +++ b/helm/supersonic/templates/envoy/deployment.yaml @@ -21,6 +21,10 @@ spec: app.kubernetes.io/name: {{ .Chart.Name }} app.kubernetes.io/instance: {{ include "supersonic.name" . }} app.kubernetes.io/component: envoy + {{- with .Values.envoy.annotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} spec: containers: - name: envoy @@ -68,13 +72,13 @@ spec: {{- end }} -{{- if .Values.nodeSelector }} +{{- if .Values.envoy.nodeSelector }} nodeSelector: -{{ toYaml .Values.nodeSelector | nindent 8 }} +{{ toYaml .Values.envoy.nodeSelector | nindent 8 }} {{- end }} -{{- if .Values.tolerations }} +{{- if .Values.envoy.tolerations }} tolerations: -{{ toYaml .Values.tolerations | nindent 8 }} +{{ toYaml .Values.envoy.tolerations | nindent 8 }} {{- end }} restartPolicy: Always diff --git a/helm/supersonic/templates/triton/deployment.yaml b/helm/supersonic/templates/triton/deployment.yaml index b47be7e..02b6f79 100644 --- a/helm/supersonic/templates/triton/deployment.yaml +++ b/helm/supersonic/templates/triton/deployment.yaml @@ -19,6 +19,10 @@ spec: app.kubernetes.io/name: {{ .Chart.Name }} app.kubernetes.io/instance: {{ include "supersonic.name" . }} app.kubernetes.io/component: triton + {{- with .Values.triton.annotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} spec: terminationGracePeriodSeconds: 60 containers: @@ -88,12 +92,12 @@ spec: affinity: {{ toYaml .Values.triton.affinity | nindent 8}} {{- end }} -{{- if .Values.nodeSelector }} +{{- if .Values.triton.nodeSelector }} nodeSelector: -{{ toYaml .Values.nodeSelector | nindent 8 }} +{{ toYaml .Values.triton.nodeSelector | nindent 8 }} {{- end }} -{{- if .Values.tolerations }} +{{- if .Values.triton.tolerations }} tolerations: -{{ toYaml .Values.tolerations | nindent 8 }} +{{ toYaml .Values.triton.tolerations | nindent 8 }} {{- end }} restartPolicy: Always \ No newline at end of file diff --git a/helm/supersonic/values.schema.json b/helm/supersonic/values.schema.json index 4ad91bd..15447ba 100644 --- a/helm/supersonic/values.schema.json +++ b/helm/supersonic/values.schema.json @@ -71,6 +71,15 @@ "requests" ] }, + "annotations": { + "type": "object" + }, + "nodeSelector": { + "type": "object" + }, + "tolerations": { + "type": "array" + }, "affinity": { "type": "object" }, @@ -211,15 +220,18 @@ }, "required": [ "affinity", + "annotations", "args", "command", "image", "modelRepository", + "nodeSelector", "readinessProbe", "replicas", "resources", "service", - "startupProbe" + "startupProbe", + "tolerations" ] }, "envoy": { @@ -279,6 +291,15 @@ "requests" ] }, + "annotations": { + "type": "object" + }, + "nodeSelector": { + "type": "object" + }, + "tolerations": { + "type": "array" + }, "service": { "type": "object", "properties": { @@ -422,6 +443,7 @@ } }, "required": [ + "annotations", "args", "auth", "enabled", @@ -429,10 +451,12 @@ "image", "ingress", "loadBalancerPolicy", + "nodeSelector", "rate_limiter", "replicas", "resources", - "service" + "service", + "tolerations" ] }, "autoscaler": { @@ -498,12 +522,6 @@ "zeroIdleReplicas" ] }, - "nodeSelector": { - "type": "object" - }, - "tolerations": { - "type": "array" - }, "prometheus": { "type": "object", "properties": { @@ -1941,13 +1959,11 @@ "grafana", "metricsCollector", "nameOverride", - "nodeSelector", "opentelemetry-collector", "prometheus", "serverLoadMetric", "serverLoadThreshold", "tempo", - "tolerations", "tracing_sampling_rate", "triton" ] diff --git a/helm/supersonic/values.yaml b/helm/supersonic/values.yaml index c261f19..57e129d 100644 --- a/helm/supersonic/values.yaml +++ b/helm/supersonic/values.yaml @@ -35,7 +35,16 @@ triton: requests: cpu: 1 memory: "2G" - + + # -- Annotations for Triton pods + annotations: {} + + # -- Node selector for Triton pods + nodeSelector: {} + + # -- Tolerations for Triton pods + tolerations: [] + # -- Affinity rules for Triton pods - another way to request GPUs affinity: {} @@ -118,6 +127,16 @@ envoy: limits: cpu: 8.0 memory: "4G" + + # -- Annotations for Envoy pods + annotations: {} + + # -- Node selector for Envoy pods + nodeSelector: {} + + # -- Tolerations for Envoy pods + tolerations: [] + service: # -- This is the client-facing endpoint. In order to be able to connect to it, # either enable ingress, or use type: LoadBalancer. @@ -197,12 +216,6 @@ autoscaler: periodSeconds: 120 stepsize: 1 -# -- Node selector for all pods (Triton and Envoy) -nodeSelector: {} - -# -- Tolerations for all pods (Triton and Envoy) -tolerations: [] - prometheus: external: # -- Enable external Prometheus instance. If true, Prometheus parameters outside of prometheus.external will be ignored. diff --git a/values/values-geddes-cms.yaml b/values/values-geddes-cms.yaml index 453ae6e..35e1494 100644 --- a/values/values-geddes-cms.yaml +++ b/values/values-geddes-cms.yaml @@ -18,6 +18,12 @@ triton: resources: limits: { nvidia.com/gpu: 1, cpu: 2, memory: 4G} requests: { nvidia.com/gpu: 1, cpu: 2, memory: 4G} + nodeSelector: {'cms-af-prod': 'true'} + tolerations: + - key: hub.jupyter.org/dedicated + operator: Equal + value: cms-af + effect: NoSchedule service: labels: scrape_metrics: "true" @@ -30,6 +36,12 @@ triton: envoy: enabled: true + nodeSelector: {'cms-af-prod': 'true'} + tolerations: + - key: hub.jupyter.org/dedicated + operator: Equal + value: cms-af + effect: NoSchedule loadBalancerPolicy: "ROUND_ROBIN" service: type: LoadBalancer @@ -46,14 +58,6 @@ autoscaler: ingress: enabled: false -nodeSelector: {'cms-af-prod': 'true'} - -tolerations: - - key: hub.jupyter.org/dedicated - operator: Equal - value: cms-af - effect: NoSchedule - prometheus: enabled: true server: diff --git a/values/values-nautilus-cms.yaml b/values/values-nautilus-cms.yaml index 51bad05..3c17e21 100644 --- a/values/values-nautilus-cms.yaml +++ b/values/values-nautilus-cms.yaml @@ -23,6 +23,8 @@ triton: resources: limits: { cpu: 1, memory: 3G, nvidia.com/gpu: 1} requests: { cpu: 1, memory: 3G, nvidia.com/gpu: 1} + nodeSelector: + topology.kubernetes.io/zone: ucsd # affinity: # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: @@ -44,6 +46,8 @@ envoy: replicas: 1 grpc_route_timeout: 5s loadBalancerPolicy: "LEAST_REQUEST" + nodeSelector: + topology.kubernetes.io/zone: ucsd ingress: enabled: true hostName: sonic-cms.nrp-nautilus.io @@ -73,9 +77,6 @@ autoscaler: periodSeconds: 15 stepsize: 1 -nodeSelector: - topology.kubernetes.io/zone: ucsd - prometheus: enabled: true server: