fastmachinelearning · kondratyevd · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025
diff --git a/docs/.values-table.md b/docs/.values-table.md
@@ -10,6 +10,9 @@
 | triton.command | list | `["/bin/sh","-c"]` | Command and arguments to run in Triton container |
 | triton.args[0] | string | `"/opt/tritonserver/bin/tritonserver \\\n--model-repository=/tmp/ \\\n--log-verbose=0 \\\n--exit-timeout-secs=60\n"` |  |
 | triton.resources | object | `{"limits":{"cpu":1,"memory":"2G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource limits and requests for each Triton instance. You can add necessary GPU request here. |
+| triton.annotations | object | `{}` | Annotations for Triton pods |
+| triton.nodeSelector | object | `{}` | Node selector for Triton pods |
+| triton.tolerations | list | `[]` | Tolerations for Triton pods |
 | triton.affinity | object | `{}` | Affinity rules for Triton pods - another way to request GPUs |
 | triton.modelRepository | object | `{"enabled":false,"mountPath":""}` | Model repository configuration |
 | triton.modelRepository.mountPath | string | `""` | Model repository mount path |
@@ -25,6 +28,9 @@
 | envoy.image | string | `"envoyproxy/envoy:v1.30.9"` | Envoy Proxy Docker image |
 | envoy.args | list | `["--config-path","/etc/envoy/envoy.yaml","--log-level","info","--log-path","/dev/stdout"]` | Arguments for Envoy |
 | envoy.resources | object | `{"limits":{"cpu":8,"memory":"4G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource requests and limits for Envoy Proxy. Note: an Envoy Proxy with too many connections might run out of CPU |
+| envoy.annotations | object | `{}` | Annotations for Envoy pods |
+| envoy.nodeSelector | object | `{}` | Node selector for Envoy pods |
+| envoy.tolerations | list | `[]` | Tolerations for Envoy pods |
 | envoy.service.type | string | `"ClusterIP"` | This is the client-facing endpoint. In order to be able to connect to it, either enable ingress, or use type: LoadBalancer. |
 | envoy.service.ports | list | `[{"name":"grpc","port":8001,"targetPort":8001},{"name":"admin","port":9901,"targetPort":9901}]` | Envoy Service ports |
 | envoy.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""}` | Ingress configuration for Envoy |
@@ -53,8 +59,6 @@
 | autoscaler.scaleDown.stabilizationWindowSeconds | int | `600` |  |
 | autoscaler.scaleDown.periodSeconds | int | `120` |  |
 | autoscaler.scaleDown.stepsize | int | `1` |  |
-| nodeSelector | object | `{}` | Node selector for all pods (Triton and Envoy) |
-| tolerations | list | `[]` | Tolerations for all pods (Triton and Envoy) |
 | prometheus.external.enabled | bool | `false` | Enable external Prometheus instance. If true, Prometheus parameters outside of prometheus.external will be ignored. |
 | prometheus.external.url | string | `""` | External Prometheus server url |
 | prometheus.external.port | int | `443` | External Prometheus server port number |

diff --git a/helm/supersonic/templates/envoy/deployment.yaml b/helm/supersonic/templates/envoy/deployment.yaml
@@ -21,6 +21,10 @@ spec:
         app.kubernetes.io/name: {{ .Chart.Name }}
         app.kubernetes.io/instance: {{ include "supersonic.name" . }}
         app.kubernetes.io/component: envoy
+      {{- with .Values.envoy.annotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
     spec:
       containers:
       - name: envoy
@@ -68,13 +72,13 @@ spec:
       {{- end }}
 
 
-{{- if .Values.nodeSelector }}
+      {{- with .Values.envoy.nodeSelector }}
       nodeSelector:
-{{ toYaml .Values.nodeSelector | nindent 8 }}
+        {{- toYaml . | nindent 8 }}
 {{- end }}
-{{- if .Values.tolerations }}
+      {{- with .Values.envoy.tolerations }}
       tolerations:
-{{ toYaml .Values.tolerations | nindent 8 }}
+        {{- toYaml . | nindent 8 }}
 {{- end }}
       restartPolicy: Always
 

diff --git a/helm/supersonic/templates/triton/deployment.yaml b/helm/supersonic/templates/triton/deployment.yaml
@@ -19,6 +19,10 @@ spec:
         app.kubernetes.io/name: {{ .Chart.Name }}
         app.kubernetes.io/instance: {{ include "supersonic.name" . }}
         app.kubernetes.io/component: triton
+      {{- with .Values.triton.annotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
     spec:
       terminationGracePeriodSeconds: 60
       containers:
@@ -88,12 +92,12 @@ spec:
       affinity: {{ toYaml .Values.triton.affinity | nindent 8}}
 {{- end }}
 
-{{- if .Values.nodeSelector }}
+      {{- with .Values.triton.nodeSelector }}
       nodeSelector:
-{{ toYaml .Values.nodeSelector | nindent 8 }}
+        {{- toYaml . | nindent 8 }}
 {{- end }}
-{{- if .Values.tolerations }}
+      {{- with .Values.triton.tolerations }}
       tolerations:
-{{ toYaml .Values.tolerations | nindent 8 }}
+        {{- toYaml . | nindent 8 }}
 {{- end }}
       restartPolicy: Always
diff --git a/helm/supersonic/values.schema.json b/helm/supersonic/values.schema.json
@@ -71,6 +71,15 @@
             "requests"
           ]
         },
+        "annotations": {
+          "type": "object"
+        },
+        "nodeSelector": {
+          "type": "object"
+        },
+        "tolerations": {
+          "type": "array"
+        },
         "affinity": {
           "type": "object"
         },
@@ -211,15 +220,18 @@
       },
       "required": [
         "affinity",
+        "annotations",
         "args",
         "command",
         "image",
         "modelRepository",
+        "nodeSelector",
         "readinessProbe",
         "replicas",
         "resources",
         "service",
-        "startupProbe"
+        "startupProbe",
+        "tolerations"
       ]
     },
     "envoy": {
@@ -279,6 +291,15 @@
             "requests"
           ]
         },
+        "annotations": {
+          "type": "object"
+        },
+        "nodeSelector": {
+          "type": "object"
+        },
+        "tolerations": {
+          "type": "array"
+        },
         "service": {
           "type": "object",
           "properties": {
@@ -422,17 +443,20 @@
         }
       },
       "required": [
+        "annotations",
         "args",
         "auth",
         "enabled",
         "grpc_route_timeout",
         "image",
         "ingress",
         "loadBalancerPolicy",
+        "nodeSelector",
         "rate_limiter",
         "replicas",
         "resources",
-        "service"
+        "service",
+        "tolerations"
       ]
     },
     "autoscaler": {
@@ -498,12 +522,6 @@
         "zeroIdleReplicas"
       ]
     },
-    "nodeSelector": {
-      "type": "object"
-    },
-    "tolerations": {
-      "type": "array"
-    },
     "prometheus": {
       "type": "object",
       "properties": {
@@ -1941,13 +1959,11 @@
     "grafana",
     "metricsCollector",
     "nameOverride",
-    "nodeSelector",
     "opentelemetry-collector",
     "prometheus",
     "serverLoadMetric",
     "serverLoadThreshold",
     "tempo",
-    "tolerations",
     "tracing_sampling_rate",
     "triton"
   ]

diff --git a/helm/supersonic/values.yaml b/helm/supersonic/values.yaml
@@ -35,7 +35,16 @@ triton:
     requests:
       cpu: 1
       memory: "2G"
-
+
+  # -- Annotations for Triton pods
+  annotations: {}
+
+  # -- Node selector for Triton pods
+  nodeSelector: {}
+
+  # -- Tolerations for Triton pods
+  tolerations: []
+
   # -- Affinity rules for Triton pods - another way to request GPUs
   affinity: {}
 
@@ -118,6 +127,16 @@ envoy:
     limits:
       cpu: 8.0
       memory: "4G"
+
+  # -- Annotations for Envoy pods
+  annotations: {}
+
+  # -- Node selector for Envoy pods
+  nodeSelector: {}
+
+  # -- Tolerations for Envoy pods
+  tolerations: []
+
   service:
     # -- This is the client-facing endpoint. In order to be able to connect to it,
     # either enable ingress, or use type: LoadBalancer.
@@ -197,12 +216,6 @@ autoscaler:
     periodSeconds: 120
     stepsize: 1
 
-# -- Node selector for all pods (Triton and Envoy)
-nodeSelector: {}
-
-# -- Tolerations for all pods (Triton and Envoy)
-tolerations: []
-
 prometheus:
   external:
     # -- Enable external Prometheus instance. If true, Prometheus parameters outside of prometheus.external will be ignored.

diff --git a/values/values-geddes-cms.yaml b/values/values-geddes-cms.yaml
@@ -18,6 +18,12 @@ triton:
   resources:
     limits: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
     requests: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
+  nodeSelector: {'cms-af-prod': 'true'}
+  tolerations:
+    - key: hub.jupyter.org/dedicated
+      operator: Equal
+      value: cms-af
+      effect: NoSchedule
   service:
     labels:
       scrape_metrics: "true"
@@ -30,6 +36,12 @@ triton:
 
 envoy:
   enabled: true
+  nodeSelector: {'cms-af-prod': 'true'}
+  tolerations:
+    - key: hub.jupyter.org/dedicated
+      operator: Equal
+      value: cms-af
+      effect: NoSchedule
   loadBalancerPolicy: "ROUND_ROBIN"
   service:
     type: LoadBalancer
@@ -46,14 +58,6 @@ autoscaler:
 ingress:
   enabled: false
 
-nodeSelector: {'cms-af-prod': 'true'}
-
-tolerations:
-  - key: hub.jupyter.org/dedicated
-    operator: Equal
-    value: cms-af
-    effect: NoSchedule
-
 prometheus:
   enabled: true
   server:

diff --git a/values/values-nautilus-cms.yaml b/values/values-nautilus-cms.yaml
@@ -23,6 +23,8 @@ triton:
   resources:
     limits: { cpu: 1, memory: 3G, nvidia.com/gpu: 1}
     requests: { cpu: 1, memory: 3G, nvidia.com/gpu: 1}
+  nodeSelector:
+    topology.kubernetes.io/zone: ucsd
   # affinity:
   #   nodeAffinity:
   #     requiredDuringSchedulingIgnoredDuringExecution:
@@ -44,6 +46,8 @@ envoy:
   replicas: 1
   grpc_route_timeout: 5s
   loadBalancerPolicy: "LEAST_REQUEST"
+  nodeSelector:
+    topology.kubernetes.io/zone: ucsd
   ingress:
     enabled: true
     hostName: sonic-cms.nrp-nautilus.io
@@ -73,9 +77,6 @@ autoscaler:
     periodSeconds: 15
     stepsize: 1
 
-nodeSelector:
-  topology.kubernetes.io/zone: ucsd
-
 prometheus:
   enabled: true
   server: