Skip to content

refactor configuration to support interLink & other updates #68

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions docs/.values-table.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
| triton.command | list | `["/bin/sh","-c"]` | Command and arguments to run in Triton container |
| triton.args[0] | string | `"/opt/tritonserver/bin/tritonserver \\\n--model-repository=/tmp/ \\\n--log-verbose=0 \\\n--exit-timeout-secs=60\n"` | |
| triton.resources | object | `{"limits":{"cpu":1,"memory":"2G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource limits and requests for each Triton instance. You can add necessary GPU request here. |
| triton.annotations | object | `{}` | Annotations for Triton pods |
| triton.nodeSelector | object | `{}` | Node selector for Triton pods |
| triton.tolerations | list | `[]` | Tolerations for Triton pods |
| triton.affinity | object | `{}` | Affinity rules for Triton pods - another way to request GPUs |
| triton.modelRepository | object | `{"enabled":false,"mountPath":""}` | Model repository configuration |
| triton.modelRepository.mountPath | string | `""` | Model repository mount path |
Expand All @@ -25,6 +28,9 @@
| envoy.image | string | `"envoyproxy/envoy:v1.30.9"` | Envoy Proxy Docker image |
| envoy.args | list | `["--config-path","/etc/envoy/envoy.yaml","--log-level","info","--log-path","/dev/stdout"]` | Arguments for Envoy |
| envoy.resources | object | `{"limits":{"cpu":8,"memory":"4G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource requests and limits for Envoy Proxy. Note: an Envoy Proxy with too many connections might run out of CPU |
| envoy.annotations | object | `{}` | Annotations for Envoy pods |
| envoy.nodeSelector | object | `{}` | Node selector for Envoy pods |
| envoy.tolerations | list | `[]` | Tolerations for Envoy pods |
| envoy.service.type | string | `"ClusterIP"` | This is the client-facing endpoint. In order to be able to connect to it, either enable ingress, or use type: LoadBalancer. |
| envoy.service.ports | list | `[{"name":"grpc","port":8001,"targetPort":8001},{"name":"admin","port":9901,"targetPort":9901}]` | Envoy Service ports |
| envoy.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""}` | Ingress configuration for Envoy |
Expand Down Expand Up @@ -53,8 +59,6 @@
| autoscaler.scaleDown.stabilizationWindowSeconds | int | `600` | |
| autoscaler.scaleDown.periodSeconds | int | `120` | |
| autoscaler.scaleDown.stepsize | int | `1` | |
| nodeSelector | object | `{}` | Node selector for all pods (Triton and Envoy) |
| tolerations | list | `[]` | Tolerations for all pods (Triton and Envoy) |
| prometheus.external.enabled | bool | `false` | Enable external Prometheus instance. If true, Prometheus parameters outside of prometheus.external will be ignored. |
| prometheus.external.url | string | `""` | External Prometheus server url |
| prometheus.external.port | int | `443` | External Prometheus server port number |
Expand Down
12 changes: 8 additions & 4 deletions helm/supersonic/templates/envoy/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ spec:
app.kubernetes.io/name: {{ .Chart.Name }}
app.kubernetes.io/instance: {{ include "supersonic.name" . }}
app.kubernetes.io/component: envoy
{{- with .Values.envoy.annotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
containers:
- name: envoy
Expand Down Expand Up @@ -68,13 +72,13 @@ spec:
{{- end }}


{{- if .Values.nodeSelector }}
{{- if .Values.envoy.nodeSelector }}
nodeSelector:
{{ toYaml .Values.nodeSelector | nindent 8 }}
{{ toYaml .Values.envoy.nodeSelector | nindent 8 }}
{{- end }}
{{- if .Values.tolerations }}
{{- if .Values.envoy.tolerations }}
tolerations:
{{ toYaml .Values.tolerations | nindent 8 }}
{{ toYaml .Values.envoy.tolerations | nindent 8 }}
{{- end }}
restartPolicy: Always

Expand Down
12 changes: 8 additions & 4 deletions helm/supersonic/templates/triton/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ spec:
app.kubernetes.io/name: {{ .Chart.Name }}
app.kubernetes.io/instance: {{ include "supersonic.name" . }}
app.kubernetes.io/component: triton
{{- with .Values.triton.annotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
terminationGracePeriodSeconds: 60
containers:
Expand Down Expand Up @@ -88,12 +92,12 @@ spec:
affinity: {{ toYaml .Values.triton.affinity | nindent 8}}
{{- end }}

{{- if .Values.nodeSelector }}
{{- if .Values.triton.nodeSelector }}
nodeSelector:
{{ toYaml .Values.nodeSelector | nindent 8 }}
{{ toYaml .Values.triton.nodeSelector | nindent 8 }}
{{- end }}
{{- if .Values.tolerations }}
{{- if .Values.triton.tolerations }}
tolerations:
{{ toYaml .Values.tolerations | nindent 8 }}
{{ toYaml .Values.triton.tolerations | nindent 8 }}
{{- end }}
restartPolicy: Always
36 changes: 26 additions & 10 deletions helm/supersonic/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@
"requests"
]
},
"annotations": {
"type": "object"
},
"nodeSelector": {
"type": "object"
},
"tolerations": {
"type": "array"
},
"affinity": {
"type": "object"
},
Expand Down Expand Up @@ -211,15 +220,18 @@
},
"required": [
"affinity",
"annotations",
"args",
"command",
"image",
"modelRepository",
"nodeSelector",
"readinessProbe",
"replicas",
"resources",
"service",
"startupProbe"
"startupProbe",
"tolerations"
]
},
"envoy": {
Expand Down Expand Up @@ -279,6 +291,15 @@
"requests"
]
},
"annotations": {
"type": "object"
},
"nodeSelector": {
"type": "object"
},
"tolerations": {
"type": "array"
},
"service": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -422,17 +443,20 @@
}
},
"required": [
"annotations",
"args",
"auth",
"enabled",
"grpc_route_timeout",
"image",
"ingress",
"loadBalancerPolicy",
"nodeSelector",
"rate_limiter",
"replicas",
"resources",
"service"
"service",
"tolerations"
]
},
"autoscaler": {
Expand Down Expand Up @@ -498,12 +522,6 @@
"zeroIdleReplicas"
]
},
"nodeSelector": {
"type": "object"
},
"tolerations": {
"type": "array"
},
"prometheus": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -1941,13 +1959,11 @@
"grafana",
"metricsCollector",
"nameOverride",
"nodeSelector",
"opentelemetry-collector",
"prometheus",
"serverLoadMetric",
"serverLoadThreshold",
"tempo",
"tolerations",
"tracing_sampling_rate",
"triton"
]
Expand Down
27 changes: 20 additions & 7 deletions helm/supersonic/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,16 @@ triton:
requests:
cpu: 1
memory: "2G"


# -- Annotations for Triton pods
annotations: {}

# -- Node selector for Triton pods
nodeSelector: {}

# -- Tolerations for Triton pods
tolerations: []

# -- Affinity rules for Triton pods - another way to request GPUs
affinity: {}

Expand Down Expand Up @@ -118,6 +127,16 @@ envoy:
limits:
cpu: 8.0
memory: "4G"

# -- Annotations for Envoy pods
annotations: {}

# -- Node selector for Envoy pods
nodeSelector: {}

# -- Tolerations for Envoy pods
tolerations: []

service:
# -- This is the client-facing endpoint. In order to be able to connect to it,
# either enable ingress, or use type: LoadBalancer.
Expand Down Expand Up @@ -197,12 +216,6 @@ autoscaler:
periodSeconds: 120
stepsize: 1

# -- Node selector for all pods (Triton and Envoy)
nodeSelector: {}

# -- Tolerations for all pods (Triton and Envoy)
tolerations: []

prometheus:
external:
# -- Enable external Prometheus instance. If true, Prometheus parameters outside of prometheus.external will be ignored.
Expand Down
20 changes: 12 additions & 8 deletions values/values-geddes-cms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ triton:
resources:
limits: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
requests: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
nodeSelector: {'cms-af-prod': 'true'}
tolerations:
- key: hub.jupyter.org/dedicated
operator: Equal
value: cms-af
effect: NoSchedule
service:
labels:
scrape_metrics: "true"
Expand All @@ -30,6 +36,12 @@ triton:

envoy:
enabled: true
nodeSelector: {'cms-af-prod': 'true'}
tolerations:
- key: hub.jupyter.org/dedicated
operator: Equal
value: cms-af
effect: NoSchedule
loadBalancerPolicy: "ROUND_ROBIN"
service:
type: LoadBalancer
Expand All @@ -46,14 +58,6 @@ autoscaler:
ingress:
enabled: false

nodeSelector: {'cms-af-prod': 'true'}

tolerations:
- key: hub.jupyter.org/dedicated
operator: Equal
value: cms-af
effect: NoSchedule

prometheus:
enabled: true
server:
Expand Down
7 changes: 4 additions & 3 deletions values/values-nautilus-cms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ triton:
resources:
limits: { cpu: 1, memory: 3G, nvidia.com/gpu: 1}
requests: { cpu: 1, memory: 3G, nvidia.com/gpu: 1}
nodeSelector:
topology.kubernetes.io/zone: ucsd
# affinity:
# nodeAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
Expand All @@ -44,6 +46,8 @@ envoy:
replicas: 1
grpc_route_timeout: 5s
loadBalancerPolicy: "LEAST_REQUEST"
nodeSelector:
topology.kubernetes.io/zone: ucsd
ingress:
enabled: true
hostName: sonic-cms.nrp-nautilus.io
Expand Down Expand Up @@ -73,9 +77,6 @@ autoscaler:
periodSeconds: 15
stepsize: 1

nodeSelector:
topology.kubernetes.io/zone: ucsd

prometheus:
enabled: true
server:
Expand Down
Loading