From 6711cb75c3e2035e1dc04343ac273bc4db4cc6cf Mon Sep 17 00:00:00 2001 From: musa-asad Date: Mon, 20 May 2024 12:10:32 -0400 Subject: [PATCH 1/2] Added tolerations support for dcgmexporter and neuronmonitor. --- apis/v1alpha1/dcgmexpoter_types.go | 4 ++++ apis/v1alpha1/neuronmonitor_types.go | 4 ++++ internal/manifests/dcgmexporter/daemonset.go | 1 + internal/manifests/neuronmonitor/daemonset.go | 1 + 4 files changed, 10 insertions(+) diff --git a/apis/v1alpha1/dcgmexpoter_types.go b/apis/v1alpha1/dcgmexpoter_types.go index 51332794b..1d471b0de 100644 --- a/apis/v1alpha1/dcgmexpoter_types.go +++ b/apis/v1alpha1/dcgmexpoter_types.go @@ -43,6 +43,10 @@ type DcgmExporterSpec struct { // consumed in the config file for the Collector. // +optional Env []v1.EnvVar `json:"env,omitempty"` + // Toleration to schedule OpenTelemetry Collector pods. + // This is only relevant to daemonset, statefulset, and deployment mode + // +optional + Tolerations []v1.Toleration `json:"tolerations,omitempty"` // Volumes represents which volumes to use in the underlying collector deployment(s). // +optional // +listType=atomic diff --git a/apis/v1alpha1/neuronmonitor_types.go b/apis/v1alpha1/neuronmonitor_types.go index 8f68979e0..51677a3ba 100644 --- a/apis/v1alpha1/neuronmonitor_types.go +++ b/apis/v1alpha1/neuronmonitor_types.go @@ -62,6 +62,10 @@ type NeuronMonitorSpec struct { // consumed in the config file for the Collector. // +optional Env []v1.EnvVar `json:"env,omitempty"` + // Toleration to schedule OpenTelemetry Collector pods. + // This is only relevant to daemonset, statefulset, and deployment mode + // +optional + Tolerations []v1.Toleration `json:"tolerations,omitempty"` // Volumes represents which volumes to use in the underlying collector deployment(s). 
// +optional // +listType=atomic diff --git a/internal/manifests/dcgmexporter/daemonset.go b/internal/manifests/dcgmexporter/daemonset.go index 142c3c2c8..764352df6 100644 --- a/internal/manifests/dcgmexporter/daemonset.go +++ b/internal/manifests/dcgmexporter/daemonset.go @@ -40,6 +40,7 @@ func DaemonSet(params manifests.Params) *appsv1.DaemonSet { ServiceAccountName: ServiceAccountName(params.DcgmExp), Containers: []corev1.Container{Container(params.Config, params.Log, params.DcgmExp)}, Volumes: Volumes(params.DcgmExp), + Tolerations: params.DcgmExp.Spec.Tolerations, NodeSelector: params.DcgmExp.Spec.NodeSelector, Affinity: params.DcgmExp.Spec.Affinity, }, diff --git a/internal/manifests/neuronmonitor/daemonset.go b/internal/manifests/neuronmonitor/daemonset.go index b0562563f..85f6f00ea 100644 --- a/internal/manifests/neuronmonitor/daemonset.go +++ b/internal/manifests/neuronmonitor/daemonset.go @@ -40,6 +40,7 @@ func DaemonSet(params manifests.Params) *appsv1.DaemonSet { ServiceAccountName: ServiceAccountName(params.NeuronExp), Containers: []corev1.Container{Container(params.Config, params.Log, params.NeuronExp)}, Volumes: Volumes(params.NeuronExp), + Tolerations: params.NeuronExp.Spec.Tolerations, NodeSelector: params.NeuronExp.Spec.NodeSelector, Affinity: params.NeuronExp.Spec.Affinity, }, From 8297581c682dcdb7ccc7b5b9ad2133ca459defe3 Mon Sep 17 00:00:00 2001 From: musa-asad Date: Mon, 20 May 2024 13:22:09 -0400 Subject: [PATCH 2/2] Fixed comments. --- apis/v1alpha1/dcgmexpoter_types.go | 2 +- apis/v1alpha1/neuronmonitor_types.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apis/v1alpha1/dcgmexpoter_types.go b/apis/v1alpha1/dcgmexpoter_types.go index 1d471b0de..628490a48 100644 --- a/apis/v1alpha1/dcgmexpoter_types.go +++ b/apis/v1alpha1/dcgmexpoter_types.go @@ -43,7 +43,7 @@ type DcgmExporterSpec struct { // consumed in the config file for the Collector. 
// +optional Env []v1.EnvVar `json:"env,omitempty"` - // Toleration to schedule OpenTelemetry Collector pods. + // Tolerations to schedule DCGM Exporter pods. // This is only relevant to daemonset, statefulset, and deployment mode // +optional Tolerations []v1.Toleration `json:"tolerations,omitempty"` diff --git a/apis/v1alpha1/neuronmonitor_types.go b/apis/v1alpha1/neuronmonitor_types.go index 51677a3ba..d684791c2 100644 --- a/apis/v1alpha1/neuronmonitor_types.go +++ b/apis/v1alpha1/neuronmonitor_types.go @@ -62,7 +62,7 @@ type NeuronMonitorSpec struct { // consumed in the config file for the Collector. // +optional Env []v1.EnvVar `json:"env,omitempty"` - // Toleration to schedule OpenTelemetry Collector pods. + // Tolerations to schedule Neuron Monitor Exporter pods. // This is only relevant to daemonset, statefulset, and deployment mode // +optional Tolerations []v1.Toleration `json:"tolerations,omitempty"`