From 12257d68e189575ce34048767f8b7712aed51e63 Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 11 Jun 2025 11:18:59 -0400 Subject: [PATCH 1/3] support security context with dcgm --- ...oudwatch.aws.amazon.com_dcgmexporters.yaml | 161 ++++++++++++++++++ .../linux/dcgm-exporter-daemonset.yaml | 5 +- .../values.yaml | 12 +- 3 files changed, 172 insertions(+), 6 deletions(-) diff --git a/charts/amazon-cloudwatch-observability/crds/cloudwatch.aws.amazon.com_dcgmexporters.yaml b/charts/amazon-cloudwatch-observability/crds/cloudwatch.aws.amazon.com_dcgmexporters.yaml index d69daea4..d0d51d8c 100644 --- a/charts/amazon-cloudwatch-observability/crds/cloudwatch.aws.amazon.com_dcgmexporters.yaml +++ b/charts/amazon-cloudwatch-observability/crds/cloudwatch.aws.amazon.com_dcgmexporters.yaml @@ -265,6 +265,167 @@ spec: type: object x-kubernetes-map-type: atomic type: object + securityContext: + description: "SecurityContext configures the container security context + for the amazon-cloudwatch-agent container. \n In deployment, daemonset, + or statefulset mode, this controls the security context settings + for the primary application container. \n In sidecar mode, this + controls the security context for the injected sidecar container." + properties: + allowPrivilegeEscalation: + description: 'AllowPrivilegeEscalation controls whether a process + can gain more privileges than its parent process. This bool + directly controls if the no_new_privs flag will be set on the + container process. AllowPrivilegeEscalation is true always when + the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows.' + type: boolean + capabilities: + description: The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container + runtime. Note that this field cannot be set when spec.os.name + is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities type + type: string + type: array + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities type + type: string + type: array + type: object + privileged: + description: Run container in privileged mode. Processes in privileged + containers are essentially equivalent to root on the host. Defaults + to false. Note that this field cannot be set when spec.os.name + is windows. + type: boolean + procMount: + description: procMount denotes the type of proc mount to use for + the containers. The default is DefaultProcMount which uses the + container runtime defaults for readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: Whether this container has a read-only root filesystem. + Default is false. Note that this field cannot be set when spec.os.name + is windows. + type: boolean + runAsGroup: + description: The GID to run the entrypoint of the container process. + Uses runtime default if unset. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence. Note that this + field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail to start + the container if it does. If unset or false, no such validation + will be performed. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence. Note that this field cannot be set when spec.os.name + is windows. + format: int64 + type: integer + seLinuxOptions: + description: The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random + SELinux context for each container. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence. Note that this + field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies to + the container. + type: string + role: + description: Role is a SELinux role label that applies to + the container. + type: string + type: + description: Type is a SELinux type label that applies to + the container. + type: string + user: + description: User is a SELinux user label that applies to + the container. + type: string + type: object + seccompProfile: + description: The seccomp options to use by this container. If + seccomp options are provided at both the pod & container level, + the container options override the pod options. Note that this + field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: localhostProfile indicates a profile defined + in a file on the node should be used. The profile must be + preconfigured on the node to work. Must be a descending + path, relative to the kubelet's configured seccomp profile + location. Must be set if type is "Localhost". Must NOT be + set for any other type. + type: string + type: + description: "type indicates which kind of seccomp profile + will be applied. Valid options are: \n Localhost - a profile + defined in a file on the node should be used. RuntimeDefault + - the container runtime default profile should be used. + Unconfined - no profile should be applied." + type: string + required: + - type + type: object + windowsOptions: + description: The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will + be used. If set in both SecurityContext and PodSecurityContext, + the value specified in SecurityContext takes precedence. Note + that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: GMSACredentialSpec is where the GMSA admission + webhook (https://github.com/kubernetes-sigs/windows-gmsa) + inlines the contents of the GMSA credential spec named by + the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the GMSA + credential spec to use. + type: string + hostProcess: + description: HostProcess determines if a container should + be run as a 'Host Process' container. All of a Pod's containers + must have the same effective HostProcess value (it is not + allowed to have a mix of HostProcess containers and non-HostProcess + containers). In addition, if HostProcess is true then HostNetwork + must also be set to true. + type: boolean + runAsUserName: + description: The UserName in Windows to run the entrypoint + of the container process. Defaults to the user specified + in image metadata if unspecified. May also be set in PodSecurityContext. + If set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + type: string + type: object + type: object podAffinity: description: Describes pod affinity scheduling rules (e.g. co-locate this pod in the same node, zone, etc. as some other pod(s)). diff --git a/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml b/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml index c4d8a219..93cc641f 100644 --- a/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml +++ b/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml @@ -18,6 +18,9 @@ spec: {{- with .Values.dcgmExporter.resources }} resources: {{- toYaml . | nindent 4}} {{- end }} + {{- with .Values.dcgmExporter.securityContext }} + securityContext: {{- toYaml . | nindent 4 }} + {{- end }} env: - name: "DCGM_EXPORTER_KUBERNETES" value: "true" @@ -43,7 +46,7 @@ spec: secretName: amazon-cloudwatch-observability-agent-cert items: - key: tls.crt - path: server.crt + path: server.crt`` - key: tls.key path: server.key - name: "pod-gpu-resources" diff --git a/charts/amazon-cloudwatch-observability/values.yaml b/charts/amazon-cloudwatch-observability/values.yaml index 76fc7985..8b6d75a4 100644 --- a/charts/amazon-cloudwatch-observability/values.yaml +++ b/charts/amazon-cloudwatch-observability/values.yaml @@ -1035,10 +1035,10 @@ containerLogs: manager: name: image: - repository: cloudwatch-agent-operator - tag: 3.0.1 + repository: cwagent-operator-release + tag: latest repositoryDomainMap: - public: public.ecr.aws/cloudwatch-agent + public: 231392302985.dkr.ecr.us-west-2.amazonaws.com cn-north-1: 934860584483.dkr.ecr.cn-north-1.amazonaws.com.cn cn-northwest-1: 934860584483.dkr.ecr.cn-northwest-1.amazonaws.com.cn us-gov-east-1: 743662458514.dkr.ecr.us-gov-east-1.amazonaws.com @@ -1363,6 +1363,10 @@ dcgmExporter: kubeletPath: /var/lib/kubelet/pod-resources serviceAccount: name: # override exporter service account name + securityContext: + capabilities: + add: + - SYS_ADMIN affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -1521,8 +1525,6 @@ neuronMonitor: port: 8000 address: :8000 securityContext: - runAsNonRoot: false - runAsUser: 0 capabilities: add: - SYS_ADMIN From fe6b45b8778d2448e921a5e4388a1e40ffde36a9 Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 11 Jun 2025 15:59:23 -0400 Subject: [PATCH 2/3] clean up and fix crd --- ...oudwatch.aws.amazon.com_dcgmexporters.yaml | 322 +++++++++--------- .../linux/dcgm-exporter-daemonset.yaml | 2 +- .../values.yaml | 8 +- 3 files changed, 167 insertions(+), 165 deletions(-) diff --git a/charts/amazon-cloudwatch-observability/crds/cloudwatch.aws.amazon.com_dcgmexporters.yaml b/charts/amazon-cloudwatch-observability/crds/cloudwatch.aws.amazon.com_dcgmexporters.yaml index d0d51d8c..7109831a 100644 --- a/charts/amazon-cloudwatch-observability/crds/cloudwatch.aws.amazon.com_dcgmexporters.yaml +++ b/charts/amazon-cloudwatch-observability/crds/cloudwatch.aws.amazon.com_dcgmexporters.yaml @@ -55,6 +55,167 @@ spec: spec: description: DcgmExporterSpec defines the desired state of DcgmExporter. properties: + securityContext: + description: "SecurityContext configures the container security context + for the amazon-cloudwatch-agent container. \n In deployment, daemonset, + or statefulset mode, this controls the security context settings + for the primary application container. \n In sidecar mode, this + controls the security context for the injected sidecar container." + properties: + allowPrivilegeEscalation: + description: 'AllowPrivilegeEscalation controls whether a process + can gain more privileges than its parent process. This bool + directly controls if the no_new_privs flag will be set on the + container process. AllowPrivilegeEscalation is true always when + the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows.' + type: boolean + capabilities: + description: The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container + runtime. Note that this field cannot be set when spec.os.name + is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities type + type: string + type: array + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities type + type: string + type: array + type: object + privileged: + description: Run container in privileged mode. Processes in privileged + containers are essentially equivalent to root on the host. Defaults + to false. Note that this field cannot be set when spec.os.name + is windows. + type: boolean + procMount: + description: procMount denotes the type of proc mount to use for + the containers. The default is DefaultProcMount which uses the + container runtime defaults for readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: Whether this container has a read-only root filesystem. + Default is false. Note that this field cannot be set when spec.os.name + is windows. + type: boolean + runAsGroup: + description: The GID to run the entrypoint of the container process. + Uses runtime default if unset. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence. Note that this + field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail to start + the container if it does. If unset or false, no such validation + will be performed. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence. Note that this field cannot be set when spec.os.name + is windows. + format: int64 + type: integer + seLinuxOptions: + description: The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random + SELinux context for each container. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence. Note that this + field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies to + the container. + type: string + role: + description: Role is a SELinux role label that applies to + the container. + type: string + type: + description: Type is a SELinux type label that applies to + the container. + type: string + user: + description: User is a SELinux user label that applies to + the container. + type: string + type: object + seccompProfile: + description: The seccomp options to use by this container. If + seccomp options are provided at both the pod & container level, + the container options override the pod options. Note that this + field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: localhostProfile indicates a profile defined + in a file on the node should be used. The profile must be + preconfigured on the node to work. Must be a descending + path, relative to the kubelet's configured seccomp profile + location. Must be set if type is "Localhost". Must NOT be + set for any other type. + type: string + type: + description: "type indicates which kind of seccomp profile + will be applied. Valid options are: \n Localhost - a profile + defined in a file on the node should be used. RuntimeDefault + - the container runtime default profile should be used. + Unconfined - no profile should be applied." + type: string + required: + - type + type: object + windowsOptions: + description: The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will + be used. If set in both SecurityContext and PodSecurityContext, + the value specified in SecurityContext takes precedence. Note + that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: GMSACredentialSpec is where the GMSA admission + webhook (https://github.com/kubernetes-sigs/windows-gmsa) + inlines the contents of the GMSA credential spec named by + the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the GMSA + credential spec to use. + type: string + hostProcess: + description: HostProcess determines if a container should + be run as a 'Host Process' container. All of a Pod's containers + must have the same effective HostProcess value (it is not + allowed to have a mix of HostProcess containers and non-HostProcess + containers). In addition, if HostProcess is true then HostNetwork + must also be set to true. + type: boolean + runAsUserName: + description: The UserName in Windows to run the entrypoint + of the container process. Defaults to the user specified + in image metadata if unspecified. May also be set in PodSecurityContext. + If set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + type: string + type: object + type: object affinity: description: If specified, indicates the pod's scheduling constraints properties: @@ -265,167 +426,6 @@ spec: type: object x-kubernetes-map-type: atomic type: object - securityContext: - description: "SecurityContext configures the container security context - for the amazon-cloudwatch-agent container. \n In deployment, daemonset, - or statefulset mode, this controls the security context settings - for the primary application container. \n In sidecar mode, this - controls the security context for the injected sidecar container." - properties: - allowPrivilegeEscalation: - description: 'AllowPrivilegeEscalation controls whether a process - can gain more privileges than its parent process. This bool - directly controls if the no_new_privs flag will be set on the - container process. AllowPrivilegeEscalation is true always when - the container is: 1) run as Privileged 2) has CAP_SYS_ADMIN - Note that this field cannot be set when spec.os.name is windows.' - type: boolean - capabilities: - description: The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the container - runtime. Note that this field cannot be set when spec.os.name - is windows. - properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX capabilities type - type: string - type: array - drop: - description: Removed capabilities - items: - description: Capability represent POSIX capabilities type - type: string - type: array - type: object - privileged: - description: Run container in privileged mode. Processes in privileged - containers are essentially equivalent to root on the host. Defaults - to false. Note that this field cannot be set when spec.os.name - is windows. - type: boolean - procMount: - description: procMount denotes the type of proc mount to use for - the containers. The default is DefaultProcMount which uses the - container runtime defaults for readonly paths and masked paths. - This requires the ProcMountType feature flag to be enabled. - Note that this field cannot be set when spec.os.name is windows. - type: string - readOnlyRootFilesystem: - description: Whether this container has a read-only root filesystem. - Default is false. Note that this field cannot be set when spec.os.name - is windows. - type: boolean - runAsGroup: - description: The GID to run the entrypoint of the container process. - Uses runtime default if unset. May also be set in PodSecurityContext. If - set in both SecurityContext and PodSecurityContext, the value - specified in SecurityContext takes precedence. Note that this - field cannot be set when spec.os.name is windows. - format: int64 - type: integer - runAsNonRoot: - description: Indicates that the container must run as a non-root - user. If true, the Kubelet will validate the image at runtime - to ensure that it does not run as UID 0 (root) and fail to start - the container if it does. If unset or false, no such validation - will be performed. May also be set in PodSecurityContext. If - set in both SecurityContext and PodSecurityContext, the value - specified in SecurityContext takes precedence. - type: boolean - runAsUser: - description: The UID to run the entrypoint of the container process. - Defaults to user specified in image metadata if unspecified. - May also be set in PodSecurityContext. If set in both SecurityContext - and PodSecurityContext, the value specified in SecurityContext - takes precedence. Note that this field cannot be set when spec.os.name - is windows. - format: int64 - type: integer - seLinuxOptions: - description: The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random - SELinux context for each container. May also be set in PodSecurityContext. If - set in both SecurityContext and PodSecurityContext, the value - specified in SecurityContext takes precedence. Note that this - field cannot be set when spec.os.name is windows. - properties: - level: - description: Level is SELinux level label that applies to - the container. - type: string - role: - description: Role is a SELinux role label that applies to - the container. - type: string - type: - description: Type is a SELinux type label that applies to - the container. - type: string - user: - description: User is a SELinux user label that applies to - the container. - type: string - type: object - seccompProfile: - description: The seccomp options to use by this container. If - seccomp options are provided at both the pod & container level, - the container options override the pod options. Note that this - field cannot be set when spec.os.name is windows. - properties: - localhostProfile: - description: localhostProfile indicates a profile defined - in a file on the node should be used. The profile must be - preconfigured on the node to work. Must be a descending - path, relative to the kubelet's configured seccomp profile - location. Must be set if type is "Localhost". Must NOT be - set for any other type. - type: string - type: - description: "type indicates which kind of seccomp profile - will be applied. Valid options are: \n Localhost - a profile - defined in a file on the node should be used. RuntimeDefault - - the container runtime default profile should be used. - Unconfined - no profile should be applied." - type: string - required: - - type - type: object - windowsOptions: - description: The Windows specific settings applied to all containers. - If unspecified, the options from the PodSecurityContext will - be used. If set in both SecurityContext and PodSecurityContext, - the value specified in SecurityContext takes precedence. Note - that this field cannot be set when spec.os.name is linux. - properties: - gmsaCredentialSpec: - description: GMSACredentialSpec is where the GMSA admission - webhook (https://github.com/kubernetes-sigs/windows-gmsa) - inlines the contents of the GMSA credential spec named by - the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name of the GMSA - credential spec to use. - type: string - hostProcess: - description: HostProcess determines if a container should - be run as a 'Host Process' container. All of a Pod's containers - must have the same effective HostProcess value (it is not - allowed to have a mix of HostProcess containers and non-HostProcess - containers). In addition, if HostProcess is true then HostNetwork - must also be set to true. - type: boolean - runAsUserName: - description: The UserName in Windows to run the entrypoint - of the container process. Defaults to the user specified - in image metadata if unspecified. May also be set in PodSecurityContext. - If set in both SecurityContext and PodSecurityContext, the - value specified in SecurityContext takes precedence. - type: string - type: object - type: object podAffinity: description: Describes pod affinity scheduling rules (e.g. co-locate this pod in the same node, zone, etc. as some other pod(s)). diff --git a/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml b/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml index 93cc641f..e3c50ced 100644 --- a/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml +++ b/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml @@ -46,7 +46,7 @@ spec: secretName: amazon-cloudwatch-observability-agent-cert items: - key: tls.crt - path: server.crt`` + path: server.crt - key: tls.key path: server.key - name: "pod-gpu-resources" diff --git a/charts/amazon-cloudwatch-observability/values.yaml b/charts/amazon-cloudwatch-observability/values.yaml index 8b6d75a4..6c3ff5dc 100644 --- a/charts/amazon-cloudwatch-observability/values.yaml +++ b/charts/amazon-cloudwatch-observability/values.yaml @@ -1035,10 +1035,10 @@ containerLogs: manager: name: image: - repository: cwagent-operator-release - tag: latest + repository: cloudwatch-agent-operator + tag: 3.0.0 repositoryDomainMap: - public: 231392302985.dkr.ecr.us-west-2.amazonaws.com + public: public.ecr.aws/cloudwatch-agent cn-north-1: 934860584483.dkr.ecr.cn-north-1.amazonaws.com.cn cn-northwest-1: 934860584483.dkr.ecr.cn-northwest-1.amazonaws.com.cn us-gov-east-1: 743662458514.dkr.ecr.us-gov-east-1.amazonaws.com @@ -1525,6 +1525,8 @@ neuronMonitor: port: 8000 address: :8000 securityContext: + runAsNonRoot: false + runAsUser: 0 capabilities: add: - SYS_ADMIN From 072a3efe1ac6048de3819b49562de740ad008a6c Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 11 Jun 2025 16:01:54 -0400 Subject: [PATCH 3/3] fix version --- charts/amazon-cloudwatch-observability/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/amazon-cloudwatch-observability/values.yaml b/charts/amazon-cloudwatch-observability/values.yaml index 6c3ff5dc..166d65f3 100644 --- a/charts/amazon-cloudwatch-observability/values.yaml +++ b/charts/amazon-cloudwatch-observability/values.yaml @@ -1036,7 +1036,7 @@ manager: name: image: repository: cloudwatch-agent-operator - tag: 3.0.0 + tag: 3.0.1 repositoryDomainMap: public: public.ecr.aws/cloudwatch-agent cn-north-1: 934860584483.dkr.ecr.cn-north-1.amazonaws.com.cn