diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index e01a5f997..2d9e4ef8e 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -178,6 +178,10 @@ spec: type: array items: type: string + dropped_pod_capabilities: + type: array + items: + type: string cluster_domain: type: string default: "cluster.local" @@ -324,8 +328,17 @@ spec: type: integer spilo_runasgroup: type: integer + spilo_runasnonroot: + type: boolean spilo_fsgroup: type: integer + spilo_seccompprofile: + type: object + properties: + localhostProfile: + type: string + type: + type: string spilo_privileged: type: boolean default: false diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index 6f938cf8f..ef4c7090f 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -466,8 +466,17 @@ spec: type: integer spiloRunAsGroup: type: integer + spiloRunAsNonRoot: + type: boolean spiloFSGroup: type: integer + spiloSeccompProfile: + type: object + properties: + localhostProfile: + type: string + type: + type: string standby: type: object properties: diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index bca269b0a..7f9f2ec1c 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -98,6 +98,10 @@ configKubernetes: # additional_pod_capabilities: # - "SYS_NICE" + # list of dropped capabilities for postgres container + # dropped_pod_capabilities: + # - "ALL" + # default DNS domain of K8s cluster where operator is running cluster_domain: cluster.local # additional labels assigned to the cluster objects @@ -196,10 +200,15 @@ configKubernetes: # set user and group for the spilo container (required to run Spilo as non-root process) # spilo_runasuser: 101 # spilo_runasgroup: 103 + # spilo_runasnonroot: true # group ID with write-access to volumes (required to run Spilo as non-root process) # spilo_fsgroup: 103 + # spilo_seccompprofile: + # type: Localhost + # localhostProfile: profiles/audit.json + # whether the Spilo container should run in privileged mode spilo_privileged: false # whether the Spilo container should run with additional permissions other than parent. diff --git a/docs/administrator.md b/docs/administrator.md index c44d08f90..164c75789 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -993,81 +993,7 @@ with `USE_WALG_BACKUP: "true"`. ### Google Cloud Platform setup -When using GCP, there are two authentication methods to allow the postgres -cluster to access buckets to write WAL-E logs: Workload Identity (recommended) -or using a GCP Service Account Key (legacy). - -#### Workload Identity setup - -To configure the operator on GCP using Workload Identity these prerequisites are -needed. 
-
-* [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) enabled on the GKE cluster where the operator will be deployed
-* A GCP service account with the proper IAM setup to access the GCS bucket for the WAL-E logs
-* An IAM policy granting the Kubernetes service account the
-  `roles/iam.workloadIdentityUser` role on the GCP service account, e.g.:
-```bash
-gcloud iam service-accounts add-iam-policy-binding <GCP_SERVICE_ACCOUNT>@<GCP_PROJECT_ID>.iam.gserviceaccount.com \
-  --role roles/iam.workloadIdentityUser \
-  --member "serviceAccount:PROJECT_ID.svc.id.goog[<K8S_NAMESPACE>/postgres-pod-custom]"
-```
-
-The configuration parameters that we will be using are:
-
-* `wal_gs_bucket`
-
-1. Create a custom Kubernetes service account to be used by Patroni running on
-the postgres cluster pods, this service account should include an annotation
-with the email address of the Google IAM service account used to communicate
-with the GCS bucket, e.g.
-
-```yml
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: postgres-pod-custom
-  namespace: <K8S_NAMESPACE>
-  annotations:
-    iam.gke.io/gcp-service-account: <GCP_SERVICE_ACCOUNT>@<GCP_PROJECT_ID>.iam.gserviceaccount.com
-```
-
-2. Specify the new custom service account in your [operator paramaters](./reference/operator_parameters.md)
-
-If using manual deployment or kustomize, this is done by setting
-`pod_service_account_name` in your configuration file specified in the
-[postgres-operator deployment](../manifests/postgres-operator.yaml#L37)
-
-If deploying the operator [using Helm](./quickstart.md#helm-chart), this can
-be specified in the chart's values file, e.g.:
-
-```yml
-...
-podServiceAccount:
-  name: postgres-pod-custom
-```
-
-3. Setup your operator configuration values. Ensure that the operator's configuration
-is set up like the following:
-```yml
-...
-aws_or_gcp:
-  # additional_secret_mount: ""
-  # additional_secret_mount_path: ""
-  # aws_region: eu-central-1
-  # kube_iam_role: ""
-  # log_s3_bucket: ""
-  # wal_s3_bucket: ""
-  wal_gs_bucket: "postgres-backups-bucket-28302F2" # name of bucket on where to save the WAL-E logs
-  # gcp_credentials: ""
-...
-```
-
-Continue to shared steps below.
-
-#### GCP Service Account Key setup
-
-To configure the operator on GCP using a GCP service account key these
-prerequisites are needed.
+To configure the operator on GCP, the following prerequisites are needed:
 
 * A service account with the proper IAM setup to access the GCS bucket for the WAL-E logs
 * The credentials file for the service account.
@@ -1111,10 +1037,7 @@ aws_or_gcp:
 ...
 ```
 
-Once you have set up authentication using one of the two methods above, continue
-with the remaining shared steps:
-
-1. Setup pod environment configmap that instructs the operator to use WAL-G,
+3. Set up a pod environment ConfigMap that instructs the operator to use WAL-G,
 instead of WAL-E, for backup and restore.
 ```yml
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: pod-env-overrides
   namespace: postgres-operator-system
 data:
   USE_WALG_BACKUP: "true"
   USE_WALG_RESTORE: "true"
   CLONE_USE_WALG_RESTORE: "true"
 ```
 
-2. Then provide this configmap in postgres-operator settings:
+4. Then provide this ConfigMap in the postgres-operator settings:
 ```yml
 ...
 # namespaced name of the ConfigMap with environment variables to populate on every pod
 pod_environment_configmap: "postgres-operator-system/pod-env-overrides"
 ...
 ```
@@ -1308,6 +1231,12 @@ configuration:
     volumeMounts:
     - mountPath: /custom-pgdata-mountpoint
       name: pgdata
+    securityContext:
+      allowPrivilegeEscalation: false
+      capabilities:
+        drop:
+          - ALL
+      runAsNonRoot: true
 - ...
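+  # dropping ALL capabilities together with runAsNonRoot keeps the sidecar
+  # unprivileged even if the image's default user or capability set changes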
 ```
diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md
index a54754473..06407b191 100644
--- a/docs/reference/cluster_manifest.md
+++ b/docs/reference/cluster_manifest.md
@@ -78,6 +78,11 @@ These parameters are grouped directly under the `spec` key in the manifest.
   This must be set to run the container without root. By default the container
   runs with root. This option only works for Spilo versions >= 1.6-p3.
 
+* **spiloRunAsNonRoot**
+  determines whether the Spilo container must run as a non-root user. If set
+  to true, the image is validated at runtime and the kubelet refuses to start
+  the container if it runs as UID 0 (root). Optional.
+
 * **spiloFSGroup**
   the Persistent Volumes for the Spilo pods in the StatefulSet will be owned and
   writable by the group ID specified. This will override the **spilo_fsgroup**
@@ -85,6 +90,14 @@ These parameters are grouped directly under the `spec` key in the manifest.
   requires a custom Spilo image. Note the FSGroup of a Pod cannot be changed
   without recreating a new Pod. Optional.
 
+* **spiloSeccompProfile**
+  the seccomp profile applied to the Spilo pod's security context. Seccomp
+  (Secure Computing) is a Linux kernel feature that lets a userspace program
+  install syscall filters. For containers, these filters are collated into
+  seccomp profiles that restrict which syscalls and arguments are permitted,
+  reducing the chance that a Linux kernel vulnerability will be exploited.
+  Overrides the **spilo_seccompprofile** operator configuration. Optional.
+
 * **enableMasterLoadBalancer**
   boolean flag to override the operator defaults (set by the
   `enable_master_load_balancer` parameter) to define whether to enable the load
@@ -483,6 +496,10 @@ defined in the sidecar dictionary:
   [CPU and memory requests and limits](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container)
   for each sidecar container. Optional.
 
+* **securityContext**
+  a [security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context)
+  defining privilege and access control settings for each sidecar container.
+  Optional.
+
 ### Requests
 
 CPU and memory requests for the sidecar container.
diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md
index 198870d77..f905d14c5 100644
--- a/docs/reference/operator_parameters.md
+++ b/docs/reference/operator_parameters.md
@@ -453,12 +453,25 @@ configuration they are grouped under the `kubernetes` key.
   This must be set to run the container without root. By default the container
   runs with root. This option only works for Spilo versions >= 1.6-p3.
 
+* **spilo_runasnonroot**
+  determines whether the Spilo container must run as a non-root user. If set
+  to true, the image is validated at runtime and the kubelet refuses to start
+  the container if it runs as UID 0 (root). See the sketch below.
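+
+  A minimal OperatorConfiguration sketch (values shown are illustrative, not
+  operator defaults):
+
+  ```yaml
+  configuration:
+    kubernetes:
+      spilo_runasuser: 101
+      spilo_runasgroup: 103
+      spilo_runasnonroot: true
+  ```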
+
 * **spilo_fsgroup**
   the Persistent Volumes for the Spilo pods in the StatefulSet will be owned and
   writable by the group ID specified. This is required to run Spilo as a
   non-root process, but requires a custom Spilo image. Note the FSGroup of a Pod
   cannot be changed without recreating a new Pod.
 
+* **spilo_seccompprofile**
+  the seccomp profile applied to the Spilo pod's security context, given as an
+  object with the keys `type` and `localhostProfile`. Seccomp (Secure
+  Computing) is a Linux kernel feature that lets a userspace program install
+  syscall filters. For containers, these filters are collated into seccomp
+  profiles that restrict which syscalls and arguments are permitted, reducing
+  the chance that a Linux kernel vulnerability will be exploited. Can be
+  overridden per cluster with **spiloSeccompProfile**. The default is empty.
+
 * **spilo_privileged**
   whether the Spilo container should run in privileged mode. Privileged mode is
   used for AWS volume resizing and not required if you don't need that
@@ -475,6 +488,10 @@ configuration they are grouped under the `kubernetes` key.
   PodSecruityPolicy allows the capabilities listed here. Otherwise, the
   container will not start. The default is empty.
 
+* **dropped_pod_capabilities**
+  list of capabilities to be dropped from the postgres container's
+  SecurityContext (e.g. `ALL` or `SYS_NICE`). The default is empty.
+
 * **master_pod_move_timeout**
   The period of time to wait for the success of migration of master pods from
   an unschedulable node. The migration includes Patroni switchovers to
diff --git a/docs/user.md b/docs/user.md
index fa82e3344..219be0544 100644
--- a/docs/user.md
+++ b/docs/user.md
@@ -987,6 +987,12 @@ spec:
       env:
         - name: "ENV_VAR_NAME"
           value: "any-k8s-env-things"
+      securityContext:
+        allowPrivilegeEscalation: false
+        capabilities:
+          drop:
+            - ALL
+        runAsNonRoot: true
 ```
 
 In addition to any environment variables you specify, the following environment
diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml
index 8d197a75d..0866fc482 100644
--- a/manifests/complete-postgres-manifest.yaml
+++ b/manifests/complete-postgres-manifest.yaml
@@ -93,7 +93,11 @@ spec:
   enableShmVolume: true
 #  spiloRunAsUser: 101
 #  spiloRunAsGroup: 103
+#  spiloRunAsNonRoot: true
 #  spiloFSGroup: 103
+#  spiloSeccompProfile:
+#    type: Localhost
+#    localhostProfile: profiles/audit.json
 #  podAnnotations:
 #    annotation.key: value
 #  serviceAnnotations:
@@ -186,6 +190,12 @@ spec:
 #        env:
 #          - name: "USEFUL_VAR"
 #            value: "perhaps-true"
+#        securityContext:
+#          allowPrivilegeEscalation: false
+#          capabilities:
+#            drop:
+#              - ALL
+#          runAsNonRoot: true
 
 # Custom TLS certificate. Disabled unless tls.secretName has a value.
tls: diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index e2fb21504..78cf1b457 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -5,6 +5,7 @@ metadata: data: # additional_owner_roles: "cron_admin" # additional_pod_capabilities: "SYS_NICE" + # dropped_pod_capabilities: "ALL" # additional_secret_mount: "some-secret-name" # additional_secret_mount_path: "/some/dir" api_port: "8080" @@ -150,7 +151,11 @@ data: spilo_allow_privilege_escalation: "true" # spilo_runasuser: 101 # spilo_runasgroup: 103 + # spilo_runasnonroot: true # spilo_fsgroup: 103 + # spilo_seccompprofile: + # type: Localhost + # localhostProfile: profiles/audit.json spilo_privileged: "false" storage_resize_mode: "pvc" super_username: postgres diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 8582c866a..fed706a49 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -176,6 +176,10 @@ spec: type: array items: type: string + dropped_pod_capabilities: + type: array + items: + type: string cluster_domain: type: string default: "cluster.local" @@ -322,8 +326,17 @@ spec: type: integer spilo_runasgroup: type: integer + spilo_runasnonroot: + type: boolean spilo_fsgroup: type: integer + spilo_seccompprofile: + type: object + properties: + localhostProfile: + type: string + type: + type: string spilo_privileged: type: boolean default: false diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 2e475910c..1f3572287 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -44,6 +44,8 @@ configuration: kubernetes: # additional_pod_capabilities: # - "SYS_NICE" + # dropped_pod_capabilities: + # - "ALL" cluster_domain: cluster.local cluster_labels: application: spilo @@ -100,7 +102,11 @@ configuration: spilo_allow_privilege_escalation: true # spilo_runasuser: 101 # spilo_runasgroup: 103 + # spilo_runasnonroot: true # spilo_fsgroup: 103 + # spilo_seccompprofile: + # type: Localhost + # localhostProfile: profiles/audit.json spilo_privileged: false storage_resize_mode: pvc # toleration: diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index 6066abad1..7676dc46b 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -464,8 +464,17 @@ spec: type: integer spiloRunAsGroup: type: integer + spiloRunAsNonRoot: + type: boolean spiloFSGroup: type: integer + spiloSeccompProfile: + type: object + properties: + localhostProfile: + type: string + type: + type: string standby: type: object properties: diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index b82aa30b6..7dfa78313 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -728,9 +728,23 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{ "spiloRunAsGroup": { Type: "integer", }, + "spiloRunAsNonRoot": { + Type: "boolean", + }, "spiloFSGroup": { Type: "integer", }, + "spiloSeccompProfile": { + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "localhostProfile": { + Type: "string", + }, + "type": { + Type: "string", + }, + }, + }, "standby": { Type: "object", Properties: map[string]apiextv1.JSONSchemaProps{ @@ -1243,6 +1257,14 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "dropped_pod_capabilities": { + 
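// note: entries are upper-cased by the operator before being added
+				// to the container SecurityContext's Drop list, e.g. ["ALL"]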
+				Type: "array",
+				Items: &apiextv1.JSONSchemaPropsOrArray{
+					Schema: &apiextv1.JSONSchemaProps{
+						Type: "string",
+					},
+				},
+			},
 			"cluster_domain": {
 				Type: "string",
 			},
@@ -1441,9 +1463,23 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
 			"spilo_runasgroup": {
 				Type: "integer",
 			},
+			"spilo_runasnonroot": {
+				Type: "boolean",
+			},
 			"spilo_fsgroup": {
 				Type: "integer",
 			},
+			"spilo_seccompprofile": {
+				Type: "object",
+				Properties: map[string]apiextv1.JSONSchemaProps{
+					"localhostProfile": {
+						Type: "string",
+					},
+					"type": {
+						Type: "string",
+					},
+				},
+			},
 			"spilo_privileged": {
 				Type: "boolean",
 			},
diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
index 4ff5ee81e..7f958e899 100644
--- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
+++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
@@ -64,8 +64,11 @@ type KubernetesMetaConfiguration struct {
 	SpiloAllowPrivilegeEscalation *bool `json:"spilo_allow_privilege_escalation,omitempty"`
 	SpiloRunAsUser *int64 `json:"spilo_runasuser,omitempty"`
 	SpiloRunAsGroup *int64 `json:"spilo_runasgroup,omitempty"`
+	SpiloRunAsNonRoot *bool `json:"spilo_runasnonroot,omitempty"`
 	SpiloFSGroup *int64 `json:"spilo_fsgroup,omitempty"`
+	SpiloSeccompProfile *SeccompProfile `json:"spilo_seccompprofile,omitempty"`
 	AdditionalPodCapabilities []string `json:"additional_pod_capabilities,omitempty"`
+	DroppedPodCapabilities []string `json:"dropped_pod_capabilities,omitempty"`
 	WatchedNamespace string `json:"watched_namespace,omitempty"`
 	PDBNameFormat config.StringTemplate `json:"pdb_name_format,omitempty"`
 	EnablePodDisruptionBudget *bool `json:"enable_pod_disruption_budget,omitempty"`
@@ -104,6 +107,12 @@ type KubernetesMetaConfiguration struct {
 	EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"`
 }
 
+// SeccompProfile defines the seccompProfile spec of SecurityContext resources
+type SeccompProfile struct {
+	LocalhostProfile string                `json:"localhostProfile,omitempty"`
+	Type             v1.SeccompProfileType `json:"type,omitempty"`
+}
+
 // PostgresPodResourcesDefaults defines the spec of default resources
 type PostgresPodResourcesDefaults struct {
 	DefaultCPURequest string `json:"default_cpu_request,omitempty"`
diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go
index 67007b522..0a0b0adfa 100644
--- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go
+++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go
@@ -39,9 +39,11 @@ type PostgresSpec struct {
 	// deprecated field storing cluster name without teamId prefix
 	ClusterName string `json:"-"`
 
-	SpiloRunAsUser  *int64 `json:"spiloRunAsUser,omitempty"`
-	SpiloRunAsGroup *int64 `json:"spiloRunAsGroup,omitempty"`
-	SpiloFSGroup    *int64 `json:"spiloFSGroup,omitempty"`
+	SpiloRunAsUser      *int64          `json:"spiloRunAsUser,omitempty"`
+	SpiloRunAsGroup     *int64          `json:"spiloRunAsGroup,omitempty"`
+	SpiloRunAsNonRoot   *bool           `json:"spiloRunAsNonRoot,omitempty"`
+	SpiloFSGroup        *int64          `json:"spiloFSGroup,omitempty"`
+	SpiloSeccompProfile *SeccompProfile `json:"spiloSeccompProfile,omitempty"`
 
 	// vars that enable load balancers are pointers because it is important to know if any of them is omitted from the Postgres manifest
 	// in that case the var evaluates to nil and the value is taken from the operator config
@@ -209,11 +211,12 @@ type CloneDescription struct {
 
 // Sidecar defines a container to be run in the same pod as the Postgres container.
type Sidecar struct { - *Resources `json:"resources,omitempty"` - Name string `json:"name,omitempty"` - DockerImage string `json:"image,omitempty"` - Ports []v1.ContainerPort `json:"ports,omitempty"` - Env []v1.EnvVar `json:"env,omitempty"` + *Resources `json:"resources,omitempty"` + Name string `json:"name,omitempty"` + DockerImage string `json:"image,omitempty"` + Ports []v1.ContainerPort `json:"ports,omitempty"` + Env []v1.EnvVar `json:"env,omitempty"` + SecurityContext *v1.SecurityContext `json:"securityContext,omitempty"` } // UserFlags defines flags (such as superuser, nologin) that could be assigned to individual users diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index a43c995c5..be1b70b2c 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -168,16 +168,31 @@ func (in *KubernetesMetaConfiguration) DeepCopyInto(out *KubernetesMetaConfigura *out = new(int64) **out = **in } + if in.SpiloRunAsNonRoot != nil { + in, out := &in.SpiloRunAsNonRoot, &out.SpiloRunAsNonRoot + *out = new(bool) + **out = **in + } if in.SpiloFSGroup != nil { in, out := &in.SpiloFSGroup, &out.SpiloFSGroup *out = new(int64) **out = **in } + if in.SpiloSeccompProfile != nil { + in, out := &in.SpiloSeccompProfile, &out.SpiloSeccompProfile + *out = new(SeccompProfile) + **out = **in + } if in.AdditionalPodCapabilities != nil { in, out := &in.AdditionalPodCapabilities, &out.AdditionalPodCapabilities *out = make([]string, len(*in)) copy(*out, *in) } + if in.DroppedPodCapabilities != nil { + in, out := &in.DroppedPodCapabilities, &out.DroppedPodCapabilities + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.EnablePodDisruptionBudget != nil { in, out := &in.EnablePodDisruptionBudget, &out.EnablePodDisruptionBudget *out = new(bool) @@ -646,11 +661,21 @@ func (in *PostgresSpec) DeepCopyInto(out *PostgresSpec) { *out = new(int64) **out = **in } + if in.SpiloRunAsNonRoot != nil { + in, out := &in.SpiloRunAsNonRoot, &out.SpiloRunAsNonRoot + *out = new(bool) + **out = **in + } if in.SpiloFSGroup != nil { in, out := &in.SpiloFSGroup, &out.SpiloFSGroup *out = new(int64) **out = **in } + if in.SpiloSeccompProfile != nil { + in, out := &in.SpiloSeccompProfile, &out.SpiloSeccompProfile + *out = new(SeccompProfile) + **out = **in + } if in.EnableMasterLoadBalancer != nil { in, out := &in.EnableMasterLoadBalancer, &out.EnableMasterLoadBalancer *out = new(bool) @@ -1195,6 +1220,22 @@ func (in *ScalyrConfiguration) DeepCopy() *ScalyrConfiguration { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SeccompProfile) DeepCopyInto(out *SeccompProfile) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SeccompProfile. +func (in *SeccompProfile) DeepCopy() *SeccompProfile { + if in == nil { + return nil + } + out := new(SeccompProfile) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Sidecar) DeepCopyInto(out *Sidecar) {
 	*out = *in
@@ -1215,6 +1256,11 @@ func (in *Sidecar) DeepCopyInto(out *Sidecar) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
+	if in.SecurityContext != nil {
+		in, out := &in.SecurityContext, &out.SecurityContext
+		*out = new(corev1.SecurityContext)
+		(*in).DeepCopyInto(*out)
+	}
 	return
 }
 
diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go
index 3de5e430f..71c57cb71 100644
--- a/pkg/cluster/k8sres.go
+++ b/pkg/cluster/k8sres.go
@@ -454,15 +454,29 @@ func getLocalAndBoostrapPostgreSQLParameters(parameters map[string]string) (loca
 	return
 }
 
-func generateCapabilities(capabilities []string) *v1.Capabilities {
-	additionalCapabilities := make([]v1.Capability, 0, len(capabilities))
+func generateCapabilities(capabilities []string) []v1.Capability {
+	caps := make([]v1.Capability, 0, len(capabilities))
 	for _, capability := range capabilities {
-		additionalCapabilities = append(additionalCapabilities, v1.Capability(strings.ToUpper(capability)))
+		caps = append(caps, v1.Capability(strings.ToUpper(capability)))
 	}
-	if len(additionalCapabilities) > 0 {
-		return &v1.Capabilities{
-			Add: additionalCapabilities,
-		}
+	return caps
+}
+
+// generatePodCapabilities merges the configured additional and dropped
+// capabilities into a single SecurityContext capabilities spec; it returns
+// nil when neither list is set.
+func generatePodCapabilities(additionalCapabilities []string, droppedCapabilities []string) *v1.Capabilities {
+	addCaps := generateCapabilities(additionalCapabilities)
+	dropCaps := generateCapabilities(droppedCapabilities)
+	capabilities := v1.Capabilities{}
+	if len(addCaps) > 0 {
+		capabilities.Add = addCaps
+	}
+	if len(dropCaps) > 0 {
+		capabilities.Drop = dropCaps
+	}
+	if len(addCaps) > 0 || len(dropCaps) > 0 {
+		return &capabilities
 	}
 	return nil
 }
@@ -647,7 +658,7 @@ func generateContainer(
 	volumeMounts []v1.VolumeMount,
 	privilegedMode bool,
 	privilegeEscalationMode *bool,
-	additionalPodCapabilities *v1.Capabilities,
+	podCapabilities *v1.Capabilities,
 ) *v1.Container {
 	return &v1.Container{
 		Name: name,
@@ -674,7 +685,7 @@ func generateContainer(
 			AllowPrivilegeEscalation: privilegeEscalationMode,
 			Privileged:               &privilegedMode,
 			ReadOnlyRootFilesystem:   util.False(),
-			Capabilities:             additionalPodCapabilities,
+			Capabilities:             podCapabilities,
 		},
 	}
 }
@@ -696,7 +707,6 @@ func (c *Cluster) generateSidecarContainers(sidecars []acidv1.Sidecar,
 		if err != nil {
 			return nil, err
 		}
-
 		sc := getSidecarContainer(sidecar, startIndex+index, resources)
 		result = append(result, *sc)
 	}
@@ -774,7 +784,9 @@ func (c *Cluster) generatePodTemplate(
 	tolerationsSpec *[]v1.Toleration,
 	spiloRunAsUser *int64,
 	spiloRunAsGroup *int64,
+	spiloRunAsNonRoot *bool,
 	spiloFSGroup *int64,
+	spiloSeccompProfile *acidv1.SeccompProfile,
 	nodeAffinity *v1.Affinity,
 	schedulerName *string,
 	terminateGracePeriod int64,
@@ -803,9 +815,21 @@ func (c *Cluster) generatePodTemplate(
 		securityContext.RunAsGroup = spiloRunAsGroup
 	}
 
+	if spiloRunAsNonRoot != nil {
+		securityContext.RunAsNonRoot = spiloRunAsNonRoot
+	}
+
 	if spiloFSGroup != nil {
 		securityContext.FSGroup = spiloFSGroup
 	}
+
+	if spiloSeccompProfile != nil {
+		securityContext.SeccompProfile = &v1.SeccompProfile{}
+		securityContext.SeccompProfile.Type = spiloSeccompProfile.Type
+		if spiloSeccompProfile.LocalhostProfile != "" {
+			securityContext.SeccompProfile.LocalhostProfile = &spiloSeccompProfile.LocalhostProfile
+		}
+	}
 
 	podSpec := v1.PodSpec{
 		ServiceAccountName: podServiceAccountName,
@@ -1158,6 +1183,7 @@ func getSidecarContainer(sidecar acidv1.Sidecar, index int, resources *v1.Resour
 		Resources:       *resources,
 		Env:             sidecar.Env,
 		Ports:           sidecar.Ports,
+		SecurityContext: sidecar.SecurityContext,
 	}
 }
 
@@ -1279,11 +1305,31 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef
 		effectiveRunAsGroup = spec.SpiloRunAsGroup
 	}
 
+	effectiveRunAsNonRoot := c.OpConfig.Resources.SpiloRunAsNonRoot
+	if spec.SpiloRunAsNonRoot != nil {
+		effectiveRunAsNonRoot = spec.SpiloRunAsNonRoot
+	}
+
 	effectiveFSGroup := c.OpConfig.Resources.SpiloFSGroup
 	if spec.SpiloFSGroup != nil {
 		effectiveFSGroup = spec.SpiloFSGroup
 	}
 
+	// the cluster manifest takes precedence over the operator configuration
+	var effectiveSeccompProfile *acidv1.SeccompProfile
+	if c.OpConfig.Resources.SpiloSeccompProfile != nil {
+		effectiveSeccompProfile = &acidv1.SeccompProfile{}
+		if c.OpConfig.Resources.SpiloSeccompProfile.LocalhostProfile != "" {
+			effectiveSeccompProfile.LocalhostProfile = c.OpConfig.Resources.SpiloSeccompProfile.LocalhostProfile
+		}
+		if c.OpConfig.Resources.SpiloSeccompProfile.Type != "" {
+			effectiveSeccompProfile.Type = v1.SeccompProfileType(c.OpConfig.Resources.SpiloSeccompProfile.Type)
+		}
+	}
+	if spec.SpiloSeccompProfile != nil {
+		effectiveSeccompProfile = spec.SpiloSeccompProfile
+	}
+
 	volumeMounts := generateVolumeMounts(spec.Volume)
 
 	// configure TLS with a custom secret volume
@@ -1349,7 +1395,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef
 		volumeMounts,
 		c.OpConfig.Resources.SpiloPrivileged,
 		c.OpConfig.Resources.SpiloAllowPrivilegeEscalation,
-		generateCapabilities(c.OpConfig.AdditionalPodCapabilities),
+		generatePodCapabilities(c.OpConfig.AdditionalPodCapabilities, c.OpConfig.DroppedPodCapabilities),
 	)
 
 	// Patroni responds 200 to probe only if it either owns the leader lock or postgres is running and DCS is accessible
@@ -1430,7 +1476,9 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef
 		&tolerationSpec,
 		effectiveRunAsUser,
 		effectiveRunAsGroup,
+		effectiveRunAsNonRoot,
 		effectiveFSGroup,
+		effectiveSeccompProfile,
 		c.nodeAffinity(c.OpConfig.NodeReadinessLabel, spec.NodeAffinity),
 		spec.SchedulerName,
 		int64(c.OpConfig.PodTerminateGracePeriod.Seconds()),
@@ -2185,6 +2233,8 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
 		nil,
 		nil,
 		nil,
+		nil,
+		nil,
 		c.nodeAffinity(c.OpConfig.NodeReadinessLabel, nil),
 		nil,
 		int64(c.OpConfig.PodTerminateGracePeriod.Seconds()),
diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go
index 226e5ced5..a13a20b0e 100644
--- a/pkg/cluster/k8sres_test.go
+++ b/pkg/cluster/k8sres_test.go
@@ -1999,6 +1999,8 @@ func TestSidecars(t *testing.T) {
 	var err error
 	var spec acidv1.PostgresSpec
 	var cluster *Cluster
+	allowPrivilegeEscalation := false
+	runAsNonRoot := true
 
 	generateKubernetesResources := func(cpuRequest string, cpuLimit string, memoryRequest string, memoryLimit string) v1.ResourceRequirements {
 		parsedCPURequest, err := resource.ParseQuantity(cpuRequest)
@@ -2051,6 +2053,13 @@ func TestSidecars(t *testing.T) {
 			acidv1.Sidecar{
 				Name:        "replace-sidecar",
 				DockerImage: "override-image",
 			},
+			acidv1.Sidecar{
+				Name: "cluster-specific-sidecar-with-security-context",
+				SecurityContext:
&v1.SecurityContext{ + AllowPrivilegeEscalation: &allowPrivilegeEscalation, + RunAsNonRoot: &runAsNonRoot, + }, + }, }, } @@ -2141,7 +2150,7 @@ func TestSidecars(t *testing.T) { } // deduplicated sidecars and Patroni - assert.Equal(t, 7, len(s.Spec.Template.Spec.Containers), "wrong number of containers") + assert.Equal(t, 8, len(s.Spec.Template.Spec.Containers), "wrong number of containers") // cluster specific sidecar assert.Contains(t, s.Spec.Template.Spec.Containers, v1.Container{ @@ -2152,6 +2161,19 @@ func TestSidecars(t *testing.T) { VolumeMounts: mounts, }) + // cluster specific sidecar with security context + assert.Contains(t, s.Spec.Template.Spec.Containers, v1.Container{ + Name: "cluster-specific-sidecar-with-security-context", + Env: env, + Resources: generateKubernetesResources("200m", "500m", "0.7Gi", "1.3Gi"), + ImagePullPolicy: v1.PullIfNotPresent, + VolumeMounts: mounts, + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: &allowPrivilegeEscalation, + RunAsNonRoot: &runAsNonRoot, + }, + }) + // container specific resources expectedResources := generateKubernetesResources("210m", "510m", "0.8Gi", "1.4Gi") assert.Equal(t, expectedResources.Requests[v1.ResourceCPU], s.Spec.Template.Spec.Containers[2].Resources.Requests[v1.ResourceCPU]) @@ -2159,6 +2181,8 @@ func TestSidecars(t *testing.T) { assert.Equal(t, expectedResources.Requests[v1.ResourceMemory], s.Spec.Template.Spec.Containers[2].Resources.Requests[v1.ResourceMemory]) assert.Equal(t, expectedResources.Limits[v1.ResourceMemory], s.Spec.Template.Spec.Containers[2].Resources.Limits[v1.ResourceMemory]) + assert.Equal(t, &runAsNonRoot, s.Spec.Template.Spec.Containers[4].SecurityContext.RunAsNonRoot) + // deprecated global sidecar assert.Contains(t, s.Spec.Template.Spec.Containers, v1.Container{ Name: "deprecated-global-sidecar", @@ -3141,44 +3165,182 @@ func TestGenerateLogicalBackupJob(t *testing.T) { func TestGenerateCapabilities(t *testing.T) { tests := []struct { subTest string - configured []string + addConfigured []string + dropConfigured []string capabilities *v1.Capabilities err error }{ { subTest: "no capabilities", - configured: nil, + addConfigured: nil, + dropConfigured: nil, capabilities: nil, err: fmt.Errorf("could not parse capabilities configuration of nil"), }, { subTest: "empty capabilities", - configured: []string{}, + addConfigured: []string{}, + dropConfigured: []string{}, capabilities: nil, err: fmt.Errorf("could not parse empty capabilities configuration"), }, { subTest: "configured capability", - configured: []string{"SYS_NICE"}, + addConfigured: []string{"SYS_NICE"}, + dropConfigured: []string{"ALL"}, capabilities: &v1.Capabilities{ Add: []v1.Capability{"SYS_NICE"}, + Drop: []v1.Capability{"ALL"}, }, err: fmt.Errorf("could not generate one configured capability"), }, { subTest: "configured capabilities", - configured: []string{"SYS_NICE", "CHOWN"}, + addConfigured: []string{"SYS_NICE", "CHOWN"}, + dropConfigured: []string{"ALL"}, capabilities: &v1.Capabilities{ Add: []v1.Capability{"SYS_NICE", "CHOWN"}, + Drop: []v1.Capability{"ALL"}, }, err: fmt.Errorf("could not generate multiple configured capabilities"), }, } for _, tt := range tests { - caps := generateCapabilities(tt.configured) + caps := generatePodCapabilities(tt.addConfigured, tt.dropConfigured) if !reflect.DeepEqual(caps, tt.capabilities) { t.Errorf("%s %s: expected `%v` but got `%v`", t.Name(), tt.subTest, tt.capabilities, caps) } } } + +func TestGenerateSeccompProfile(t *testing.T) { + client, _ := newFakeK8sTestClient() + 
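// assumption: "profiles/audit.json" is illustrative; a Localhost profile
+	// resolves relative to the kubelet's seccomp profile root directory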
+	clusterName := "acid-test-cluster"
+	namespace := "default"
+	spiloSeccompProfile := config.SeccompProfile{
+		Type:             "Localhost",
+		LocalhostProfile: "profiles/audit.json",
+	}
+
+	pg := acidv1.Postgresql{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      clusterName,
+			Namespace: namespace,
+		},
+		Spec: acidv1.PostgresSpec{
+			TeamID: "myapp", NumberOfInstances: 1,
+			Resources: &acidv1.Resources{
+				ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+				ResourceLimits:   acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+			},
+			Volume: acidv1.Volume{
+				Size: "1G",
+			},
+		},
+	}
+
+	var cluster = New(
+		Config{
+			OpConfig: config.Config{
+				PodManagementPolicy: "ordered_ready",
+				ProtectedRoles:      []string{"admin"},
+				Resources: config.Resources{
+					SpiloSeccompProfile: &spiloSeccompProfile,
+				},
+			},
+		}, client, pg, logger, eventRecorder)
+
+	// create a statefulset
+	sts, err := cluster.createStatefulSet()
+	assert.NoError(t, err)
+
+	assert.Equal(t, spiloSeccompProfile.Type, string(sts.Spec.Template.Spec.SecurityContext.SeccompProfile.Type), "has a SeccompProfileType assigned")
+	assert.Equal(t, spiloSeccompProfile.LocalhostProfile, *sts.Spec.Template.Spec.SecurityContext.SeccompProfile.LocalhostProfile, "has a LocalhostProfile assigned")
+}
+
+func TestGenerateEmptySeccompProfile(t *testing.T) {
+	client, _ := newFakeK8sTestClient()
+	clusterName := "acid-test-cluster"
+	namespace := "default"
+	pg := acidv1.Postgresql{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      clusterName,
+			Namespace: namespace,
+		},
+		Spec: acidv1.PostgresSpec{
+			TeamID: "myapp", NumberOfInstances: 1,
+			Resources: &acidv1.Resources{
+				ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+				ResourceLimits:   acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+			},
+			Volume: acidv1.Volume{
+				Size: "1G",
+			},
+		},
+	}
+
+	var cluster = New(
+		Config{
+			OpConfig: config.Config{
+				PodManagementPolicy: "ordered_ready",
+				ProtectedRoles:      []string{"admin"},
+			},
+		}, client, pg, logger, eventRecorder)
+
+	// create a statefulset
+	sts, err := cluster.createStatefulSet()
+	assert.NoError(t, err)
+
+	assert.Nil(t, sts.Spec.Template.Spec.SecurityContext.SeccompProfile, "does not have a SeccompProfile assigned")
+}
+
+func TestGenerateRunAsNonRoot(t *testing.T) {
+	var client, _ = newFakeK8sTestClient()
+	var clusterName = "acid-test-cluster"
+	var namespace = "default"
+	var spiloRunAsUser = int64(101)
+	var spiloRunAsGroup = int64(103)
+	var spiloRunAsNonRoot = true
+	var spiloFSGroup = int64(103)
+
+	pg := acidv1.Postgresql{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      clusterName,
+			Namespace: namespace,
+		},
+		Spec: acidv1.PostgresSpec{
+			TeamID: "myapp", NumberOfInstances: 1,
+			Resources: &acidv1.Resources{
+				ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+				ResourceLimits:   acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+			},
+			Volume: acidv1.Volume{
+				Size: "1G",
+			},
+		},
+	}
+
+	var cluster = New(
+		Config{
+			OpConfig: config.Config{
+				PodManagementPolicy: "ordered_ready",
+				ProtectedRoles:      []string{"admin"},
+				Resources: config.Resources{
+					SpiloRunAsUser:    &spiloRunAsUser,
+					SpiloRunAsGroup:   &spiloRunAsGroup,
+					SpiloRunAsNonRoot: &spiloRunAsNonRoot,
+					SpiloFSGroup:      &spiloFSGroup,
+				},
+			},
+		}, client, pg, logger, eventRecorder)
+
+	// create a statefulset
+	sts, err := cluster.createStatefulSet()
+	assert.NoError(t, err)
+
+	assert.Equal(t, spiloRunAsUser, *sts.Spec.Template.Spec.SecurityContext.RunAsUser, "has a RunAsUser assigned")
+	assert.Equal(t, spiloRunAsGroup, *sts.Spec.Template.Spec.SecurityContext.RunAsGroup, "has a RunAsGroup assigned")
+	assert.Equal(t, spiloRunAsNonRoot, *sts.Spec.Template.Spec.SecurityContext.RunAsNonRoot, "has a RunAsNonRoot assigned")
+	assert.Equal(t, spiloFSGroup, *sts.Spec.Template.Spec.SecurityContext.FSGroup, "has a FSGroup assigned")
+}
diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go
index a66ece2fa..1e219288b 100644
--- a/pkg/controller/operator_config.go
+++ b/pkg/controller/operator_config.go
@@ -77,8 +77,17 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
 	result.SpiloAllowPrivilegeEscalation = util.CoalesceBool(fromCRD.Kubernetes.SpiloAllowPrivilegeEscalation, util.True())
 	result.SpiloRunAsUser = fromCRD.Kubernetes.SpiloRunAsUser
 	result.SpiloRunAsGroup = fromCRD.Kubernetes.SpiloRunAsGroup
+	result.SpiloRunAsNonRoot = fromCRD.Kubernetes.SpiloRunAsNonRoot
 	result.SpiloFSGroup = fromCRD.Kubernetes.SpiloFSGroup
+	if fromCRD.Kubernetes.SpiloSeccompProfile != nil {
+		result.SpiloSeccompProfile = &config.SeccompProfile{}
+		result.SpiloSeccompProfile.Type = string(fromCRD.Kubernetes.SpiloSeccompProfile.Type)
+		if fromCRD.Kubernetes.SpiloSeccompProfile.LocalhostProfile != "" {
+			result.SpiloSeccompProfile.LocalhostProfile = fromCRD.Kubernetes.SpiloSeccompProfile.LocalhostProfile
+		}
+	}
 	result.AdditionalPodCapabilities = fromCRD.Kubernetes.AdditionalPodCapabilities
+	result.DroppedPodCapabilities = fromCRD.Kubernetes.DroppedPodCapabilities
 	result.ClusterDomain = util.Coalesce(fromCRD.Kubernetes.ClusterDomain, "cluster.local")
 	result.WatchedNamespace = fromCRD.Kubernetes.WatchedNamespace
 	result.PDBNameFormat = fromCRD.Kubernetes.PDBNameFormat
diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go
index df1cf6bb8..c27aa917b 100644
--- a/pkg/util/config/config.go
+++ b/pkg/util/config/config.go
@@ -32,12 +32,15 @@ type Resources struct {
 	PodTerminateGracePeriod       time.Duration     `name:"pod_terminate_grace_period" default:"5m"`
 	SpiloRunAsUser                *int64            `name:"spilo_runasuser"`
 	SpiloRunAsGroup               *int64            `name:"spilo_runasgroup"`
+	SpiloRunAsNonRoot             *bool             `name:"spilo_runasnonroot"`
 	SpiloFSGroup                  *int64            `name:"spilo_fsgroup"`
+	SpiloSeccompProfile           *SeccompProfile   `name:"spilo_seccompprofile"`
 	PodPriorityClassName          string            `name:"pod_priority_class_name"`
 	ClusterDomain                 string            `name:"cluster_domain" default:"cluster.local"`
 	SpiloPrivileged               bool              `name:"spilo_privileged" default:"false"`
 	SpiloAllowPrivilegeEscalation *bool             `name:"spilo_allow_privilege_escalation" default:"true"`
 	AdditionalPodCapabilities     []string          `name:"additional_pod_capabilities" default:""`
+	DroppedPodCapabilities        []string          `name:"dropped_pod_capabilities" default:""`
 	ClusterLabels                 map[string]string `name:"cluster_labels" default:"application:spilo"`
 	InheritedLabels               []string          `name:"inherited_labels" default:""`
 	InheritedAnnotations          []string          `name:"inherited_annotations" default:""`
@@ -67,6 +70,13 @@ type Resources struct {
 	IgnoreInstanceLimitsAnnotationKey string `name:"ignore_instance_limits_annotation_key"`
 }
 
+// SeccompProfile defines the seccompProfile spec of SecurityContext resources
+type SeccompProfile struct {
+	LocalhostProfile string `name:"localhost_profile"`
+	Type             string `name:"type"`
+}
+
 type InfrastructureRole struct {
 	// Name of a secret which describes the role, and optionally name of a
 	// configmap with an extra information