diff --git a/CHANGELOG.md b/CHANGELOG.md index 1462a510..76a671c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- The lifetime of auto-generated TLS certificates is now configurable with the role and roleGroup + config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#619]). + ### Fixed - BREAKING: Use distinct ServiceAccounts for the Stacklets, so that multiple Stacklets can be @@ -11,6 +16,7 @@ All notable changes to this project will be documented in this file. restart ([#616]). [#616]: https://github.com/stackabletech/hdfs-operator/pull/616 +[#619]: https://github.com/stackabletech/hdfs-operator/pull/619 ## [24.11.0] - 2024-11-18 diff --git a/Cargo.lock b/Cargo.lock index 789f13e8..0369655d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -478,17 +478,6 @@ dependencies = [ "powerfmt", ] -[[package]] -name = "derivative" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "digest" version = "0.10.7" @@ -1830,9 +1819,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.15" +version = "0.23.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" dependencies = [ "log", "once_cell", @@ -2212,15 +2201,15 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.82.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +version = "0.83.0" +source =
"git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.83.0#3ce7bcbdb58097cde0c0f19488a104c96f69dbc3" dependencies = [ "chrono", "clap", "const_format", "delegate", - "derivative", "dockerfile-parser", + "educe", "either", "futures 0.3.31", "indexmap", @@ -2251,7 +2240,7 @@ dependencies = [ [[package]] name = "stackable-operator-derive" version = "0.3.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.83.0#3ce7bcbdb58097cde0c0f19488a104c96f69dbc3" dependencies = [ "darling", "proc-macro2", @@ -2262,7 +2251,7 @@ dependencies = [ [[package]] name = "stackable-shared" version = "0.0.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.83.0#3ce7bcbdb58097cde0c0f19488a104c96f69dbc3" dependencies = [ "kube", "semver", diff --git a/Cargo.toml b/Cargo.toml index f5e32b48..bb99567e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9" snafu = "0.8" -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.82.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.83.0" } product-config = { git = "https://github.com/stackabletech/product-config.git", tag = "0.7.0" } strum = { version = "0.26", features = ["derive"] } tokio = { version = "1.40", features = ["full"] } diff --git a/deploy/helm/hdfs-operator/crds/crds.yaml b/deploy/helm/hdfs-operator/crds/crds.yaml index 323897d7..d1fccfc8 100644 --- a/deploy/helm/hdfs-operator/crds/crds.yaml +++ b/deploy/helm/hdfs-operator/crds/crds.yaml @@ -261,6 +261,10 @@ 
spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -538,6 +542,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -840,6 +848,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -1104,6 +1116,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -1353,6 +1369,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. 
+ nullable: true + type: string resources: default: cpu: @@ -1621,6 +1641,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 74660989..24d13138 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -238,6 +238,11 @@ pub struct CommonNodeConfig { /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. #[fragment_attrs(serde(default))] pub graceful_shutdown_timeout: Option<Duration>, + + /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. + /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + #[fragment_attrs(serde(default))] + pub requested_secret_lifetime: Option<Duration>, } /// Configuration for a rolegroup of an unknown type.
@@ -310,6 +315,13 @@ impl AnyNodeConfig { AnyNodeConfig::JournalNode(node) => node.logging.enable_vector_agent, } } + pub fn requested_secret_lifetime(&self) -> Option<Duration> { + match self { + AnyNodeConfig::NameNode(node) => node.common.requested_secret_lifetime, + AnyNodeConfig::DataNode(node) => node.common.requested_secret_lifetime, + AnyNodeConfig::JournalNode(node) => node.common.requested_secret_lifetime, + } + } } #[derive( @@ -1074,6 +1086,8 @@ pub struct NameNodeConfig { } impl NameNodeConfigFragment { + const DEFAULT_NAME_NODE_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); + pub fn default_config(cluster_name: &str, role: &HdfsRole) -> Self { Self { resources: ResourcesFragment { @@ -1098,6 +1112,7 @@ impl NameNodeConfigFragment { common: CommonNodeConfigFragment { affinity: get_affinity(cluster_name, role), graceful_shutdown_timeout: Some(DEFAULT_NAME_NODE_GRACEFUL_SHUTDOWN_TIMEOUT), + requested_secret_lifetime: Some(Self::DEFAULT_NAME_NODE_SECRET_LIFETIME), }, } } @@ -1208,6 +1223,8 @@ pub struct DataNodeConfig { } impl DataNodeConfigFragment { + const DEFAULT_DATA_NODE_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); + pub fn default_config(cluster_name: &str, role: &HdfsRole) -> Self { Self { resources: ResourcesFragment { @@ -1237,6 +1254,7 @@ impl DataNodeConfigFragment { common: CommonNodeConfigFragment { affinity: get_affinity(cluster_name, role), graceful_shutdown_timeout: Some(DEFAULT_DATA_NODE_GRACEFUL_SHUTDOWN_TIMEOUT), + requested_secret_lifetime: Some(Self::DEFAULT_DATA_NODE_SECRET_LIFETIME), }, } } @@ -1324,6 +1342,7 @@ pub struct JournalNodeConfig { } impl JournalNodeConfigFragment { + const DEFAULT_JOURNAL_NODE_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); pub fn default_config(cluster_name: &str, role: &HdfsRole) -> Self { Self { resources: ResourcesFragment { @@ -1347,6 +1366,7 @@ impl JournalNodeConfigFragment { common: CommonNodeConfigFragment { affinity: get_affinity(cluster_name, role),
graceful_shutdown_timeout: Some(DEFAULT_JOURNAL_NODE_GRACEFUL_SHUTDOWN_TIMEOUT), + requested_secret_lifetime: Some(Self::DEFAULT_JOURNAL_NODE_SECRET_LIFETIME), }, } } diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index 61519a54..d80fbf1f 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -89,6 +89,9 @@ type Result = std::result::Result; #[derive(Snafu, Debug, EnumDiscriminants)] #[strum_discriminants(derive(IntoStaticStr))] pub enum Error { + #[snafu(display("missing secret lifetime"))] + MissingSecretLifetime, + #[snafu(display("object has no namespace"))] ObjectHasNoNamespace, @@ -272,6 +275,11 @@ impl ContainerConfig { .with_node_scope() .with_format(SecretFormat::TlsPkcs12) .with_tls_pkcs12_password(TLS_STORE_PASSWORD) + .with_auto_tls_cert_lifetime( + merged_config + .requested_secret_lifetime() + .context(MissingSecretLifetimeSnafu)?, + ) .build() .context(BuildSecretVolumeSnafu { volume_name: TLS_STORE_VOLUME_NAME, diff --git a/tests/templates/kuttl/kerberos/20-assert.yaml b/tests/templates/kuttl/kerberos/20-assert.yaml index 47260144..ace5b5ca 100644 --- a/tests/templates/kuttl/kerberos/20-assert.yaml +++ b/tests/templates/kuttl/kerberos/20-assert.yaml @@ -26,3 +26,11 @@ metadata: status: readyReplicas: 2 replicas: 2 +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +commands: + - script: kubectl -n $NAMESPACE get sts/hdfs-namenode-default -o yaml | yq -e '.spec.template.spec.volumes.[] | select(.name == "tls" and .ephemeral.volumeClaimTemplate.metadata.annotations."secrets.stackable.tech/backend.autotls.cert.lifetime" == "7d")' + - script: kubectl -n $NAMESPACE get sts/hdfs-datanode-default -o yaml | yq -e '.spec.template.spec.volumes.[] | select(.name == "tls" and .ephemeral.volumeClaimTemplate.metadata.annotations."secrets.stackable.tech/backend.autotls.cert.lifetime" == "1d")' + - script: kubectl -n $NAMESPACE get sts/hdfs-journalnode-default -o 
yaml | yq -e '.spec.template.spec.volumes.[] | select(.name == "tls" and .ephemeral.volumeClaimTemplate.metadata.annotations."secrets.stackable.tech/backend.autotls.cert.lifetime" == "2d")' diff --git a/tests/templates/kuttl/kerberos/20-install-hdfs.txt.j2 b/tests/templates/kuttl/kerberos/20-install-hdfs.txt.j2 index f26491f5..6f09314d 100644 --- a/tests/templates/kuttl/kerberos/20-install-hdfs.txt.j2 +++ b/tests/templates/kuttl/kerberos/20-install-hdfs.txt.j2 @@ -44,6 +44,7 @@ spec: replicas: 2 dataNodes: config: + requestedSecretLifetime: 1d logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} roleGroups: @@ -56,3 +57,5 @@ spec: roleGroups: default: replicas: 3 + config: + requestedSecretLifetime: 2d