diff --git a/CHANGELOG.md b/CHANGELOG.md index 72ac4d0f..86c8a611 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file. - The lifetime of auto generated TLS certificates is now configurable with the role and roleGroup config property `requestedSecretLifetime`. This helps reducing frequent Pod restarts ([#619]). - Run a `containerdebug` process in the background of each HDFS container to collect debugging information ([#629]). +- Support configuring JVM arguments ([#636]). - Aggregate emitted Kubernetes events on the CustomResources ([#643]). ### Changed @@ -16,9 +17,11 @@ All notable changes to this project will be documented in this file. - Switch the WebUI liveness probe from `httpGet` to checking the tcp socket. This helps with setups where configOverrides are used to enable security on the HTTP interfaces. As this results in `401` HTTP responses (instead of `200`), this previously failed the liveness checks. +- Set the JVM argument `-Xms` in addition to `-Xmx` (with the same value). This ensures consistent JVM configs across our products ([#636]). 
[#619]: https://github.com/stackabletech/hdfs-operator/pull/619 [#629]: https://github.com/stackabletech/hdfs-operator/pull/629 +[#636]: https://github.com/stackabletech/hdfs-operator/pull/636 [#643]: https://github.com/stackabletech/hdfs-operator/pull/643 ## [24.11.1] - 2025-01-10 diff --git a/Cargo.lock b/Cargo.lock index c7748d5d..36009cb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -396,9 +396,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ "libc", ] @@ -2020,9 +2020,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" +checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" [[package]] name = "rustls-webpki" @@ -2043,9 +2043,9 @@ checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" [[package]] name = "schannel" @@ -2882,9 +2882,9 @@ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" [[package]] name = "unicode-ident" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11cd88e12b17c6494200a9c1b683a04fcac9573ed74cd1b62aeb2727c5592243" +checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" 
[[package]] name = "unicode-xid" @@ -3160,9 +3160,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.24" +version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8d71a593cc5c42ad7876e2c1fda56f314f3754c084128833e64f1345ff8a03a" +checksum = "ad699df48212c6cc6eb4435f35500ac6fd3b9913324f938aea302022ce19d310" dependencies = [ "memchr", ] diff --git a/Cargo.nix b/Cargo.nix index bae631c2..16b4d2b0 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -1164,9 +1164,9 @@ rec { }; "cpufeatures" = rec { crateName = "cpufeatures"; - version = "0.2.16"; + version = "0.2.17"; edition = "2018"; - sha256 = "1hy466fkhxjbb16i7na95wz8yr14d0kd578pwzj5lbkz14jh5f0n"; + sha256 = "10023dnnaghhdl70xcds12fsx2b966sxbxjq5sxs49mvxqw5ivar"; authors = [ "RustCrypto Developers" ]; @@ -1174,21 +1174,25 @@ rec { { name = "libc"; packageId = "libc"; + usesDefaultFeatures = false; target = { target, features }: (stdenv.hostPlatform.rust.rustcTarget == "aarch64-linux-android"); } { name = "libc"; packageId = "libc"; + usesDefaultFeatures = false; target = { target, features }: (("aarch64" == target."arch" or null) && ("linux" == target."os" or null)); } { name = "libc"; packageId = "libc"; + usesDefaultFeatures = false; target = { target, features }: (("aarch64" == target."arch" or null) && ("apple" == target."vendor" or null)); } { name = "libc"; packageId = "libc"; + usesDefaultFeatures = false; target = { target, features }: (("loongarch64" == target."arch" or null) && ("linux" == target."os" or null)); } ]; @@ -6312,9 +6316,9 @@ rec { }; "rustls-pki-types" = rec { crateName = "rustls-pki-types"; - version = "1.10.1"; + version = "1.11.0"; edition = "2021"; - sha256 = "0dqb3d0cbld1yrp084wyzgw6yk3qzzic8l5pbs1b6bcjzzk4ggyj"; + sha256 = "0755isc0x5iymm3wsn59s0ad1pm9zidw7p34qfqlsjsac9jf4z4i"; libName = "rustls_pki_types"; features = { "default" = [ "alloc" ]; @@ -6371,9 +6375,9 @@ rec { }; "ryu" = 
rec { crateName = "ryu"; - version = "1.0.18"; + version = "1.0.19"; edition = "2018"; - sha256 = "17xx2s8j1lln7iackzd9p0sv546vjq71i779gphjq923vjh5pjzk"; + sha256 = "1pg6a0b80m32ahygsdkwzs3bfydk4snw695akz4rqxj4lv8a58bf"; authors = [ "David Tolnay " ]; @@ -9209,9 +9213,9 @@ rec { }; "unicode-ident" = rec { crateName = "unicode-ident"; - version = "1.0.15"; + version = "1.0.16"; edition = "2018"; - sha256 = "0hr2b72jf9zb5avd2k6p7rbwkjjgl21vdhd90114kihp5ghqik8i"; + sha256 = "0d2hji0i16naw43l02dplrz8fbv625n7475s463iqw4by1hd2452"; libName = "unicode_ident"; authors = [ "David Tolnay " @@ -10980,9 +10984,9 @@ rec { }; "winnow" = rec { crateName = "winnow"; - version = "0.6.24"; + version = "0.6.25"; edition = "2021"; - sha256 = "0fm0z1gk9wb47s1jhh889isz657kavd1yb3fhzbjmi657icimmy8"; + sha256 = "046k3772481hxa596krj2fckpzf61983aps3nipcrihjhbs9ssdd"; dependencies = [ { name = "memchr"; diff --git a/deploy/helm/hdfs-operator/crds/crds.yaml b/deploy/helm/hdfs-operator/crds/crds.yaml index d1fccfc8..87731606 100644 --- a/deploy/helm/hdfs-operator/crds/crds.yaml +++ b/deploy/helm/hdfs-operator/crds/crds.yaml @@ -373,6 +373,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. 
+ properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. @@ -654,6 +680,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. 
+ properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. @@ -951,6 +1003,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. 
+ properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. @@ -1219,6 +1297,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. 
+ properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. @@ -1472,6 +1576,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. 
+ properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. @@ -1744,6 +1874,32 @@ spec: default: {} description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.' type: object + jvmArgumentOverrides: + default: + add: [] + remove: [] + removeRegex: [] + description: Allows overriding JVM arguments. Please read on the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage. 
+ properties: + add: + default: [] + description: JVM arguments to be added + items: + type: string + type: array + remove: + default: [] + description: JVM arguments to be removed by exact match + items: + type: string + type: array + removeRegex: + default: [] + description: JVM arguments matching any of this regexes will be removed + items: + type: string + type: array + type: object podOverrides: default: {} description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information. diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index fcf67775..64655370 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -38,7 +38,8 @@ use stackable_operator::{ spec::{ContainerLogConfig, Logging}, }, role_utils::{ - GenericProductSpecificCommonConfig, GenericRoleConfig, Role, RoleGroup, RoleGroupRef, + self, GenericRoleConfig, JavaCommonConfig, JvmArgumentOverrides, Role, RoleGroup, + RoleGroupRef, }, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, @@ -72,7 +73,7 @@ pub enum Error { #[snafu(display("object has no associated namespace"))] NoNamespace, - #[snafu(display("missing node role {role:?}"))] + #[snafu(display("missing role {role:?}"))] MissingRole { role: String }, #[snafu(display("missing role group {role_group:?} for role {role:?}"))] @@ -103,6 +104,9 @@ pub enum Error { #[snafu(display("failed to build role-group selector label"))] BuildRoleGroupSelectorLabel { source: LabelError }, + + #[snafu(display("failed to merge jvm argument overrides"))] + MergeJvmArgumentOverrides { source: role_utils::Error }, } /// An HDFS cluster stacklet. This resource is managed by the Stackable operator for Apache Hadoop HDFS. 
@@ -140,15 +144,15 @@ pub struct HdfsClusterSpec { // no doc string - See Role struct #[serde(default, skip_serializing_if = "Option::is_none")] - pub name_nodes: Option>, + pub name_nodes: Option>, // no doc string - See Role struct #[serde(default, skip_serializing_if = "Option::is_none")] - pub data_nodes: Option>, + pub data_nodes: Option>, // no doc string - See Role struct #[serde(default, skip_serializing_if = "Option::is_none")] - pub journal_nodes: Option>, + pub journal_nodes: Option>, } #[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] @@ -541,7 +545,7 @@ impl HdfsCluster { pub fn namenode_rolegroup( &self, role_group: &str, - ) -> Option<&RoleGroup> { + ) -> Option<&RoleGroup> { self.spec.name_nodes.as_ref()?.role_groups.get(role_group) } @@ -549,7 +553,7 @@ impl HdfsCluster { pub fn datanode_rolegroup( &self, role_group: &str, - ) -> Option<&RoleGroup> { + ) -> Option<&RoleGroup> { self.spec.data_nodes.as_ref()?.role_groups.get(role_group) } @@ -557,7 +561,7 @@ impl HdfsCluster { pub fn journalnode_rolegroup( &self, role_group: &str, - ) -> Option<&RoleGroup> { + ) -> Option<&RoleGroup> { self.spec .journal_nodes .as_ref()? 
@@ -565,14 +569,49 @@ impl HdfsCluster { .get(role_group) } - pub fn role_config(&self, role: &HdfsRole) -> Option<&GenericRoleConfig> { - match role { + pub fn role_config(&self, hdfs_role: &HdfsRole) -> Option<&GenericRoleConfig> { + match hdfs_role { HdfsRole::NameNode => self.spec.name_nodes.as_ref().map(|nn| &nn.role_config), HdfsRole::DataNode => self.spec.data_nodes.as_ref().map(|dn| &dn.role_config), HdfsRole::JournalNode => self.spec.journal_nodes.as_ref().map(|jn| &jn.role_config), } } + pub fn get_merged_jvm_argument_overrides( + &self, + hdfs_role: &HdfsRole, + role_group: &str, + operator_generated: &JvmArgumentOverrides, + ) -> Result { + match hdfs_role { + HdfsRole::JournalNode => self + .spec + .journal_nodes + .as_ref() + .with_context(|| MissingRoleSnafu { + role: HdfsRole::JournalNode.to_string(), + })? + .get_merged_jvm_argument_overrides(role_group, operator_generated), + HdfsRole::NameNode => self + .spec + .name_nodes + .as_ref() + .with_context(|| MissingRoleSnafu { + role: HdfsRole::NameNode.to_string(), + })? + .get_merged_jvm_argument_overrides(role_group, operator_generated), + HdfsRole::DataNode => self + .spec + .data_nodes + .as_ref() + .with_context(|| MissingRoleSnafu { + role: HdfsRole::DataNode.to_string(), + })? 
+ .get_merged_jvm_argument_overrides(role_group, operator_generated), + } + .context(MergeJvmArgumentOverridesSnafu) + } + pub fn pod_overrides_for_role(&self, role: &HdfsRole) -> Option<&PodTemplateSpec> { match role { HdfsRole::NameNode => self @@ -761,6 +800,7 @@ impl HdfsCluster { } } + #[allow(clippy::type_complexity)] pub fn build_role_properties( &self, ) -> Result< @@ -768,7 +808,11 @@ impl HdfsCluster { String, ( Vec, - Role>, + Role< + impl Configuration, + GenericRoleConfig, + JavaCommonConfig, + >, ), >, Error, diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs new file mode 100644 index 00000000..9976b95e --- /dev/null +++ b/rust/operator-binary/src/config/jvm.rs @@ -0,0 +1,212 @@ +use snafu::{ResultExt, Snafu}; +use stackable_hdfs_crd::{constants::JVM_SECURITY_PROPERTIES_FILE, HdfsCluster, HdfsRole}; +use stackable_operator::{ + k8s_openapi::api::core::v1::ResourceRequirements, + memory::{BinaryMultiple, MemoryQuantity}, + role_utils::JvmArgumentOverrides, +}; + +use crate::security::kerberos::KERBEROS_CONTAINER_PATH; + +const JVM_HEAP_FACTOR: f32 = 0.8; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("invalid java heap config for {role:?}"))] + InvalidJavaHeapConfig { + source: stackable_operator::memory::Error, + role: String, + }, + + #[snafu(display("failed to merge jvm argument overrides"))] + MergeJvmArgumentOverrides { source: stackable_hdfs_crd::Error }, +} + +// All init or sidecar containers must have access to the following settings. +// As the Prometheus metric emitter is not part of this config it's safe to use for hdfs cli tools as well. +// This will not only enable the init containers to work, but also the user to run e.g. 
+// `bin/hdfs dfs -ls /` without getting `Caused by: java.lang.IllegalArgumentException: KrbException: Cannot locate default realm` +// because the `-Djava.security.krb5.conf` setting is missing +pub fn construct_global_jvm_args(kerberos_enabled: bool) -> String { + let mut jvm_args = Vec::new(); + + if kerberos_enabled { + jvm_args.push(format!( + "-Djava.security.krb5.conf={KERBEROS_CONTAINER_PATH}/krb5.conf" + )); + } + + // We do *not* add user overrides to the global JVM args, but only the role specific JVM arguments. + // This allows users to configure stuff for the server (probably what they want to do), without + // also influencing e.g. startup scripts. + // + // However, this is just an assumption. If it is wrong users can still envOverride the global + // JVM args. + // + // Please feel absolutely free to change this behavior! + jvm_args.join(" ") +} + +pub fn construct_role_specific_jvm_args( + hdfs: &HdfsCluster, + hdfs_role: &HdfsRole, + role_group: &str, + kerberos_enabled: bool, + resources: Option<&ResourceRequirements>, + config_dir: &str, + metrics_port: u16, +) -> Result { + let mut jvm_args = Vec::new(); + + if let Some(memory_limit) = resources.and_then(|r| r.limits.as_ref()?.get("memory")) { + let memory_limit = MemoryQuantity::try_from(memory_limit).with_context(|_| { + InvalidJavaHeapConfigSnafu { + role: hdfs_role.to_string(), + } + })?; + let heap = memory_limit.scale_to(BinaryMultiple::Mebi) * JVM_HEAP_FACTOR; + let heap = heap + .format_for_java() + .with_context(|_| InvalidJavaHeapConfigSnafu { + role: hdfs_role.to_string(), + })?; + + jvm_args.push(format!("-Xms{heap}")); + jvm_args.push(format!("-Xmx{heap}")); + } + + jvm_args.extend([ + format!("-Djava.security.properties={config_dir}/{JVM_SECURITY_PROPERTIES_FILE}"), + format!("-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={metrics_port}:/stackable/jmx/{hdfs_role}.yaml") + ]); + if kerberos_enabled { + jvm_args.push(format!( + 
"-Djava.security.krb5.conf={KERBEROS_CONTAINER_PATH}/krb5.conf" + )); + } + + let operator_generated = JvmArgumentOverrides::new_with_only_additions(jvm_args); + let merged_jvm_args = hdfs + .get_merged_jvm_argument_overrides(hdfs_role, role_group, &operator_generated) + .context(MergeJvmArgumentOverridesSnafu)?; + + Ok(merged_jvm_args + .effective_jvm_config_after_merging() + .join(" ")) +} + +#[cfg(test)] +mod tests { + use stackable_hdfs_crd::{constants::DEFAULT_NAME_NODE_METRICS_PORT, HdfsCluster}; + + use crate::container::ContainerConfig; + + use super::*; + + #[test] + fn test_global_jvm_args() { + assert_eq!(construct_global_jvm_args(false), ""); + assert_eq!( + construct_global_jvm_args(true), + format!("-Djava.security.krb5.conf={KERBEROS_CONTAINER_PATH}/krb5.conf") + ); + } + + #[test] + fn test_jvm_config_defaults_without_kerberos() { + let input = r#" + apiVersion: hdfs.stackable.tech/v1alpha1 + kind: HdfsCluster + metadata: + name: hdfs + spec: + image: + productVersion: 3.4.0 + clusterConfig: + zookeeperConfigMapName: hdfs-zk + nameNodes: + roleGroups: + default: + replicas: 1 + "#; + let jvm_config = construct_test_role_specific_jvm_args(input, false); + + assert_eq!( + jvm_config, + "-Xms819m \ + -Xmx819m \ + -Djava.security.properties=/stackable/config/security.properties \ + -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=8183:/stackable/jmx/namenode.yaml" + ); + } + + #[test] + fn test_jvm_config_jvm_argument_overrides() { + let input = r#" + apiVersion: hdfs.stackable.tech/v1alpha1 + kind: HdfsCluster + metadata: + name: hdfs + spec: + image: + productVersion: 3.4.0 + clusterConfig: + zookeeperConfigMapName: hdfs-zk + nameNodes: + config: + resources: + memory: + limit: 42Gi + jvmArgumentOverrides: + add: + - -Dhttps.proxyHost=proxy.my.corp + - -Dhttps.proxyPort=8080 + - -Djava.net.preferIPv4Stack=true + roleGroups: + default: + replicas: 1 + jvmArgumentOverrides: + # We need more memory! 
+ removeRegex: + - -Xmx.* + - -Dhttps.proxyPort=.* + add: + - -Xmx40000m + - -Dhttps.proxyPort=1234 + "#; + let jvm_config = construct_test_role_specific_jvm_args(input, true); + + assert_eq!( + jvm_config, + format!( + "-Xms34406m \ + -Djava.security.properties=/stackable/config/security.properties \ + -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=8183:/stackable/jmx/namenode.yaml \ + -Djava.security.krb5.conf={KERBEROS_CONTAINER_PATH}/krb5.conf \ + -Dhttps.proxyHost=proxy.my.corp \ + -Djava.net.preferIPv4Stack=true \ + -Xmx40000m \ + -Dhttps.proxyPort=1234") + ); + } + + fn construct_test_role_specific_jvm_args(hdfs_cluster: &str, kerberos_enabled: bool) -> String { + let hdfs: HdfsCluster = serde_yaml::from_str(hdfs_cluster).expect("illegal test input"); + + let role = HdfsRole::NameNode; + let merged_config = role.merged_config(&hdfs, "default").unwrap(); + let container_config = ContainerConfig::from(role); + let resources = container_config.resources(&merged_config); + + construct_role_specific_jvm_args( + &hdfs, + &role, + "default", + kerberos_enabled, + resources.as_ref(), + "/stackable/config", + DEFAULT_NAME_NODE_METRICS_PORT, + ) + .unwrap() + } +} diff --git a/rust/operator-binary/src/config.rs b/rust/operator-binary/src/config/mod.rs similarity index 99% rename from rust/operator-binary/src/config.rs rename to rust/operator-binary/src/config/mod.rs index 584d7ec6..26406d36 100644 --- a/rust/operator-binary/src/config.rs +++ b/rust/operator-binary/src/config/mod.rs @@ -13,6 +13,8 @@ use stackable_hdfs_crd::{HdfsCluster, HdfsPodRef}; use stackable_operator::utils::cluster_info::KubernetesClusterInfo; use std::collections::BTreeMap; +pub mod jvm; + #[derive(Clone)] pub struct HdfsSiteConfigBuilder { config: BTreeMap, diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index 2a72cf55..929ef118 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -52,7 +52,6 @@ use 
stackable_operator::{ }, kube::{core::ObjectMeta, ResourceExt}, kvp::Labels, - memory::{BinaryMultiple, MemoryQuantity}, product_logging::{ self, framework::{ @@ -68,6 +67,10 @@ use stackable_operator::{ }, strum::{Display, EnumDiscriminants, IntoStaticStr}; use crate::{ + config::{ + self, + jvm::{construct_global_jvm_args, construct_role_specific_jvm_args}, + }, product_logging::{ FORMAT_NAMENODES_LOG4J_CONFIG_FILE, FORMAT_ZOOKEEPER_LOG4J_CONFIG_FILE, HDFS_LOG4J_CONFIG_FILE, MAX_FORMAT_NAMENODE_LOG_FILE_SIZE, @@ -75,7 +78,8 @@ use crate::{ MAX_WAIT_NAMENODES_LOG_FILE_SIZE, MAX_ZKFC_LOG_FILE_SIZE, STACKABLE_LOG_DIR, WAIT_FOR_NAMENODES_LOG4J_CONFIG_FILE, ZKFC_LOG4J_CONFIG_FILE, }, - DATANODE_ROOT_DATA_DIR_PREFIX, JVM_SECURITY_PROPERTIES_FILE, LOG4J_PROPERTIES, + security::kerberos::KERBEROS_CONTAINER_PATH, + DATANODE_ROOT_DATA_DIR_PREFIX, LOG4J_PROPERTIES, }; pub(crate) const TLS_STORE_DIR: &str = "/stackable/tls"; @@ -94,9 +98,9 @@ pub enum Error { #[snafu(display("object has no namespace"))] ObjectHasNoNamespace, - #[snafu(display("invalid java heap config for {role:?}"))] - InvalidJavaHeapConfig { - source: stackable_operator::memory::Error, + #[snafu(display("failed to construct JVM arguments for role {role:?}"))] + ConstructJvmArguments { + source: config::jvm::Error, role: String, }, @@ -202,7 +206,6 @@ impl ContainerConfig { const WAIT_FOR_NAMENODES_CONFIG_VOLUME_MOUNT_NAME: &'static str = "wait-for-namenodes-config"; const WAIT_FOR_NAMENODES_LOG_VOLUME_MOUNT_NAME: &'static str = "wait-for-namenodes-log-config"; - const JVM_HEAP_FACTOR: f32 = 0.8; const HADOOP_HOME: &'static str = "/stackable/hadoop"; /// Add all main, side and init containers as well as required volumes to the pod builder. 
@@ -212,6 +215,7 @@ impl ContainerConfig { hdfs: &HdfsCluster, cluster_info: &KubernetesClusterInfo, role: &HdfsRole, + role_group: &str, resolved_product_image: &ResolvedProductImage, merged_config: &AnyNodeConfig, env_overrides: Option<&BTreeMap>, @@ -228,6 +232,7 @@ impl ContainerConfig { hdfs, cluster_info, role, + role_group, resolved_product_image, zk_config_map_name, env_overrides, @@ -313,6 +318,7 @@ impl ContainerConfig { hdfs, cluster_info, role, + role_group, resolved_product_image, zk_config_map_name, env_overrides, @@ -333,6 +339,7 @@ impl ContainerConfig { hdfs, cluster_info, role, + role_group, resolved_product_image, zk_config_map_name, env_overrides, @@ -354,6 +361,7 @@ impl ContainerConfig { hdfs, cluster_info, role, + role_group, resolved_product_image, zk_config_map_name, env_overrides, @@ -376,6 +384,7 @@ impl ContainerConfig { hdfs, cluster_info, role, + role_group, resolved_product_image, zk_config_map_name, env_overrides, @@ -445,6 +454,7 @@ impl ContainerConfig { hdfs: &HdfsCluster, cluster_info: &KubernetesClusterInfo, role: &HdfsRole, + role_group: &str, resolved_product_image: &ResolvedProductImage, zookeeper_config_map_name: &str, env_overrides: Option<&BTreeMap>, @@ -463,10 +473,11 @@ impl ContainerConfig { .args(self.args(hdfs, cluster_info, role, merged_config, &[])?) .add_env_vars(self.env( hdfs, + role_group, zookeeper_config_map_name, env_overrides, resources.as_ref(), - )) + )?) .add_volume_mounts(self.volume_mounts(hdfs, merged_config, labels)?) .context(AddVolumeMountSnafu)? 
.add_container_ports(self.container_ports(hdfs)); @@ -504,6 +515,7 @@ impl ContainerConfig { hdfs: &HdfsCluster, cluster_info: &KubernetesClusterInfo, role: &HdfsRole, + role_group: &str, resolved_product_image: &ResolvedProductImage, zookeeper_config_map_name: &str, env_overrides: Option<&BTreeMap>, @@ -517,7 +529,13 @@ impl ContainerConfig { cb.image_from_product_image(resolved_product_image) .command(Self::command()) .args(self.args(hdfs, cluster_info, role, merged_config, namenode_podrefs)?) - .add_env_vars(self.env(hdfs, zookeeper_config_map_name, env_overrides, None)) + .add_env_vars(self.env( + hdfs, + role_group, + zookeeper_config_map_name, + env_overrides, + None, + )?) .add_volume_mounts(self.volume_mounts(hdfs, merged_config, labels)?) .context(AddVolumeMountSnafu)?; @@ -784,8 +802,7 @@ wait_for_termination $! // Command to export `KERBEROS_REALM` env var to default real from krb5.conf, e.g. `CLUSTER.LOCAL` fn export_kerberos_real_env_var_command() -> String { - "export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' /stackable/kerberos/krb5.conf)\n" - .to_string() + format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {KERBEROS_CONTAINER_PATH}/krb5.conf)\n") } /// Command to `kinit` a ticket using the principal created for the specified hdfs role @@ -805,8 +822,8 @@ wait_for_termination $! ); Ok(formatdoc!( r###" - echo "Getting ticket for {principal}" from /stackable/kerberos/keytab - kinit "{principal}" -kt /stackable/kerberos/keytab + echo "Getting ticket for {principal}" from {KERBEROS_CONTAINER_PATH}/keytab + kinit "{principal}" -kt {KERBEROS_CONTAINER_PATH}/keytab "###, )) } @@ -823,10 +840,11 @@ wait_for_termination $! fn env( &self, hdfs: &HdfsCluster, + role_group: &str, zookeeper_config_map_name: &str, env_overrides: Option<&BTreeMap>, resources: Option<&ResourceRequirements>, - ) -> Vec { + ) -> Result, Error> { // Maps env var name to env var object. This allows env_overrides to work // as expected (i.e. 
users can override the env var value). let mut env: BTreeMap = BTreeMap::new(); @@ -855,33 +873,26 @@ wait_for_termination $! role_opts_name.clone(), EnvVar { name: role_opts_name, - value: self.build_hadoop_opts(hdfs, resources).ok(), + value: Some(self.build_hadoop_opts(hdfs, role_group, resources)?), ..EnvVar::default() }, ); } - // Additionally, any other init or sidecar container must have access to the following settings. - // As the Prometheus metric emitter is not part of this config it's safe to use for hdfs cli tools as well. - // This will not only enable the init containers to work, but also the user to run e.g. - // `bin/hdfs dfs -ls /` without getting `Caused by: java.lang.IllegalArgumentException: KrbException: Cannot locate default realm` - // because the `-Djava.security.krb5.conf` setting is missing - if hdfs.has_kerberos_enabled() { - env.insert( - "HADOOP_OPTS".to_string(), - EnvVar { - name: "HADOOP_OPTS".to_string(), - value: Some( - "-Djava.security.krb5.conf=/stackable/kerberos/krb5.conf".to_string(), - ), - ..EnvVar::default() - }, - ); + env.insert( + "HADOOP_OPTS".to_string(), + EnvVar { + name: "HADOOP_OPTS".to_string(), + value: Some(construct_global_jvm_args(hdfs.has_kerberos_enabled())), + ..EnvVar::default() + }, + ); + if hdfs.has_kerberos_enabled() { env.insert( "KRB5_CONFIG".to_string(), EnvVar { name: "KRB5_CONFIG".to_string(), - value: Some("/stackable/kerberos/krb5.conf".to_string()), + value: Some(format!("{KERBEROS_CONTAINER_PATH}/krb5.conf")), ..EnvVar::default() }, ); @@ -889,11 +900,12 @@ wait_for_termination $! "KRB5_CLIENT_KTNAME".to_string(), EnvVar { name: "KRB5_CLIENT_KTNAME".to_string(), - value: Some("/stackable/kerberos/keytab".to_string()), + value: Some(format!("{KERBEROS_CONTAINER_PATH}/keytab")), ..EnvVar::default() }, ); } + // Needed for the `containerdebug` process to log its tracing information to. env.insert( "CONTAINERDEBUG_LOG_DIRECTORY".to_string(),
env.append(&mut env_override_vars); - env.into_values().collect() + Ok(env.into_values().collect()) } /// Returns the container resources. - fn resources(&self, merged_config: &AnyNodeConfig) -> Option { + pub fn resources(&self, merged_config: &AnyNodeConfig) -> Option { match self { // Namenode sidecar containers ContainerConfig::Zkfc { .. } => Some( @@ -1095,7 +1107,8 @@ wait_for_termination $! // Adding this for all containers, as not only the main container needs Kerberos or TLS if hdfs.has_kerberos_enabled() { - volume_mounts.push(VolumeMountBuilder::new("kerberos", "/stackable/kerberos").build()); + volume_mounts + .push(VolumeMountBuilder::new("kerberos", KERBEROS_CONTAINER_PATH).build()); } if hdfs.has_https_enabled() { // This volume will be propagated by the create-tls-cert-bundle container @@ -1198,6 +1211,7 @@ wait_for_termination $! fn build_hadoop_opts( &self, hdfs: &HdfsCluster, + role_group: &str, resources: Option<&ResourceRequirements>, ) -> Result { match self { @@ -1206,39 +1220,18 @@ wait_for_termination $! } => { let cvd = ContainerVolumeDirs::from(role); let config_dir = cvd.final_config(); - let mut jvm_args = vec![ - format!( - "-Djava.security.properties={config_dir}/{JVM_SECURITY_PROPERTIES_FILE} -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={metrics_port}:/stackable/jmx/{role}.yaml", - )]; - - if hdfs.has_kerberos_enabled() { - jvm_args.push( - "-Djava.security.krb5.conf=/stackable/kerberos/krb5.conf".to_string(), - ); - } - - if let Some(memory_limit) = resources.and_then(|r| r.limits.as_ref()?.get("memory")) - { - let memory_limit = - MemoryQuantity::try_from(memory_limit).with_context(|_| { - InvalidJavaHeapConfigSnafu { - role: role.to_string(), - } - })?; - jvm_args.push(format!( - "-Xmx{}", - (memory_limit * Self::JVM_HEAP_FACTOR) - .scale_to(BinaryMultiple::Kibi) - .format_for_java() - .with_context(|_| { - InvalidJavaHeapConfigSnafu { - role: role.to_string(), - } - })? 
- )); - } - - Ok(jvm_args.join(" ").trim().to_string()) + construct_role_specific_jvm_args( + hdfs, + role, + role_group, + hdfs.has_kerberos_enabled(), + resources, + config_dir, + *metrics_port, + ) + .with_context(|_| ConstructJvmArgumentsSnafu { + role: role.to_string(), + }) } _ => Ok("".to_string()), } diff --git a/rust/operator-binary/src/hdfs_controller.rs b/rust/operator-binary/src/hdfs_controller.rs index 31054adb..234c63e6 100644 --- a/rust/operator-binary/src/hdfs_controller.rs +++ b/rust/operator-binary/src/hdfs_controller.rs @@ -861,6 +861,7 @@ fn rolegroup_statefulset( hdfs, cluster_info, role, + &rolegroup_ref.role_group, resolved_product_image, merged_config, env_overrides, @@ -1000,6 +1001,7 @@ properties: [] cluster_domain: DomainName::try_from("cluster.local").unwrap(), }, &role, + "default", &resolved_product_image, &merged_config, env_overrides, diff --git a/rust/operator-binary/src/security/kerberos.rs b/rust/operator-binary/src/security/kerberos.rs index 9b44748c..5bb85f5a 100644 --- a/rust/operator-binary/src/security/kerberos.rs +++ b/rust/operator-binary/src/security/kerberos.rs @@ -10,6 +10,8 @@ use stackable_operator::{ use crate::config::{CoreSiteConfigBuilder, HdfsSiteConfigBuilder}; +pub const KERBEROS_CONTAINER_PATH: &str = "/stackable/kerberos"; + type Result = std::result::Result; #[derive(Snafu, Debug)] @@ -85,9 +87,18 @@ impl CoreSiteConfigBuilder { "dfs.web.authentication.kerberos.principal", format!("HTTP/{principal_host_part}"), ) - .add("dfs.journalnode.keytab.file", "/stackable/kerberos/keytab") - .add("dfs.namenode.keytab.file", "/stackable/kerberos/keytab") - .add("dfs.datanode.keytab.file", "/stackable/kerberos/keytab") + .add( + "dfs.journalnode.keytab.file", + format!("{KERBEROS_CONTAINER_PATH}/keytab"), + ) + .add( + "dfs.namenode.keytab.file", + format!("{KERBEROS_CONTAINER_PATH}/keytab"), + ) + .add( + "dfs.datanode.keytab.file", + format!("{KERBEROS_CONTAINER_PATH}/keytab"), + ) .add( 
"dfs.journalnode.kerberos.principal.pattern", format!("jn/{principal_host_part}"),