From 58cb34c3fef5f2239358e3b853b1df517ef9b685 Mon Sep 17 00:00:00 2001
From: Sebastian Bernauer
Date: Mon, 17 Feb 2025 15:57:02 +0100
Subject: [PATCH 01/11] feat: Support configuring JVM arguments

---
 deploy/helm/hive-operator/crds/crds.yaml      |  52 ++++
 ...ironment-overrides.adoc => overrides.adoc} |   7 +
 docs/modules/hive/partials/nav.adoc           |   2 +-
 rust/operator-binary/src/config/jvm.rs        | 225 ++++++++++++++++++
 rust/operator-binary/src/config/mod.rs        |   1 +
 rust/operator-binary/src/controller.rs        |  61 ++---
 rust/operator-binary/src/crd/mod.rs           |  50 +---
 rust/operator-binary/src/main.rs              |   1 +
 8 files changed, 329 insertions(+), 70 deletions(-)
 rename docs/modules/hive/pages/usage-guide/{configuration-environment-overrides.adoc => overrides.adoc} (87%)
 create mode 100644 rust/operator-binary/src/config/jvm.rs
 create mode 100644 rust/operator-binary/src/config/mod.rs

diff --git a/deploy/helm/hive-operator/crds/crds.yaml b/deploy/helm/hive-operator/crds/crds.yaml
index ef2f0d66..2cc67500 100644
--- a/deploy/helm/hive-operator/crds/crds.yaml
+++ b/deploy/helm/hive-operator/crds/crds.yaml
@@ -507,6 +507,32 @@ spec:
                 default: {}
                 description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.'
                 type: object
+              jvmArgumentOverrides:
+                default:
+                  add: []
+                  remove: []
+                  removeRegex: []
+                description: Allows overriding JVM arguments. Please read the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage.
+                properties:
+                  add:
+                    default: []
+                    description: JVM arguments to be added
+                    items:
+                      type: string
+                    type: array
+                  remove:
+                    default: []
+                    description: JVM arguments to be removed by exact match
+                    items:
+                      type: string
+                    type: array
+                  removeRegex:
+                    default: []
+                    description: JVM arguments matching any of these regexes will be removed
+                    items:
+                      type: string
+                    type: array
+                type: object
               podOverrides:
                 default: {}
                 description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information.
@@ -775,6 +801,32 @@ spec:
                 default: {}
                 description: '`envOverrides` configure environment variables to be set in the Pods. It is a map from strings to strings - environment variables and the value to set. Read the [environment variable overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#env-overrides) for more information and consult the operator specific usage guide to find out about the product specific environment variables that are available.'
                 type: object
+              jvmArgumentOverrides:
+                default:
+                  add: []
+                  remove: []
+                  removeRegex: []
+                description: Allows overriding JVM arguments. Please read the [JVM argument overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#jvm-argument-overrides) for details on the usage.
+                properties:
+                  add:
+                    default: []
+                    description: JVM arguments to be added
+                    items:
+                      type: string
+                    type: array
+                  remove:
+                    default: []
+                    description: JVM arguments to be removed by exact match
+                    items:
+                      type: string
+                    type: array
+                  removeRegex:
+                    default: []
+                    description: JVM arguments matching any of these regexes will be removed
+                    items:
+                      type: string
+                    type: array
+                type: object
               podOverrides:
                 default: {}
                 description: In the `podOverrides` property you can define a [PodTemplateSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#podtemplatespec-v1-core) to override any property that can be set on a Kubernetes Pod. Read the [Pod overrides documentation](https://docs.stackable.tech/home/nightly/concepts/overrides#pod-overrides) for more information.
diff --git a/docs/modules/hive/pages/usage-guide/configuration-environment-overrides.adoc b/docs/modules/hive/pages/usage-guide/overrides.adoc
similarity index 87%
rename from docs/modules/hive/pages/usage-guide/configuration-environment-overrides.adoc
rename to docs/modules/hive/pages/usage-guide/overrides.adoc
index 39b0a505..fc180708 100644
--- a/docs/modules/hive/pages/usage-guide/configuration-environment-overrides.adoc
+++ b/docs/modules/hive/pages/usage-guide/overrides.adoc
@@ -101,3 +101,10 @@ metastore:
 
 The Hive operator also supports Pod overrides, allowing you to override any property that you can set on a Kubernetes Pod.
 Read the xref:concepts:overrides.adoc#pod-overrides[Pod overrides documentation] to learn more about this feature.
+
+== JVM argument overrides
+
+Stackable operators automatically determine the set of needed JVM arguments, such as memory settings or trust- and keystores.
+Using JVM argument overrides, you can configure the JVM arguments xref:concepts:overrides.adoc#jvm-argument-overrides[as described on the concepts page].
+
+One difference for Hive metastores is that all heap-related arguments are passed via the env variable `HADOOP_HEAPSIZE`, and all others via `HADOOP_OPTS`.
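For example, a HiveCluster could use the new field as in the following snippet; it mirrors the role/role-group merging exercised by the unit tests added in `jvm.rs` below, and the proxy values are purely illustrative:

```yaml
metastore:
  config:
    jvmArgumentOverrides:
      add:
        # Role level: added for every role group
        - -Dhttps.proxyHost=proxy.my.corp
        - -Dhttps.proxyPort=8080
  roleGroups:
    default:
      replicas: 1
      jvmArgumentOverrides:
        # Role-group level: replace the role-level proxy port
        removeRegex:
          - -Dhttps.proxyPort=.*
        add:
          - -Dhttps.proxyPort=1234
```

Overrides are merged operator defaults → role → role group, so the resulting `HADOOP_OPTS` contains `-Dhttps.proxyHost=proxy.my.corp` and `-Dhttps.proxyPort=1234`, as asserted in the second unit test.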
diff --git a/docs/modules/hive/partials/nav.adoc b/docs/modules/hive/partials/nav.adoc index bb28b0da..94cd6333 100644 --- a/docs/modules/hive/partials/nav.adoc +++ b/docs/modules/hive/partials/nav.adoc @@ -11,7 +11,7 @@ ** xref:hive:usage-guide/monitoring.adoc[] ** xref:hive:usage-guide/resources.adoc[] ** xref:hive:usage-guide/security.adoc[] -** xref:hive:usage-guide/configuration-environment-overrides.adoc[] +** xref:hive:usage-guide/overrides.adoc[] ** xref:hive:usage-guide/operations/index.adoc[] *** xref:hive:usage-guide/operations/cluster-operations.adoc[] *** xref:hive:usage-guide/operations/pod-placement.adoc[] diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs new file mode 100644 index 00000000..0481b8f7 --- /dev/null +++ b/rust/operator-binary/src/config/jvm.rs @@ -0,0 +1,225 @@ +use crate::crd::{ + v1alpha1::HiveCluster, MetaStoreConfig, MetaStoreConfigFragment, JVM_SECURITY_PROPERTIES_FILE, + METRICS_PORT, STACKABLE_CONFIG_DIR, STACKABLE_TRUST_STORE, STACKABLE_TRUST_STORE_PASSWORD, +}; +use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::{ + memory::{BinaryMultiple, MemoryQuantity}, + role_utils::{self, GenericRoleConfig, JavaCommonConfig, JvmArgumentOverrides, Role}, +}; + +const JAVA_HEAP_FACTOR: f32 = 0.8; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("invalid memory resource configuration - missing default or value in crd?"))] + MissingMemoryResourceConfig, + + #[snafu(display("invalid memory config"))] + InvalidMemoryConfig { + source: stackable_operator::memory::Error, + }, + + #[snafu(display("failed to merge jvm argument overrides"))] + MergeJvmArgumentOverrides { source: role_utils::Error }, +} + +/// All JVM arguments. +fn construct_jvm_args( + hive: &HiveCluster, + merged_config: &MetaStoreConfig, + role: &Role, + role_group: &str, +) -> Result, Error> { + let heap_size = MemoryQuantity::try_from( + merged_config + .resources + .memory + .limit + .as_ref() + .context(MissingMemoryResourceConfigSnafu)?, + ) + .context(InvalidMemoryConfigSnafu)? + .scale_to(BinaryMultiple::Mebi) + * JAVA_HEAP_FACTOR; + let java_heap = heap_size + .format_for_java() + .context(InvalidMemoryConfigSnafu)?; + + let mut jvm_args = vec![ + // Heap settings + format!("-Xmx{java_heap}"), + format!("-Xms{java_heap}"), + format!("-Djava.security.properties={STACKABLE_CONFIG_DIR}/{JVM_SECURITY_PROPERTIES_FILE}"), + format!("-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={METRICS_PORT}:/stackable/jmx/jmx_hive_config.yaml"), + format!("-Djavax.net.ssl.trustStore={STACKABLE_TRUST_STORE}"), + format!("-Djavax.net.ssl.trustStorePassword={STACKABLE_TRUST_STORE_PASSWORD}"), + format!("-Djavax.net.ssl.trustStoreType=pkcs12"), + ]; + + if hive.has_kerberos_enabled() { + jvm_args.push("-Djava.security.krb5.conf=/stackable/kerberos/krb5.conf".to_owned()); + } + + let operator_generated = JvmArgumentOverrides::new_with_only_additions(jvm_args); + let merged = role + .get_merged_jvm_argument_overrides(role_group, &operator_generated) + .context(MergeJvmArgumentOverridesSnafu)?; + Ok(merged + .effective_jvm_config_after_merging() + // Sorry for the clone, that's how operator-rs is currently modelled :P + .clone()) +} + +/// Arguments that go into `HADOOP_OPTS`, so *not* the heap settings (which you can get using +/// [`construct_heap_jvm_args`]). 
+pub fn construct_non_heap_jvm_args( + hive: &HiveCluster, + merged_config: &MetaStoreConfig, + role: &Role, + role_group: &str, +) -> Result { + let mut jvm_args = construct_jvm_args(hive, merged_config, role, role_group)?; + jvm_args.retain(|arg| !is_heap_jvm_argument(arg)); + + Ok(jvm_args.join(" ")) +} + +/// Arguments that go into `HADOOP_HEAPSIZE`. +/// You can get the normal JVM arguments using [`construct_non_heap_jvm_args`]. +pub fn construct_heap_jvm_args( + hive: &HiveCluster, + merged_config: &MetaStoreConfig, + role: &Role, + role_group: &str, +) -> Result { + let mut jvm_args = construct_jvm_args(hive, merged_config, role, role_group)?; + jvm_args.retain(|arg| is_heap_jvm_argument(arg)); + + Ok(jvm_args.join(" ")) +} + +fn is_heap_jvm_argument(jvm_argument: &str) -> bool { + let lowercase = jvm_argument.to_lowercase(); + + lowercase.starts_with("-xms") || lowercase.starts_with("-xmx") +} + +#[cfg(test)] +mod tests { + use crate::crd::HiveRole; + + use super::*; + + #[test] + fn test_construct_jvm_arguments_defaults() { + let input = r#" + apiVersion: hive.stackable.tech/v1alpha1 + kind: HiveCluster + metadata: + name: simple-hive + spec: + image: + productVersion: 4.0.0 + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/tmp/hive;create=true + dbType: derby + credentialsSecret: mySecret + metastore: + roleGroups: + default: + replicas: 1 + "#; + let (hive, hive_role, role, merged_config) = construct_boilerplate(input); + let non_heap_jvm_args = + construct_non_heap_jvm_args(&hive, &hive_role, &role, &merged_config).unwrap(); + let heap_jvm_args = + construct_heap_jvm_args(&hive, &hive_role, &role, &merged_config).unwrap(); + + assert_eq!( + non_heap_jvm_args, + "-Djava.security.properties=/stackable/config/security.properties \ + -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=9084:/stackable/jmx/jmx_hive_config.yaml \ + -Djavax.net.ssl.trustStore=/stackable/truststore.p12 \ + -Djavax.net.ssl.trustStorePassword=changeit \ + -Djavax.net.ssl.trustStoreType=pkcs12" + ); + assert_eq!(heap_jvm_args, "-Xmx409m -Xms409m"); + } + + #[test] + fn test_construct_jvm_argument_overrides() { + let input = r#" + apiVersion: hive.stackable.tech/v1alpha1 + kind: HiveCluster + metadata: + name: simple-hive + spec: + image: + productVersion: 4.0.0 + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/tmp/hive;create=true + dbType: derby + credentialsSecret: mySecret + metastore: + config: + resources: + memory: + limit: 42Gi + jvmArgumentOverrides: + add: + - -Dhttps.proxyHost=proxy.my.corp + - -Dhttps.proxyPort=8080 + - -Djava.net.preferIPv4Stack=true + roleGroups: + default: + replicas: 1 + jvmArgumentOverrides: + # We need more memory! 
+ removeRegex: + - -Xmx.* + - -Dhttps.proxyPort=.* + add: + - -Xmx40000m + - -Dhttps.proxyPort=1234 + "#; + let (hive, merged_config, role, role_group) = construct_boilerplate(input); + let non_heap_jvm_args = + construct_non_heap_jvm_args(&hive, &merged_config, &role, &role_group).unwrap(); + let heap_jvm_args = + construct_heap_jvm_args(&hive, &merged_config, &role, &role_group).unwrap(); + + assert_eq!( + non_heap_jvm_args, + "-Djava.security.properties=/stackable/config/security.properties \ + -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar=9084:/stackable/jmx/jmx_hive_config.yaml \ + -Djavax.net.ssl.trustStore=/stackable/truststore.p12 \ + -Djavax.net.ssl.trustStorePassword=changeit \ + -Djavax.net.ssl.trustStoreType=pkcs12 \ + -Dhttps.proxyHost=proxy.my.corp \ + -Djava.net.preferIPv4Stack=true \ + -Dhttps.proxyPort=1234" + ); + assert_eq!(heap_jvm_args, "-Xms34406m -Xmx40000m"); + } + + fn construct_boilerplate( + hive_cluster: &str, + ) -> ( + HiveCluster, + MetaStoreConfig, + Role, + String, + ) { + let hive: HiveCluster = serde_yaml::from_str(hive_cluster).expect("illegal test input"); + + let hive_role = HiveRole::MetaStore; + let rolegroup_ref = hive.metastore_rolegroup_ref("default"); + let merged_config = hive.merged_config(&hive_role, &rolegroup_ref).unwrap(); + let role = hive.spec.metastore.clone().unwrap(); + + (hive, merged_config, role, "default".to_owned()) + } +} diff --git a/rust/operator-binary/src/config/mod.rs b/rust/operator-binary/src/config/mod.rs new file mode 100644 index 00000000..271c6d99 --- /dev/null +++ b/rust/operator-binary/src/config/mod.rs @@ -0,0 +1 @@ +pub mod jvm; diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 89477680..3e8e26f0 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -66,7 +66,7 @@ use stackable_operator::{ CustomContainerLogConfig, }, }, - role_utils::{GenericRoleConfig, RoleGroupRef}, + role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroupRef}, status::condition::{ compute_conditions, operations::ClusterOperationsConditionBuilder, statefulset::StatefulSetConditionBuilder, @@ -79,10 +79,11 @@ use tracing::warn; use crate::{ command::build_container_command_args, + config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, crd::{ - v1alpha1, Container, HiveClusterStatus, HiveRole, MetaStoreConfig, APP_NAME, CORE_SITE_XML, - DB_PASSWORD_ENV, DB_USERNAME_ENV, HADOOP_HEAPSIZE, HIVE_ENV_SH, HIVE_PORT, HIVE_PORT_NAME, - HIVE_SITE_XML, JVM_HEAP_FACTOR, JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, + v1alpha1, Container, HiveClusterStatus, HiveRole, MetaStoreConfig, MetaStoreConfigFragment, + APP_NAME, CORE_SITE_XML, DB_PASSWORD_ENV, DB_USERNAME_ENV, HIVE_ENV_SH, HIVE_PORT, + HIVE_PORT_NAME, HIVE_SITE_XML, JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, METRICS_PORT_NAME, STACKABLE_CONFIG_DIR, STACKABLE_CONFIG_DIR_NAME, STACKABLE_CONFIG_MOUNT_DIR, STACKABLE_CONFIG_MOUNT_DIR_NAME, STACKABLE_LOG_CONFIG_MOUNT_DIR, STACKABLE_LOG_CONFIG_MOUNT_DIR_NAME, STACKABLE_LOG_DIR, @@ -101,7 +102,7 @@ use crate::{ pub const HIVE_CONTROLLER_NAME: &str = "hivecluster"; pub const HIVE_FULL_CONTROLLER_NAME: &str = concatcp!(HIVE_CONTROLLER_NAME, '.', OPERATOR_NAME); -/// Used as runAsUser in the pod security context. 
This is specified in the kafka image file +/// Used as runAsUser in the pod security context pub const HIVE_UID: i64 = 1000; const DOCKER_IMAGE_BASE_NAME: &str = "hive"; @@ -328,6 +329,9 @@ pub enum Error { InvalidHiveCluster { source: error_boundary::InvalidObject, }, + + #[snafu(display("failed to construct JVM arguments"))] + ConstructJvmArguments { source: crate::config::jvm::Error }, } type Result = std::result::Result; @@ -354,6 +358,7 @@ pub async fn reconcile_hive( .spec .image .resolve(DOCKER_IMAGE_BASE_NAME, crate::built_info::PKG_VERSION); + let role = hive.spec.metastore.as_ref().context(NoMetaStoreRoleSnafu)?; let hive_role = HiveRole::MetaStore; let s3_connection_spec: Option = @@ -385,7 +390,7 @@ pub async fn reconcile_hive( PropertyNameKind::File(HIVE_ENV_SH.to_string()), PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), ], - hive.spec.metastore.clone().context(NoMetaStoreRoleSnafu)?, + role.clone(), ), )] .into(), @@ -455,6 +460,7 @@ pub async fn reconcile_hive( hive, &hive_namespace, &resolved_product_image, + role, &rolegroup, rolegroup_config, s3_connection_spec.as_ref(), @@ -598,6 +604,7 @@ fn build_metastore_rolegroup_config_map( hive: &v1alpha1::HiveCluster, hive_namespace: &str, resolved_product_image: &ResolvedProductImage, + role: &Role, rolegroup: &RoleGroupRef, role_group_config: &HashMap>, s3_connection_spec: Option<&S3ConnectionSpec>, @@ -611,36 +618,32 @@ fn build_metastore_rolegroup_config_map( for (property_name_kind, config) in role_group_config { match property_name_kind { PropertyNameKind::File(file_name) if file_name == HIVE_ENV_SH => { - let mut data = BTreeMap::new(); - - let memory_limit = MemoryQuantity::try_from( - merged_config - .resources - .memory - .limit - .as_ref() - .context(InvalidJavaHeapConfigSnafu)?, - ) - .context(FailedToConvertJavaHeapSnafu { - unit: BinaryMultiple::Mebi.to_java_memory_unit(), - })?; - let heap_in_mebi = (memory_limit * JVM_HEAP_FACTOR) - .scale_to(BinaryMultiple::Mebi) - .floor() - .value as u32; - - data.insert(HADOOP_HEAPSIZE.to_string(), Some(heap_in_mebi.to_string())); + let mut data = BTreeMap::from([ + ( + "HADOOP_HEAPSIZE".to_string(), + construct_heap_jvm_args(hive, merged_config, role, &rolegroup.role_group) + .context(ConstructJvmArgumentsSnafu)?, + ), + ( + "HADOOP_OPTS".to_string(), + construct_non_heap_jvm_args( + hive, + merged_config, + role, + &rolegroup.role_group, + ) + .context(ConstructJvmArgumentsSnafu)?, + ), + ]); // other properties / overrides for (property_name, property_value) in config { - data.insert(property_name.to_string(), Some(property_value.to_string())); + data.insert(property_name.to_string(), property_value.to_string()); } hive_env_data = data .into_iter() - .map(|(key, value)| { - format!("export {key}={val}", val = value.unwrap_or_default()) - }) + .map(|(key, value)| format!("export {key}={value}")) .collect::>() .join("\n"); } diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index d9c60038..42c48f05 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -1,6 +1,5 @@ use std::{collections::BTreeMap, str::FromStr}; -use indoc::formatdoc; use security::AuthenticationConfig; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt, Snafu}; @@ -23,9 +22,7 @@ use stackable_operator::{ kube::{runtime::reflector::ObjectRef, CustomResource, ResourceExt}, product_config_utils::{self, Configuration}, product_logging::{self, spec::Logging}, - role_utils::{ - 
GenericProductSpecificCommonConfig, GenericRoleConfig, Role, RoleGroup, RoleGroupRef, - }, + role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroup, RoleGroupRef}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, time::Duration, @@ -70,13 +67,6 @@ pub const SYSTEM_TRUST_STORE_PASSWORD: &str = "changeit"; pub const STACKABLE_TRUST_STORE: &str = "/stackable/truststore.p12"; pub const STACKABLE_TRUST_STORE_PASSWORD: &str = "changeit"; -// Metastore opts -pub const HADOOP_OPTS: &str = "HADOOP_OPTS"; - -// Heap -pub const HADOOP_HEAPSIZE: &str = "HADOOP_HEAPSIZE"; -pub const JVM_HEAP_FACTOR: f32 = 0.8; - // DB credentials pub const DB_USERNAME_PLACEHOLDER: &str = "xxx_db_username_xxx"; pub const DB_PASSWORD_PLACEHOLDER: &str = "xxx_db_password_xxx"; @@ -141,7 +131,7 @@ pub mod versioned { // no doc - docs in Role struct. #[serde(default, skip_serializing_if = "Option::is_none")] - pub metastore: Option>, + pub metastore: Option>, } #[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] @@ -234,7 +224,10 @@ impl v1alpha1::HiveCluster { })) } - pub fn role(&self, role_variant: &HiveRole) -> Result<&Role, Error> { + pub fn role( + &self, + role_variant: &HiveRole, + ) -> Result<&Role, Error> { match role_variant { HiveRole::MetaStore => self.spec.metastore.as_ref(), } @@ -246,7 +239,7 @@ impl v1alpha1::HiveCluster { pub fn rolegroup( &self, rolegroup_ref: &RoleGroupRef, - ) -> Result, Error> { + ) -> Result, Error> { let role_variant = HiveRole::from_str(&rolegroup_ref.role).with_context(|_| UnknownHiveRoleSnafu { role: rolegroup_ref.role.to_owned(), @@ -574,24 +567,11 @@ impl Configuration for MetaStoreConfigFragment { fn compute_env( &self, - hive: &Self::Configurable, + _hive: &Self::Configurable, _role_name: &str, ) -> Result>, product_config_utils::Error> { - let mut result = BTreeMap::new(); - - let env = formatdoc! {" - -javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={METRICS_PORT}:/stackable/jmx/jmx_hive_config.yaml \ - -Djavax.net.ssl.trustStore={STACKABLE_TRUST_STORE} \ - -Djavax.net.ssl.trustStorePassword={STACKABLE_TRUST_STORE_PASSWORD} \ - -Djavax.net.ssl.trustStoreType=pkcs12 \ - -Djava.security.properties={STACKABLE_CONFIG_DIR}/{JVM_SECURITY_PROPERTIES_FILE} \ - {java_security_krb5_conf}", - java_security_krb5_conf = java_security_krb5_conf(hive) - }; - - result.insert(HADOOP_OPTS.to_string(), Some(env)); - - Ok(result) + // Well product-config strikes again... + Ok(BTreeMap::new()) } fn compute_cli( @@ -649,16 +629,6 @@ impl Configuration for MetaStoreConfigFragment { } } -fn java_security_krb5_conf(hive: &v1alpha1::HiveCluster) -> String { - if hive.has_kerberos_enabled() { - return formatdoc! 
{ - "-Djava.security.krb5.conf=/stackable/kerberos/krb5.conf" - }; - } - - String::new() -} - #[derive(Clone, Default, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] pub struct HiveClusterStatus { diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 92ee2579..ba5a6a5f 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -1,4 +1,5 @@ mod command; +mod config; mod controller; mod crd; mod discovery; From 2c15817a56c4a247d1c4aeea9d75e537bc4860d7 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Mon, 17 Feb 2025 16:01:24 +0100 Subject: [PATCH 02/11] changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc166c43..5b89b6cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file. - Run a `containerdebug` process in the background of each Hive container to collect debugging information ([#554]). - Aggregate emitted Kubernetes events on the CustomResources ([#560]). +- Support configuring JVM arguments ([#572]). ### Changed @@ -16,6 +17,7 @@ All notable changes to this project will be documented in this file. [#554]: https://github.com/stackabletech/hive-operator/pull/554 [#560]: https://github.com/stackabletech/hive-operator/pull/560 [#561]: https://github.com/stackabletech/hive-operator/pull/561 +[#572]: https://github.com/stackabletech/hive-operator/pull/572 ## [24.11.1] - 2025-01-10 From 7d756f763a6e6c5e7e400086c8d00a526fefee88 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 18 Feb 2025 08:49:28 +0100 Subject: [PATCH 03/11] fmt --- rust/operator-binary/src/config/jvm.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 0481b8f7..f1acf510 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -1,13 +1,14 @@ -use crate::crd::{ - v1alpha1::HiveCluster, MetaStoreConfig, MetaStoreConfigFragment, JVM_SECURITY_PROPERTIES_FILE, - METRICS_PORT, STACKABLE_CONFIG_DIR, STACKABLE_TRUST_STORE, STACKABLE_TRUST_STORE_PASSWORD, -}; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ memory::{BinaryMultiple, MemoryQuantity}, role_utils::{self, GenericRoleConfig, JavaCommonConfig, JvmArgumentOverrides, Role}, }; +use crate::crd::{ + v1alpha1::HiveCluster, MetaStoreConfig, MetaStoreConfigFragment, JVM_SECURITY_PROPERTIES_FILE, + METRICS_PORT, STACKABLE_CONFIG_DIR, STACKABLE_TRUST_STORE, STACKABLE_TRUST_STORE_PASSWORD, +}; + const JAVA_HEAP_FACTOR: f32 = 0.8; #[derive(Snafu, Debug)] @@ -107,9 +108,8 @@ fn is_heap_jvm_argument(jvm_argument: &str) -> bool { #[cfg(test)] mod tests { - use crate::crd::HiveRole; - use super::*; + use crate::crd::HiveRole; #[test] fn test_construct_jvm_arguments_defaults() { From fddc46323dd91ba33895303179af9bece9b7aa3d Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 18 Feb 2025 13:17:15 +0100 Subject: [PATCH 04/11] fix: HADOOP_HEAPSIZE has no suffix --- .../hive/pages/usage-guide/overrides.adoc | 1 + rust/operator-binary/src/config/jvm.rs | 44 ++++++++++--------- rust/operator-binary/src/controller.rs | 4 +- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/docs/modules/hive/pages/usage-guide/overrides.adoc b/docs/modules/hive/pages/usage-guide/overrides.adoc index fc180708..ed6aa8bc 100644 --- 
a/docs/modules/hive/pages/usage-guide/overrides.adoc
+++ b/docs/modules/hive/pages/usage-guide/overrides.adoc
@@ -108,3 +108,4 @@ Stackable operators automatically determine the set of needed JVM arguments, suc
 Using JVM argument overrides, you can configure the JVM arguments xref:concepts:overrides.adoc#jvm-argument-overrides[as described on the concepts page].
 
 One difference for Hive metastores is that all heap-related arguments are passed via the env variable `HADOOP_HEAPSIZE`, and all others via `HADOOP_OPTS`.
+`HADOOP_HEAPSIZE` can *not* have a unit suffix; it is always an integer representing the heap size in megabytes.
diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs
index f1acf510..0244a177 100644
--- a/rust/operator-binary/src/config/jvm.rs
+++ b/rust/operator-binary/src/config/jvm.rs
@@ -86,18 +86,22 @@ pub fn construct_non_heap_jvm_args(
     Ok(jvm_args.join(" "))
 }
 
-/// Arguments that go into `HADOOP_HEAPSIZE`.
-/// You can get the normal JVM arguments using [`construct_non_heap_jvm_args`].
-pub fn construct_heap_jvm_args(
-    hive: &HiveCluster,
-    merged_config: &MetaStoreConfig,
-    role: &Role<MetaStoreConfigFragment, GenericRoleConfig, JavaCommonConfig>,
-    role_group: &str,
-) -> Result<String, Error> {
-    let mut jvm_args = construct_jvm_args(hive, merged_config, role, role_group)?;
-    jvm_args.retain(|arg| is_heap_jvm_argument(arg));
+/// This will be put into `HADOOP_HEAPSIZE`, which is just the heap size in megabytes (*without* the `m`
+/// unit appended).
+pub fn construct_hadoop_heapsize_env(merged_config: &MetaStoreConfig) -> Result<String, Error> {
+    let heap_size_in_mb = (MemoryQuantity::try_from(
+        merged_config
+            .resources
+            .memory
+            .limit
+            .as_ref()
+            .context(MissingMemoryResourceConfigSnafu)?,
+    )
+    .context(InvalidMemoryConfigSnafu)?
+ * JAVA_HEAP_FACTOR) + .scale_to(BinaryMultiple::Mebi); - Ok(jvm_args.join(" ")) + Ok((heap_size_in_mb.value.floor() as u32).to_string()) } fn is_heap_jvm_argument(jvm_argument: &str) -> bool { @@ -131,11 +135,10 @@ mod tests { default: replicas: 1 "#; - let (hive, hive_role, role, merged_config) = construct_boilerplate(input); + let (hive, merged_config, role, rolegroup) = construct_boilerplate(input); let non_heap_jvm_args = - construct_non_heap_jvm_args(&hive, &hive_role, &role, &merged_config).unwrap(); - let heap_jvm_args = - construct_heap_jvm_args(&hive, &hive_role, &role, &merged_config).unwrap(); + construct_non_heap_jvm_args(&hive, &merged_config, &role, &rolegroup).unwrap(); + let hadoop_heapsize_env = construct_hadoop_heapsize_env(&merged_config).unwrap(); assert_eq!( non_heap_jvm_args, @@ -145,7 +148,7 @@ mod tests { -Djavax.net.ssl.trustStorePassword=changeit \ -Djavax.net.ssl.trustStoreType=pkcs12" ); - assert_eq!(heap_jvm_args, "-Xmx409m -Xms409m"); + assert_eq!(hadoop_heapsize_env, "409"); } #[test] @@ -185,11 +188,10 @@ mod tests { - -Xmx40000m - -Dhttps.proxyPort=1234 "#; - let (hive, merged_config, role, role_group) = construct_boilerplate(input); + let (hive, merged_config, role, rolegroup) = construct_boilerplate(input); let non_heap_jvm_args = - construct_non_heap_jvm_args(&hive, &merged_config, &role, &role_group).unwrap(); - let heap_jvm_args = - construct_heap_jvm_args(&hive, &merged_config, &role, &role_group).unwrap(); + construct_non_heap_jvm_args(&hive, &merged_config, &role, &rolegroup).unwrap(); + let hadoop_heapsize_env = construct_hadoop_heapsize_env(&merged_config).unwrap(); assert_eq!( non_heap_jvm_args, @@ -202,7 +204,7 @@ mod tests { -Djava.net.preferIPv4Stack=true \ -Dhttps.proxyPort=1234" ); - assert_eq!(heap_jvm_args, "-Xms34406m -Xmx40000m"); + assert_eq!(hadoop_heapsize_env, "34406"); } fn construct_boilerplate( diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 3e8e26f0..88aa0a9b 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -79,7 +79,7 @@ use tracing::warn; use crate::{ command::build_container_command_args, - config::jvm::{construct_heap_jvm_args, construct_non_heap_jvm_args}, + config::jvm::{construct_hadoop_heapsize_env, construct_non_heap_jvm_args}, crd::{ v1alpha1, Container, HiveClusterStatus, HiveRole, MetaStoreConfig, MetaStoreConfigFragment, APP_NAME, CORE_SITE_XML, DB_PASSWORD_ENV, DB_USERNAME_ENV, HIVE_ENV_SH, HIVE_PORT, @@ -621,7 +621,7 @@ fn build_metastore_rolegroup_config_map( let mut data = BTreeMap::from([ ( "HADOOP_HEAPSIZE".to_string(), - construct_heap_jvm_args(hive, merged_config, role, &rolegroup.role_group) + construct_hadoop_heapsize_env(merged_config) .context(ConstructJvmArgumentsSnafu)?, ), ( From 91485afbd17f44b2ca324b8f026af83a202d17f2 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 18 Feb 2025 13:18:05 +0100 Subject: [PATCH 05/11] fixup, part 2 --- rust/operator-binary/src/config/jvm.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 0244a177..1047f1dd 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -48,9 +48,6 @@ fn construct_jvm_args( .context(InvalidMemoryConfigSnafu)?; let mut jvm_args = vec![ - // Heap settings - format!("-Xmx{java_heap}"), - format!("-Xms{java_heap}"), 
format!("-Djava.security.properties={STACKABLE_CONFIG_DIR}/{JVM_SECURITY_PROPERTIES_FILE}"), format!("-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={METRICS_PORT}:/stackable/jmx/jmx_hive_config.yaml"), format!("-Djavax.net.ssl.trustStore={STACKABLE_TRUST_STORE}"), From 2cdbba47ec775ab85490d301eb04cec48433e944 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 18 Feb 2025 13:19:18 +0100 Subject: [PATCH 06/11] clippy --- rust/operator-binary/src/config/jvm.rs | 25 +++---------------------- rust/operator-binary/src/controller.rs | 9 ++------- 2 files changed, 5 insertions(+), 29 deletions(-) diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs index 1047f1dd..4f46933f 100644 --- a/rust/operator-binary/src/config/jvm.rs +++ b/rust/operator-binary/src/config/jvm.rs @@ -28,25 +28,9 @@ pub enum Error { /// All JVM arguments. fn construct_jvm_args( hive: &HiveCluster, - merged_config: &MetaStoreConfig, role: &Role, role_group: &str, ) -> Result, Error> { - let heap_size = MemoryQuantity::try_from( - merged_config - .resources - .memory - .limit - .as_ref() - .context(MissingMemoryResourceConfigSnafu)?, - ) - .context(InvalidMemoryConfigSnafu)? - .scale_to(BinaryMultiple::Mebi) - * JAVA_HEAP_FACTOR; - let java_heap = heap_size - .format_for_java() - .context(InvalidMemoryConfigSnafu)?; - let mut jvm_args = vec![ format!("-Djava.security.properties={STACKABLE_CONFIG_DIR}/{JVM_SECURITY_PROPERTIES_FILE}"), format!("-javaagent:/stackable/jmx/jmx_prometheus_javaagent.jar={METRICS_PORT}:/stackable/jmx/jmx_hive_config.yaml"), @@ -73,11 +57,10 @@ fn construct_jvm_args( /// [`construct_heap_jvm_args`]). pub fn construct_non_heap_jvm_args( hive: &HiveCluster, - merged_config: &MetaStoreConfig, role: &Role, role_group: &str, ) -> Result { - let mut jvm_args = construct_jvm_args(hive, merged_config, role, role_group)?; + let mut jvm_args = construct_jvm_args(hive, role, role_group)?; jvm_args.retain(|arg| !is_heap_jvm_argument(arg)); Ok(jvm_args.join(" ")) @@ -133,8 +116,7 @@ mod tests { replicas: 1 "#; let (hive, merged_config, role, rolegroup) = construct_boilerplate(input); - let non_heap_jvm_args = - construct_non_heap_jvm_args(&hive, &merged_config, &role, &rolegroup).unwrap(); + let non_heap_jvm_args = construct_non_heap_jvm_args(&hive, &role, &rolegroup).unwrap(); let hadoop_heapsize_env = construct_hadoop_heapsize_env(&merged_config).unwrap(); assert_eq!( @@ -186,8 +168,7 @@ mod tests { - -Dhttps.proxyPort=1234 "#; let (hive, merged_config, role, rolegroup) = construct_boilerplate(input); - let non_heap_jvm_args = - construct_non_heap_jvm_args(&hive, &merged_config, &role, &rolegroup).unwrap(); + let non_heap_jvm_args = construct_non_heap_jvm_args(&hive, &role, &rolegroup).unwrap(); let hadoop_heapsize_env = construct_hadoop_heapsize_env(&merged_config).unwrap(); assert_eq!( diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 88aa0a9b..4ed96717 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -626,13 +626,8 @@ fn build_metastore_rolegroup_config_map( ), ( "HADOOP_OPTS".to_string(), - construct_non_heap_jvm_args( - hive, - merged_config, - role, - &rolegroup.role_group, - ) - .context(ConstructJvmArgumentsSnafu)?, + construct_non_heap_jvm_args(hive, role, &rolegroup.role_group) + .context(ConstructJvmArgumentsSnafu)?, ), ]); From bd6940052ed3d1e3ed946803ad888994558e9c75 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 18 Feb 2025 
13:28:09 +0100
Subject: [PATCH 07/11] Fix rustdoc

---
 rust/operator-binary/src/config/jvm.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/operator-binary/src/config/jvm.rs b/rust/operator-binary/src/config/jvm.rs
index 4f46933f..d6595206 100644
--- a/rust/operator-binary/src/config/jvm.rs
+++ b/rust/operator-binary/src/config/jvm.rs
@@ -54,7 +54,7 @@ fn construct_jvm_args(
 }
 
 /// Arguments that go into `HADOOP_OPTS`, so *not* the heap settings (which you can get using
-/// [`construct_heap_jvm_args`]).
+/// [`construct_hadoop_heapsize_env`]).
 pub fn construct_non_heap_jvm_args(
     hive: &HiveCluster,
     role: &Role<MetaStoreConfigFragment, GenericRoleConfig, JavaCommonConfig>,
     role_group: &str,

From 326438fb6a9d72de9191a04f1505d27c4313d676 Mon Sep 17 00:00:00 2001
From: Sebastian Bernauer
Date: Tue, 18 Feb 2025 13:40:23 +0100
Subject: [PATCH 08/11] sneaky sneaky little bug

---
 rust/operator-binary/src/controller.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs
index 4ed96717..3607efd6 100644
--- a/rust/operator-binary/src/controller.rs
+++ b/rust/operator-binary/src/controller.rs
@@ -638,7 +638,7 @@ fn build_metastore_rolegroup_config_map(
 
             hive_env_data = data
                 .into_iter()
-                .map(|(key, value)| format!("export {key}={value}"))
+                .map(|(key, value)| format!("export {key}=\"{value}\""))
                 .collect::<Vec<_>>()
                 .join("\n");
         }

From b3a4e161e741c31669fb0e0683e9cbf6d730f2aa Mon Sep 17 00:00:00 2001
From: Sebastian Bernauer
Date: Wed, 19 Feb 2025 11:01:09 +0100
Subject: [PATCH 09/11] fix: Remove the hive-env.sh

---
 CHANGELOG.md                                  |  9 ++
 rust/operator-binary/src/controller.rs        | 97 ++++++-------------
 rust/operator-binary/src/crd/mod.rs           | 57 +++++------
 .../kuttl/smoke/60-install-hive.yaml.j2       | 15 ++-
 tests/templates/kuttl/smoke/61-assert.yaml    |  8 +-
 5 files changed, 74 insertions(+), 112 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b89b6cc..0b3628cd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,15 @@ All notable changes to this project will be documented in this file.
 
 - Default to OCI for image metadata and product image selection ([#561]).
 
+### Fixed
+
+- BREAKING: Remove the `hive-env.sh` config file, as e.g. setting `HADOOP_OPTS` in there had absolutely no effect.
+  This is considered a fix, as users expected the envs to be used, but they were not.
+  Users should use `envOverrides` instead, which actually take effect ([#572]).
+- BREAKING: The env variable `HADOOP_HEAPSIZE` was previously put in `hive-env.sh` and very likely had no effect.
+  It is now passed as an env variable and thus takes effect.
+  This might impact your stacklet, as the heap size setting now actually has an effect ([#572]).
+ [#554]: https://github.com/stackabletech/hive-operator/pull/554 [#560]: https://github.com/stackabletech/hive-operator/pull/560 [#561]: https://github.com/stackabletech/hive-operator/pull/561 diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 3607efd6..38706116 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -37,9 +37,8 @@ use stackable_operator::{ api::{ apps::v1::{StatefulSet, StatefulSetSpec}, core::v1::{ - ConfigMap, ConfigMapVolumeSource, EmptyDirVolumeSource, EnvVar, EnvVarSource, - Probe, SecretKeySelector, Service, ServicePort, ServiceSpec, TCPSocketAction, - Volume, + ConfigMap, ConfigMapVolumeSource, EmptyDirVolumeSource, Probe, Service, + ServicePort, ServiceSpec, TCPSocketAction, Volume, }, }, apimachinery::pkg::{ @@ -66,7 +65,7 @@ use stackable_operator::{ CustomContainerLogConfig, }, }, - role_utils::{GenericRoleConfig, JavaCommonConfig, Role, RoleGroupRef}, + role_utils::{GenericRoleConfig, RoleGroupRef}, status::condition::{ compute_conditions, operations::ClusterOperationsConditionBuilder, statefulset::StatefulSetConditionBuilder, @@ -81,11 +80,10 @@ use crate::{ command::build_container_command_args, config::jvm::{construct_hadoop_heapsize_env, construct_non_heap_jvm_args}, crd::{ - v1alpha1, Container, HiveClusterStatus, HiveRole, MetaStoreConfig, MetaStoreConfigFragment, - APP_NAME, CORE_SITE_XML, DB_PASSWORD_ENV, DB_USERNAME_ENV, HIVE_ENV_SH, HIVE_PORT, - HIVE_PORT_NAME, HIVE_SITE_XML, JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, - METRICS_PORT_NAME, STACKABLE_CONFIG_DIR, STACKABLE_CONFIG_DIR_NAME, - STACKABLE_CONFIG_MOUNT_DIR, STACKABLE_CONFIG_MOUNT_DIR_NAME, + v1alpha1, Container, HiveClusterStatus, HiveRole, MetaStoreConfig, APP_NAME, CORE_SITE_XML, + DB_PASSWORD_ENV, DB_USERNAME_ENV, HIVE_PORT, HIVE_PORT_NAME, HIVE_SITE_XML, + JVM_SECURITY_PROPERTIES_FILE, METRICS_PORT, METRICS_PORT_NAME, STACKABLE_CONFIG_DIR, + STACKABLE_CONFIG_DIR_NAME, STACKABLE_CONFIG_MOUNT_DIR, STACKABLE_CONFIG_MOUNT_DIR_NAME, STACKABLE_LOG_CONFIG_MOUNT_DIR, STACKABLE_LOG_CONFIG_MOUNT_DIR_NAME, STACKABLE_LOG_DIR, STACKABLE_LOG_DIR_NAME, }, @@ -387,7 +385,6 @@ pub async fn reconcile_hive( PropertyNameKind::Env, PropertyNameKind::Cli, PropertyNameKind::File(HIVE_SITE_XML.to_string()), - PropertyNameKind::File(HIVE_ENV_SH.to_string()), PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), ], role.clone(), @@ -460,7 +457,6 @@ pub async fn reconcile_hive( hive, &hive_namespace, &resolved_product_image, - role, &rolegroup, rolegroup_config, s3_connection_spec.as_ref(), @@ -604,7 +600,6 @@ fn build_metastore_rolegroup_config_map( hive: &v1alpha1::HiveCluster, hive_namespace: &str, resolved_product_image: &ResolvedProductImage, - role: &Role, rolegroup: &RoleGroupRef, role_group_config: &HashMap>, s3_connection_spec: Option<&S3ConnectionSpec>, @@ -613,35 +608,9 @@ fn build_metastore_rolegroup_config_map( cluster_info: &KubernetesClusterInfo, ) -> Result { let mut hive_site_data = String::new(); - let mut hive_env_data = String::new(); for (property_name_kind, config) in role_group_config { match property_name_kind { - PropertyNameKind::File(file_name) if file_name == HIVE_ENV_SH => { - let mut data = BTreeMap::from([ - ( - "HADOOP_HEAPSIZE".to_string(), - construct_hadoop_heapsize_env(merged_config) - .context(ConstructJvmArgumentsSnafu)?, - ), - ( - "HADOOP_OPTS".to_string(), - construct_non_heap_jvm_args(hive, role, &rolegroup.role_group) - .context(ConstructJvmArgumentsSnafu)?, - ), - 
]); - - // other properties / overrides - for (property_name, property_value) in config { - data.insert(property_name.to_string(), property_value.to_string()); - } - - hive_env_data = data - .into_iter() - .map(|(key, value)| format!("export {key}=\"{value}\"")) - .collect::>() - .join("\n"); - } PropertyNameKind::File(file_name) if file_name == HIVE_SITE_XML => { let mut data = BTreeMap::new(); @@ -724,7 +693,6 @@ fn build_metastore_rolegroup_config_map( .build(), ) .add_data(HIVE_SITE_XML, hive_site_data) - .add_data(HIVE_ENV_SH, hive_env_data) .add_data( JVM_SECURITY_PROPERTIES_FILE, to_java_properties_string(jvm_sec_props.iter()).with_context(|_| { @@ -827,6 +795,27 @@ fn build_metastore_rolegroup_statefulset( name: APP_NAME.to_string(), })?; + let credentials_secret_name = hive.spec.cluster_config.database.credentials_secret.clone(); + + container_builder + // load database credentials to environment variables: these will be used to replace + // the placeholders in hive-site.xml so that the operator does not "touch" the secret. + .add_env_var_from_secret(DB_USERNAME_ENV, &credentials_secret_name, "username") + .add_env_var_from_secret(DB_PASSWORD_ENV, &credentials_secret_name, "password") + .add_env_var( + "HADOOP_HEAPSIZE", + construct_hadoop_heapsize_env(merged_config).context(ConstructJvmArgumentsSnafu)?, + ) + .add_env_var( + "HADOOP_OPTS", + construct_non_heap_jvm_args(hive, role, &rolegroup_ref.role_group) + .context(ConstructJvmArgumentsSnafu)?, + ) + .add_env_var( + "CONTAINERDEBUG_LOG_DIRECTORY", + format!("{STACKABLE_LOG_DIR}/containerdebug"), + ); + for (property_name_kind, config) in metastore_config { if property_name_kind == &PropertyNameKind::Env { // overrides @@ -844,21 +833,6 @@ fn build_metastore_rolegroup_statefulset( } } - // load database credentials to environment variables: these will be used to replace - // the placeholders in hive-site.xml so that the operator does not "touch" the secret. - let credentials_secret_name = hive.spec.cluster_config.database.credentials_secret.clone(); - - container_builder.add_env_vars(vec![ - env_var_from_secret(DB_USERNAME_ENV, &credentials_secret_name, "username"), - env_var_from_secret(DB_PASSWORD_ENV, &credentials_secret_name, "password"), - // Needed for the `containerdebug` process to log it's tracing information to. 
- EnvVar { - name: "CONTAINERDEBUG_LOG_DIRECTORY".to_string(), - value: Some(format!("{STACKABLE_LOG_DIR}/containerdebug")), - value_from: None, - }, - ]); - let mut pod_builder = PodBuilder::new(); if let Some(hdfs) = &hive.spec.cluster_config.hdfs { @@ -1127,21 +1101,6 @@ fn build_metastore_rolegroup_statefulset( }) } -fn env_var_from_secret(var_name: &str, secret: &str, secret_key: &str) -> EnvVar { - EnvVar { - name: String::from(var_name), - value_from: Some(EnvVarSource { - secret_key_ref: Some(SecretKeySelector { - name: String::from(secret), - key: String::from(secret_key), - ..Default::default() - }), - ..Default::default() - }), - ..Default::default() - } -} - pub fn error_policy( _obj: Arc>, error: &Error, diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 42c48f05..ac5d6684 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -51,7 +51,6 @@ pub const STACKABLE_LOG_CONFIG_MOUNT_DIR_NAME: &str = "log-config-mount"; // Config file names pub const CORE_SITE_XML: &str = "core-site.xml"; pub const HIVE_SITE_XML: &str = "hive-site.xml"; -pub const HIVE_ENV_SH: &str = "hive-env.sh"; pub const HIVE_METASTORE_LOG4J2_PROPERTIES: &str = "metastore-log4j2.properties"; pub const JVM_SECURITY_PROPERTIES_FILE: &str = "security.properties"; @@ -590,39 +589,35 @@ impl Configuration for MetaStoreConfigFragment { ) -> Result>, product_config_utils::Error> { let mut result = BTreeMap::new(); - match file { - HIVE_SITE_XML => { - if let Some(warehouse_dir) = &self.warehouse_dir { - result.insert( - MetaStoreConfig::METASTORE_WAREHOUSE_DIR.to_string(), - Some(warehouse_dir.to_string()), - ); - } + if file == HIVE_SITE_XML { + if let Some(warehouse_dir) = &self.warehouse_dir { result.insert( - MetaStoreConfig::CONNECTION_URL.to_string(), - Some(hive.spec.cluster_config.database.conn_string.clone()), - ); - // use a placeholder that will be replaced in the start command (also for the password) - result.insert( - MetaStoreConfig::CONNECTION_USER_NAME.to_string(), - Some(DB_USERNAME_PLACEHOLDER.into()), - ); - result.insert( - MetaStoreConfig::CONNECTION_PASSWORD.to_string(), - Some(DB_PASSWORD_PLACEHOLDER.into()), - ); - result.insert( - MetaStoreConfig::CONNECTION_DRIVER_NAME.to_string(), - Some(hive.db_type().get_jdbc_driver_class().to_string()), - ); - - result.insert( - MetaStoreConfig::METASTORE_METRICS_ENABLED.to_string(), - Some("true".to_string()), + MetaStoreConfig::METASTORE_WAREHOUSE_DIR.to_string(), + Some(warehouse_dir.to_string()), ); } - HIVE_ENV_SH => {} - _ => {} + result.insert( + MetaStoreConfig::CONNECTION_URL.to_string(), + Some(hive.spec.cluster_config.database.conn_string.clone()), + ); + // use a placeholder that will be replaced in the start command (also for the password) + result.insert( + MetaStoreConfig::CONNECTION_USER_NAME.to_string(), + Some(DB_USERNAME_PLACEHOLDER.into()), + ); + result.insert( + MetaStoreConfig::CONNECTION_PASSWORD.to_string(), + Some(DB_PASSWORD_PLACEHOLDER.into()), + ); + result.insert( + MetaStoreConfig::CONNECTION_DRIVER_NAME.to_string(), + Some(hive.db_type().get_jdbc_driver_class().to_string()), + ); + + result.insert( + MetaStoreConfig::METASTORE_METRICS_ENABLED.to_string(), + Some("true".to_string()), + ); } Ok(result) diff --git a/tests/templates/kuttl/smoke/60-install-hive.yaml.j2 b/tests/templates/kuttl/smoke/60-install-hive.yaml.j2 index da89aff8..4db2575e 100644 --- a/tests/templates/kuttl/smoke/60-install-hive.yaml.j2 +++ 
b/tests/templates/kuttl/smoke/60-install-hive.yaml.j2 @@ -30,10 +30,10 @@ spec: COMMON_VAR: role-value # overridden by role group below ROLE_VAR: role-value # only defined here at role level configOverrides: - hive-env.sh: - HADOOP_HEAPSIZE: "512" # also set by the operator - COMMON_VAR: role-value # overridden by role group below - ROLE_VAR: role-value # only defined here at role level + hive-site.xml: + hive.metastore.warehouse.dir: "/stackable/warehouse/override" # Also set by the operator + common-var: role-value # Overridden by role group below + role-var: role-value # Only defined here at role level roleGroups: default: replicas: 1 @@ -41,10 +41,9 @@ spec: COMMON_VAR: group-value # overrides role value GROUP_VAR: group-value # only defined here at group level configOverrides: - hive-env.sh: - COMMON_VAR: group-value # overridden by role group below - GROUP_VAR: group-value # only defined here at group level - + hive-site.xml: + common-var: group-value + group-var: group-value --- apiVersion: s3.stackable.tech/v1alpha1 kind: S3Connection diff --git a/tests/templates/kuttl/smoke/61-assert.yaml b/tests/templates/kuttl/smoke/61-assert.yaml index 9dc3f626..22c0cb95 100644 --- a/tests/templates/kuttl/smoke/61-assert.yaml +++ b/tests/templates/kuttl/smoke/61-assert.yaml @@ -14,7 +14,7 @@ commands: # Test configOverrides # - script: | - kubectl -n $NAMESPACE get cm hive-metastore-default -o yaml | yq -e '.data."hive-env.sh"' | grep "export ROLE_VAR=role-value" - kubectl -n $NAMESPACE get cm hive-metastore-default -o yaml | yq -e '.data."hive-env.sh"' | grep "export GROUP_VAR=group-value" - kubectl -n $NAMESPACE get cm hive-metastore-default -o yaml | yq -e '.data."hive-env.sh"' | grep "export COMMON_VAR=group-value" - kubectl -n $NAMESPACE get cm hive-metastore-default -o yaml | yq -e '.data."hive-env.sh"' | grep "export HADOOP_HEAPSIZE=512" + kubectl -n $NAMESPACE get cm hive-metastore-default -o yaml | yq -e '.data."hive-site.xml"' | yq -p=xml '.configuration.property[] | select(.name == "hive.metastore.warehouse.dir") | .value' | grep -qx "/stackable/warehouse/override" + kubectl -n $NAMESPACE get cm hive-metastore-default -o yaml | yq -e '.data."hive-site.xml"' | yq -p=xml '.configuration.property[] | select(.name == "role-var") | .value' | grep -qx "role-value" + kubectl -n $NAMESPACE get cm hive-metastore-default -o yaml | yq -e '.data."hive-site.xml"' | yq -p=xml '.configuration.property[] | select(.name == "group-var") | .value' | grep -qx "group-value" + kubectl -n $NAMESPACE get cm hive-metastore-default -o yaml | yq -e '.data."hive-site.xml"' | yq -p=xml '.configuration.property[] | select(.name == "common-var") | .value' | grep -qx "group-value" From 80474d7e9ae773f27548575fdbfc6ecceda6ddc7 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 19 Feb 2025 13:03:12 +0100 Subject: [PATCH 10/11] Fix test --- tests/templates/kuttl/resources/20-assert.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/templates/kuttl/resources/20-assert.yaml b/tests/templates/kuttl/resources/20-assert.yaml index c7cdd416..34cfcdad 100644 --- a/tests/templates/kuttl/resources/20-assert.yaml +++ b/tests/templates/kuttl/resources/20-assert.yaml @@ -3,5 +3,5 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert timeout: 600 commands: - - script: kubectl get cm -n $NAMESPACE hive-metastore-resources-from-role -o yaml | grep -- 'HADOOP_HEAPSIZE=3276' - - script: kubectl get cm -n $NAMESPACE hive-metastore-resources-from-role-group -o yaml | grep -- 'HADOOP_HEAPSIZE=2457' + - 
script: kubectl -n $NAMESPACE get sts hive-metastore-resources-from-role -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "hive") | .env[] | select (.name == "HADOOP_HEAPSIZE" and .value == "3276")' + - script: kubectl -n $NAMESPACE get sts hive-metastore-resources-from-role-group -o yaml | yq -e '.spec.template.spec.containers[] | select (.name == "hive") | .env[] | select (.name == "HADOOP_HEAPSIZE" and .value == "2457")' From a170a90a1df0c23ed01372ac7d4531f0b1f5d0dc Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 20 Feb 2025 08:21:41 +0100 Subject: [PATCH 11/11] trigger CI
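For reference, every heap number asserted in these tests follows the same formula: `HADOOP_HEAPSIZE` is the container memory limit scaled to MiB, multiplied by the `JAVA_HEAP_FACTOR` of 0.8, and floored. The sketch below illustrates that arithmetic only; the real logic lives in `construct_hadoop_heapsize_env` in `jvm.rs` using `MemoryQuantity`, and the 4Gi/3Gi limits behind the resources test are assumed, as they are not shown in this diff:

```rust
/// Illustrative re-implementation of the HADOOP_HEAPSIZE derivation;
/// the operator computes this via MemoryQuantity in construct_hadoop_heapsize_env.
fn hadoop_heapsize_mb(memory_limit_mib: f64) -> u32 {
    const JAVA_HEAP_FACTOR: f64 = 0.8; // same factor as in jvm.rs
    (memory_limit_mib * JAVA_HEAP_FACTOR).floor() as u32
}

fn main() {
    // 42Gi = 43008 MiB -> floor(43008 * 0.8) = 34406, as asserted in the unit test.
    assert_eq!(hadoop_heapsize_mb(42.0 * 1024.0), 34406);
    // Assumed 4Gi role / 3Gi role-group limits reproduce the kuttl expectations above.
    assert_eq!(hadoop_heapsize_mb(4.0 * 1024.0), 3276);
    assert_eq!(hadoop_heapsize_mb(3.0 * 1024.0), 2457);
}
```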