Skip to content

Commit 2a25ff4

Browse files
committed
Add guardrail against trying to crossgrade in the middle of another upgrade
1 parent 46eedee commit 2a25ff4

File tree

3 files changed

+44
-10
lines changed

3 files changed

+44
-10
lines changed

rust/crd/src/lib.rs

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -804,21 +804,40 @@ impl HdfsCluster {
804804
Ok(result)
805805
}
806806

807-
pub fn upgrade_state(&self) -> Option<UpgradeState> {
808-
let status = self.status.as_ref()?;
807+
pub fn upgrade_state(&self) -> Result<Option<UpgradeState>, UpgradeStateError> {
808+
use upgrade_state_error::*;
809+
let Some(status) = self.status.as_ref() else {
810+
return Ok(None);
811+
};
809812
let requested_version = self.spec.image.product_version();
813+
let Some(deployed_version) = status.deployed_product_version.as_deref() else {
814+
// No deployed version is recorded yet, so this is a fresh install and there is nothing to upgrade from.
815+
return Ok(None);
816+
};
817+
let current_upgrade_target_version = status.upgrade_target_product_version.as_deref();
810818

811-
if requested_version != status.deployed_product_version.as_deref()? {
819+
if requested_version != deployed_version {
812820
// If we're requesting a different version than what is deployed, assume that we're upgrading.
813821
// Could also be a downgrade to an older version, but we don't support downgrades after upgrade finalization.
814-
Some(UpgradeState::Upgrading)
815-
} else if requested_version != status.upgrade_target_product_version.as_deref()? {
822+
match current_upgrade_target_version {
823+
Some(upgrading_version) if requested_version != upgrading_version => {
824+
// If we're in an upgrade, do not allow switching to a third version
825+
InvalidCrossgradeSnafu {
826+
requested_version,
827+
deployed_version,
828+
upgrading_version,
829+
}
830+
.fail()
831+
}
832+
_ => Ok(Some(UpgradeState::Upgrading)),
833+
}
834+
} else if current_upgrade_target_version.is_some_and(|x| requested_version != x) {
816835
// If we're requesting the old version mid-upgrade, assume that we're downgrading.
817836
// We only support downgrading to the exact previous version.
818-
Some(UpgradeState::Downgrading)
837+
Ok(Some(UpgradeState::Downgrading))
819838
} else {
820839
// All three versions match, upgrade was completed without clearing `upgrade_target_product_version`.
821-
None
840+
Ok(None)
822841
}
823842
}
824843

@@ -984,6 +1003,17 @@ pub enum UpgradeState {
9841003
Downgrading,
9851004
}
9861005

1006+
#[derive(Debug, Snafu)]
1007+
#[snafu(module)]
1008+
pub enum UpgradeStateError {
1009+
#[snafu(display("requested version {requested_version:?} while still upgrading from {deployed_version:?} to {upgrading_version:?}, please finish the upgrade or downgrade first"))]
1010+
InvalidCrossgrade {
1011+
requested_version: String,
1012+
deployed_version: String,
1013+
upgrading_version: String,
1014+
},
1015+
}
1016+
9871017
#[derive(
9881018
Clone,
9891019
Debug,

rust/operator-binary/src/container.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,7 @@ impl ContainerConfig {
549549
args.push_str(&Self::export_kerberos_real_env_var_command());
550550
}
551551

552-
let upgrade_args = if hdfs.upgrade_state() == Some(UpgradeState::Upgrading)
552+
let upgrade_args = if hdfs.upgrade_state().ok() == Some(Some(UpgradeState::Upgrading))
553553
&& *role == HdfsRole::NameNode
554554
{
555555
"-rollingUpgrade started"

rust/operator-binary/src/hdfs_controller.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ use stackable_operator::{
5151
use strum::{EnumDiscriminants, IntoEnumIterator, IntoStaticStr};
5252

5353
use stackable_hdfs_crd::{
54-
constants::*, AnyNodeConfig, HdfsCluster, HdfsClusterStatus, HdfsPodRef, HdfsRole, UpgradeState,
54+
constants::*, AnyNodeConfig, HdfsCluster, HdfsClusterStatus, HdfsPodRef, HdfsRole,
55+
UpgradeState, UpgradeStateError,
5556
};
5657

5758
use crate::{
@@ -86,6 +87,9 @@ pub enum Error {
8687
source: stackable_operator::product_config_utils::Error,
8788
},
8889

90+
#[snafu(display("invalid upgrade state"))]
91+
InvalidUpgradeState { source: UpgradeStateError },
92+
8993
#[snafu(display("cannot create rolegroup service {name:?}"))]
9094
ApplyRoleGroupService {
9195
source: stackable_operator::cluster_resources::Error,
@@ -326,7 +330,7 @@ pub async fn reconcile_hdfs(hdfs: Arc<HdfsCluster>, ctx: Arc<Ctx>) -> HdfsOperat
326330
let dfs_replication = hdfs.spec.cluster_config.dfs_replication;
327331
let mut ss_cond_builder = StatefulSetConditionBuilder::default();
328332

329-
let upgrade_state = hdfs.upgrade_state();
333+
let upgrade_state = hdfs.upgrade_state().context(InvalidUpgradeStateSnafu)?;
330334
let mut deploy_done = true;
331335

332336
// Roles must be deployed in order during rolling upgrades,

0 commit comments

Comments
 (0)