
Commit e22e2ca

Merge pull request #11394 from sbueringer/pr-cluster-topology-conditions
🌱 Implement Cluster TopologyReconciled v1beta2 condition
2 parents e13abea + 1ea263b commit e22e2ca

7 files changed: +525, -92 lines changed


api/v1beta1/cluster_types.go

Lines changed: 53 additions & 0 deletions
@@ -56,6 +56,59 @@ const (
   // ClusterTopologyReconciledV1Beta2Condition is true if the topology controller is working properly.
   // Note: This condition is added only if the Cluster is referencing a ClusterClass / defining a managed Topology.
   ClusterTopologyReconciledV1Beta2Condition = "TopologyReconciled"
+
+  // ClusterTopologyReconcileSucceededV1Beta2Reason documents the reconciliation of a Cluster topology succeeded.
+  ClusterTopologyReconcileSucceededV1Beta2Reason = "TopologyReconcileSucceeded"
+
+  // ClusterTopologyReconciledFailedV1Beta2Reason documents the reconciliation of a Cluster topology
+  // failing due to an error.
+  ClusterTopologyReconciledFailedV1Beta2Reason = "TopologyReconcileFailed"
+
+  // ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
+  // not yet completed because Control Plane is not yet updated to match the desired topology spec.
+  ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason = "ControlPlaneUpgradePending"
+
+  // ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
+  // not yet completed because at least one of the MachineDeployments is yet to be created.
+  // This generally happens because new MachineDeployment creations are held off while the ControlPlane is not stable.
+  ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason = "MachineDeploymentsCreatePending"
+
+  // ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
+  // not yet completed because at least one of the MachineDeployments is not yet updated to match the desired topology spec.
+  ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason = "MachineDeploymentsUpgradePending"
+
+  // ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
+  // not yet completed because the upgrade for at least one of the MachineDeployments has been deferred.
+  ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason = "MachineDeploymentsUpgradeDeferred"
+
+  // ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology
+  // not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec.
+  ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason = "MachinePoolsUpgradePending"
+
+  // ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology
+  // not yet completed because at least one of the MachinePools is yet to be created.
+  // This generally happens because new MachinePool creations are held off while the ControlPlane is not stable.
+  ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason = "MachinePoolsCreatePending"
+
+  // ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology
+  // not yet completed because the upgrade for at least one of the MachinePools has been deferred.
+  ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason = "MachinePoolsUpgradeDeferred"
+
+  // ClusterTopologyReconciledHookBlockingV1Beta2Reason documents reconciliation of a Cluster topology
+  // not yet completed because at least one of the lifecycle hooks is blocking.
+  ClusterTopologyReconciledHookBlockingV1Beta2Reason = "LifecycleHookBlocking"
+
+  // ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason documents reconciliation of a Cluster topology not
+  // yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
+  // with the ClusterClass surfaced in the ClusterClass status or controller logs.
+  ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason = "ClusterClassNotReconciled"
+
+  // ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason surfaces when the Cluster is deleting because the
+  // DeletionTimestamp is set.
+  ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason
+
+  // ClusterTopologyReconcilePausedV1Beta2Reason surfaces when the Cluster is paused.
+  ClusterTopologyReconcilePausedV1Beta2Reason = PausedV1Beta2Reason
 )

 // Cluster's InfrastructureReady condition and corresponding reasons that will be used in v1Beta2 API version.
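
For context on how these constants are meant to be consumed: the topology controller reports one of the reasons above on the TopologyReconciled v1beta2 condition. The snippet below is a minimal sketch, not code from this commit; markTopologyReconciled is a hypothetical helper, and it uses the generic meta.SetStatusCondition from apimachinery instead of the project's own v1beta2 conditions utilities, which the controller itself relies on.

package example

import (
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// markTopologyReconciled records the outcome of a topology reconcile attempt on the
// Cluster's v1beta2 conditions, picking one of the reasons added in this commit.
func markTopologyReconciled(cluster *clusterv1.Cluster, reconcileErr error) {
	cond := metav1.Condition{
		Type:   clusterv1.ClusterTopologyReconciledV1Beta2Condition,
		Status: metav1.ConditionTrue,
		Reason: clusterv1.ClusterTopologyReconcileSucceededV1Beta2Reason,
	}
	if reconcileErr != nil {
		cond.Status = metav1.ConditionFalse
		cond.Reason = clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason
		cond.Message = reconcileErr.Error()
	}
	if cluster.Status.V1Beta2 == nil {
		cluster.Status.V1Beta2 = &clusterv1.ClusterV1Beta2Status{}
	}
	// SetStatusCondition updates LastTransitionTime only when the status actually flips.
	meta.SetStatusCondition(&cluster.Status.V1Beta2.Conditions, cond)
}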

api/v1beta1/condition_consts.go

Lines changed: 3 additions & 0 deletions
@@ -337,6 +337,9 @@ const (
   // yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue
   // with the ClusterClass surfaced in the ClusterClass status or controller logs.
   TopologyReconciledClusterClassNotReconciledReason = "ClusterClassNotReconciled"
+
+  // TopologyReconciledPausedReason (Severity=Info) surfaces when the Cluster is paused.
+  TopologyReconciledPausedReason = "Paused"
 )

 // Conditions and condition reasons for ClusterClass.
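
The legacy v1beta1 condition gets a matching Paused reason. Below is a hedged sketch of how a reconciler could use it, assuming the existing util/conditions and util/annotations helpers; markTopologyPausedIfNeeded is a hypothetical name, not part of this diff.

package example

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// markTopologyPausedIfNeeded flags the legacy TopologyReconciled condition with the
// new Paused reason when the Cluster is paused, and reports whether it did so.
func markTopologyPausedIfNeeded(cluster *clusterv1.Cluster) bool {
	if !cluster.Spec.Paused && !annotations.HasPaused(cluster) {
		return false
	}
	conditions.MarkFalse(cluster, clusterv1.TopologyReconciledCondition,
		clusterv1.TopologyReconciledPausedReason, clusterv1.ConditionSeverityInfo,
		"Reconciliation is paused for this object")
	return true
}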

internal/controllers/cluster/cluster_controller_status.go

Lines changed: 6 additions & 0 deletions
@@ -849,6 +849,12 @@ func (c clusterConditionCustomMergeStrategy) Merge(conditions []v1beta2condition
         return v1beta2conditions.InfoMergePriority
       }
     }
+
+    // Treat all reasons except TopologyReconcileFailed and ClusterClassNotReconciled of TopologyReconciled condition as info.
+    if condition.Type == clusterv1.ClusterTopologyReconciledV1Beta2Condition && condition.Status == metav1.ConditionFalse &&
+      condition.Reason != clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason && condition.Reason != clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason {
+      return v1beta2conditions.InfoMergePriority
+    }
     return v1beta2conditions.GetDefaultMergePriorityFunc(c.negativePolarityConditionTypes)(condition)
   }).Merge(conditions, conditionTypes)
 }
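
The intent of this hunk: a False TopologyReconciled condition only counts as an issue for the aggregated Available condition when the reconcile actually failed or the ClusterClass has not been reconciled; upgrade pending, create pending, deferred upgrades, blocking hooks, paused, and deleting are surfaced as informational. A self-contained sketch of that rule follows (treatAsInfo is a hypothetical helper, not the project's v1beta2conditions merge API):

package example

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// treatAsInfo mirrors the rule added in this commit: a False TopologyReconciled
// condition is reported as informational (it does not flip the Cluster Available
// condition to False) unless the topology reconcile failed or the ClusterClass
// has not been reconciled yet.
func treatAsInfo(condition metav1.Condition) bool {
	return condition.Type == clusterv1.ClusterTopologyReconciledV1Beta2Condition &&
		condition.Status == metav1.ConditionFalse &&
		condition.Reason != clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason &&
		condition.Reason != clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason
}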

internal/controllers/cluster/cluster_controller_status_test.go

Lines changed: 225 additions & 0 deletions
@@ -1949,6 +1949,231 @@ func TestSetAvailableCondition(t *testing.T) {
         Reason: v1beta2conditions.MultipleInfoReportedReason,
       },
     },
+    {
+      name: "Surfaces message from TopologyReconciled for reason that doesn't affect availability (no other issues)",
+      cluster: &clusterv1.Cluster{
+        ObjectMeta: metav1.ObjectMeta{
+          Name:      "machine-test",
+          Namespace: metav1.NamespaceDefault,
+        },
+        Spec: clusterv1.ClusterSpec{
+          Topology: &clusterv1.Topology{}, // using CC
+        },
+        Status: clusterv1.ClusterStatus{
+          V1Beta2: &clusterv1.ClusterV1Beta2Status{
+            Conditions: []metav1.Condition{
+              {
+                Type:   clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterWorkersAvailableV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterDeletingV1Beta2Condition,
+                Status: metav1.ConditionFalse,
+                Reason: "Foo",
+              },
+              {
+                Type:    clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+                Status:  metav1.ConditionFalse,
+                Reason:  clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason,
+                Message: "Control plane rollout and upgrade to version v1.29.0 on hold.",
+              },
+            },
+          },
+        },
+      },
+      expectCondition: metav1.Condition{
+        Type:    clusterv1.ClusterAvailableV1Beta2Condition,
+        Status:  metav1.ConditionTrue,
+        Reason:  v1beta2conditions.MultipleInfoReportedReason,
+        Message: "TopologyReconciled: Control plane rollout and upgrade to version v1.29.0 on hold.",
+      },
+    },
+    {
+      name: "Drops messages from TopologyReconciled for reason that doesn't affect availability (when there is another issue)",
+      cluster: &clusterv1.Cluster{
+        ObjectMeta: metav1.ObjectMeta{
+          Name:      "machine-test",
+          Namespace: metav1.NamespaceDefault,
+        },
+        Spec: clusterv1.ClusterSpec{
+          Topology: &clusterv1.Topology{}, // using CC
+        },
+        Status: clusterv1.ClusterStatus{
+          V1Beta2: &clusterv1.ClusterV1Beta2Status{
+            Conditions: []metav1.Condition{
+              {
+                Type:   clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:    clusterv1.ClusterWorkersAvailableV1Beta2Condition,
+                Status:  metav1.ConditionFalse,
+                Reason:  v1beta2conditions.MultipleIssuesReportedReason,
+                Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1",
+              },
+              {
+                Type:   clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterDeletingV1Beta2Condition,
+                Status: metav1.ConditionFalse,
+                Reason: "Foo",
+              },
+              {
+                Type:    clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+                Status:  metav1.ConditionFalse,
+                Reason:  clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason,
+                Message: "Control plane rollout and upgrade to version v1.29.0 on hold.",
+              },
+            },
+          },
+        },
+      },
+      expectCondition: metav1.Condition{
+        Type:    clusterv1.ClusterAvailableV1Beta2Condition,
+        Status:  metav1.ConditionFalse,
+        Reason:  v1beta2conditions.MultipleIssuesReportedReason, // Note: There is only one condition that is an issue, but it has the MultipleIssuesReported reason.
+        Message: "WorkersAvailable: 3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1",
+      },
+    },
+    {
+      name: "Takes into account messages from TopologyReconciled for reason that affects availability (no other issues)",
+      cluster: &clusterv1.Cluster{
+        ObjectMeta: metav1.ObjectMeta{
+          Name:      "machine-test",
+          Namespace: metav1.NamespaceDefault,
+        },
+        Spec: clusterv1.ClusterSpec{
+          Topology: &clusterv1.Topology{}, // using CC
+        },
+        Status: clusterv1.ClusterStatus{
+          V1Beta2: &clusterv1.ClusterV1Beta2Status{
+            Conditions: []metav1.Condition{
+              {
+                Type:   clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterWorkersAvailableV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterDeletingV1Beta2Condition,
+                Status: metav1.ConditionFalse,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+                Status: metav1.ConditionFalse,
+                Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
+                Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
+                  ".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
+              },
+            },
+          },
+        },
+      },
+      expectCondition: metav1.Condition{
+        Type:   clusterv1.ClusterAvailableV1Beta2Condition,
+        Status: metav1.ConditionFalse,
+        Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
+        Message: "TopologyReconciled: ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
+          ".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
+      },
+    },
+    {
+      name: "Takes into account messages from TopologyReconciled for reason that affects availability (when there is another issue)",
+      cluster: &clusterv1.Cluster{
+        ObjectMeta: metav1.ObjectMeta{
+          Name:      "machine-test",
+          Namespace: metav1.NamespaceDefault,
+        },
+        Spec: clusterv1.ClusterSpec{
+          Topology: &clusterv1.Topology{}, // using CC
+        },
+        Status: clusterv1.ClusterStatus{
+          V1Beta2: &clusterv1.ClusterV1Beta2Status{
+            Conditions: []metav1.Condition{
+              {
+                Type:   clusterv1.ClusterInfrastructureReadyV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterControlPlaneAvailableV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:    clusterv1.ClusterWorkersAvailableV1Beta2Condition,
+                Status:  metav1.ConditionFalse,
+                Reason:  v1beta2conditions.MultipleIssuesReportedReason,
+                Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1",
+              },
+              {
+                Type:   clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
+                Status: metav1.ConditionTrue,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterDeletingV1Beta2Condition,
+                Status: metav1.ConditionFalse,
+                Reason: "Foo",
+              },
+              {
+                Type:   clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+                Status: metav1.ConditionFalse,
+                Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason,
+                Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" +
+                  ".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
+              },
+            },
+          },
+        },
+      },
+      expectCondition: metav1.Condition{
+        Type:    clusterv1.ClusterAvailableV1Beta2Condition,
+        Status:  metav1.ConditionFalse,
+        Reason:  v1beta2conditions.MultipleIssuesReportedReason,
+        Message: "WorkersAvailable: 3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1; TopologyReconciled: ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if.status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused",
+      },
+    },
   }

   for _, tc := range testCases {

internal/controllers/topology/cluster/cluster_controller.go

Lines changed: 9 additions & 9 deletions
@@ -120,7 +120,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
       builder.WithPredicates(predicates.ResourceIsTopologyOwned(mgr.GetScheme(), predicateLog)),
     ).
     WithOptions(options).
-    WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
+    WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)).
     Build(r)

   if err != nil {
@@ -175,13 +175,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
     return ctrl.Result{}, nil
   }

-  // Return early if the Cluster is paused.
-  // TODO: What should we do if the cluster class is paused?
-  if annotations.IsPaused(cluster, cluster) {
-    log.Info("Reconciliation is paused for this object")
-    return ctrl.Result{}, nil
-  }
-
   patchHelper, err := patch.NewHelper(cluster, r.Client)
   if err != nil {
     return ctrl.Result{}, err
@@ -200,14 +193,21 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
       patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
        clusterv1.TopologyReconciledCondition,
       }},
-      patch.WithForceOverwriteConditions{},
+      patch.WithOwnedV1Beta2Conditions{Conditions: []string{
+        clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+      }},
     }
     if err := patchHelper.Patch(ctx, cluster, options...); err != nil {
       reterr = kerrors.NewAggregate([]error{reterr, err})
       return
     }
   }()

+  // Return early if the Cluster is paused.
+  if cluster.Spec.Paused || annotations.HasPaused(cluster) {
+    return ctrl.Result{}, nil
+  }
+
   // In case the object is deleted, the managed topology stops to reconcile;
   // (the other controllers will take care of deletion).
   if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {