Skip to content

Commit 41fb2af

Browse files
acuminoshafeeqesary1992
authored
[GEP-31] Introduce label constants and MachineConfiguration options for in-place updates (#970)
* Introduce labels constants for InPlaceUpdate Co-Authored-By: Shafeeque E S <shafeeque.e.s@sap.com> Co-Authored-By: Ashish Ranjan Yadav <ashish.ranjan.yadav@sap.com> * Introduce `MachineInPlaceUpdateTimeout` field Co-authored-by: Shafeeque E S <shafeeque.e.s@sap.com> Co-authored-by: Ashish Ranjan Yadav <ashish.ranjan.yadav@sap.com> * Introduce `DisableHealthTimeout` field Co-authored-by: Shafeeque E S <shafeeque.e.s@sap.com> Co-authored-by: Ashish Ranjan Yadav <ashish.ranjan.yadav@sap.com> * Run `make generate` Co-authored-by: Shafeeque E S <shafeeque.e.s@sap.com> Co-authored-by: Ashish Ranjan Yadav <ashish.ranjan.yadav@sap.com> * Nits * Address Review * Address Review --------- Co-authored-by: Shafeeque E S <shafeeque.e.s@sap.com> Co-authored-by: Ashish Ranjan Yadav <ashish.ranjan.yadav@sap.com>
1 parent 73405d0 commit 41fb2af

18 files changed

+157
-11
lines changed

docs/documents/apis.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,6 +1055,36 @@ Kubernetes meta/v1.Duration
10551055
</tr>
10561056
<tr>
10571057
<td>
1058+
<code>inPlaceUpdateTimeout</code>
1059+
</td>
1060+
<td>
1061+
<em>
1062+
<a href="https://godoc.org/k8s.io/apimachinery/pkg/apis/meta/v1#Duration">
1063+
Kubernetes meta/v1.Duration
1064+
</a>
1065+
</em>
1066+
</td>
1067+
<td>
1068+
<em>(Optional)</em>
1069+
<p>MachineInPlaceUpdateTimeout is the timeout after which in-place update is declared failed.</p>
1070+
</td>
1071+
</tr>
1072+
<tr>
1073+
<td>
1074+
<code>disableHealthTimeout</code>
1075+
</td>
1076+
<td>
1077+
<em>
1078+
*bool
1079+
</em>
1080+
</td>
1081+
<td>
1082+
<em>(Optional)</em>
1083+
<p>DisableHealthTimeout, if set to true, causes the health timeout to be ignored, so the machine is never declared failed.</p>
1084+
</td>
1085+
</tr>
1086+
<tr>
1087+
<td>
10581088
<code>maxEvictRetries</code>
10591089
</td>
10601090
<td>

kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,11 @@ spec:
279279
description: MachineCreationTimeout is the timeout after which
280280
machine creation is declared failed.
281281
type: string
282+
disableHealthTimeout:
283+
description: DisableHealthTimeout, if set to true, causes the health
284+
timeout to be ignored, so the machine is never declared
285+
failed.
286+
type: boolean
282287
drainTimeout:
283288
description: MachineDrainTimeout is the timeout after which
284289
machine is forcefully deleted.
@@ -287,6 +292,10 @@ spec:
287292
description: MachineHealthTimeout is the timeout after which
288293
machine is declared unhealthy/failed.
289294
type: string
295+
inPlaceUpdateTimeout:
296+
description: MachineInPlaceUpdateTimeout is the timeout after
297+
which in-place update is declared failed.
298+
type: string
290299
maxEvictRetries:
291300
description: MaxEvictRetries is the number of retries that
292301
will be attempted while draining the node.

kubernetes/crds/machine.sapcloud.io_machines.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ spec:
7878
description: MachineCreationTimeout is the timeout after which machine
7979
creation is declared failed.
8080
type: string
81+
disableHealthTimeout:
82+
description: DisableHealthTimeout, if set to true, causes the health timeout
83+
to be ignored, so the machine is never declared failed.
84+
type: boolean
8185
drainTimeout:
8286
description: MachineDraintimeout is the timeout after which machine
8387
is forcefully deleted.
@@ -86,6 +90,10 @@ spec:
8690
description: MachineHealthTimeout is the timeout after which machine
8791
is declared unhealthy/failed.
8892
type: string
93+
inPlaceUpdateTimeout:
94+
description: MachineInPlaceUpdateTimeout is the timeout after which
95+
in-place update is declared failed.
96+
type: string
8997
maxEvictRetries:
9098
description: MaxEvictRetries is the number of retries that will be
9199
attempted while draining the node.

kubernetes/crds/machine.sapcloud.io_machinesets.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,11 @@ spec:
161161
description: MachineCreationTimeout is the timeout after which
162162
machine creation is declared failed.
163163
type: string
164+
disableHealthTimeout:
165+
description: DisableHealthTimeout, if set to true, causes the health
166+
timeout to be ignored, so the machine is never declared
167+
failed.
168+
type: boolean
164169
drainTimeout:
165170
description: MachineDrainTimeout is the timeout after which
166171
machine is forcefully deleted.
@@ -169,6 +174,10 @@ spec:
169174
description: MachineHealthTimeout is the timeout after which
170175
machine is declared unhealthy/failed.
171176
type: string
177+
inPlaceUpdateTimeout:
178+
description: MachineInPlaceUpdateTimeout is the timeout after
179+
which in-place update is declared failed.
180+
type: string
172181
maxEvictRetries:
173182
description: MaxEvictRetries is the number of retries that
174183
will be attempted while draining the node.

pkg/apis/machine/types.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@ type MachineConfiguration struct {
9494
// MachineCreationTimeout is the timeout after which machine creation is declared failed.
9595
MachineCreationTimeout *metav1.Duration
9696

97+
// MachineInPlaceUpdateTimeout is the timeout after which in-place update is declared failed.
98+
MachineInPlaceUpdateTimeout *metav1.Duration
99+
100+
// DisableHealthTimeout, if set to true, causes the health timeout to be ignored, so the machine is never declared failed.
101+
// This is intended to be used only for in-place updates.
102+
DisableHealthTimeout *bool
103+
97104
// MaxEvictRetries is the number of retries that will be attempted while draining the node.
98105
MaxEvictRetries *int32
99106

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and Gardener contributors
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
package v1alpha1
6+
7+
const (
8+
// AnnotationKeyMachineUpdateFailedReason is the annotation key that indicates the reason for a machine update failure.
9+
AnnotationKeyMachineUpdateFailedReason = "node.machine.sapcloud.io/update-failed-reason"
10+
11+
// LabelKeyNodeCandidateForUpdate is the label key that indicates a node is a candidate for update.
12+
LabelKeyNodeCandidateForUpdate = "node.machine.sapcloud.io/candidate-for-update"
13+
// LabelKeyNodeSelectedForUpdate is the label key that indicates a node has been selected for update.
14+
LabelKeyNodeSelectedForUpdate = "node.machine.sapcloud.io/selected-for-update"
15+
// LabelKeyNodeUpdateResult is the label key that indicates the result of the update on the node.
16+
LabelKeyNodeUpdateResult = "node.machine.sapcloud.io/update-result"
17+
18+
// LabelValueNodeUpdateSuccessful is the label value that indicates the update on the node has succeeded.
19+
LabelValueNodeUpdateSuccessful = "successful"
20+
// LabelValueNodeUpdateFailed is the label value that indicates the update on the node has failed.
21+
LabelValueNodeUpdateFailed = "failed"
22+
)

pkg/apis/machine/v1alpha1/register.go

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,6 @@ func Resource(resource string) schema.GroupResource {
3131
return SchemeGroupVersion.WithResource(resource).GroupResource()
3232
}
3333

34-
// func Init() {
35-
// // We only register manually written functions here. The registration of the
36-
// // generated functions takes place in the generated files. The separation
37-
// // makes the code compile even when the generated files are missing.
38-
// SchemeBuilder.Register(addKnownTypes)
39-
// }
40-
4134
// Adds the list of known types to api.Scheme.
4235
func addKnownTypes(scheme *runtime.Scheme) error {
4336
scheme.AddKnownTypes(SchemeGroupVersion,

pkg/apis/machine/v1alpha1/shared_types.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,15 @@ type MachineConfiguration struct {
4040
// +optional
4141
MachineCreationTimeout *metav1.Duration `json:"creationTimeout,omitempty"`
4242

43+
// MachineInPlaceUpdateTimeout is the timeout after which in-place update is declared failed.
44+
// +optional
45+
MachineInPlaceUpdateTimeout *metav1.Duration `json:"inPlaceUpdateTimeout,omitempty"`
46+
47+
// DisableHealthTimeout, if set to true, causes the health timeout to be ignored, so the machine is never declared failed.
48+
// This is intended to be used only for in-place updates.
49+
// +optional
50+
DisableHealthTimeout *bool `json:"disableHealthTimeout,omitempty"`
51+
4352
// MaxEvictRetries is the number of retries that will be attempted while draining the node.
4453
// +optional
4554
MaxEvictRetries *int32 `json:"maxEvictRetries,omitempty"`

pkg/apis/machine/v1alpha1/zz_generated.conversion.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/apis/machine/zz_generated.deepcopy.go

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/controller/machineset.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,6 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1
339339
var activeMachines, staleMachines []*v1alpha1.Machine
340340
for _, machine := range allMachines {
341341
if IsMachineActive(machine) {
342-
// klog.Info("Active machine: ", machine.Name)
343342
activeMachines = append(activeMachines, machine)
344343
} else if IsMachineFailed(machine) {
345344
staleMachines = append(staleMachines, machine)

pkg/openapi/api_violations.report

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ API rule violation: list_type_missing,github.com/gardener/machine-controller-man
55
API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineCreationTimeout
66
API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineDrainTimeout
77
API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineHealthTimeout
8+
API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineInPlaceUpdateTimeout
89
API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineSetStatus,Conditions
910
API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineSpec,NodeTemplateSpec
1011
API rule violation: names_match,k8s.io/api/core/v1,AzureDiskVolumeSource,DataDiskURI

pkg/openapi/openapi_generated.go

Lines changed: 26 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/util/provider/app/options/options.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ func NewMCServer() *MCServer {
6767
MachineCreationTimeout: metav1.Duration{Duration: 20 * time.Minute},
6868
MachineHealthTimeout: metav1.Duration{Duration: 10 * time.Minute},
6969
MachineDrainTimeout: metav1.Duration{Duration: drain.DefaultMachineDrainTimeout},
70+
MachineInPlaceUpdateTimeout: metav1.Duration{Duration: 20 * time.Minute},
7071
MaxEvictRetries: drain.DefaultMaxEvictRetries,
7172
PvDetachTimeout: metav1.Duration{Duration: 2 * time.Minute},
7273
PvReattachTimeout: metav1.Duration{Duration: 90 * time.Second},
@@ -100,6 +101,7 @@ func (s *MCServer) AddFlags(fs *pflag.FlagSet) {
100101
fs.DurationVar(&s.SafetyOptions.MachineCreationTimeout.Duration, "machine-creation-timeout", s.SafetyOptions.MachineCreationTimeout.Duration, "Timeout (in durartion) used while joining (during creation) of machine before it is declared as failed.")
101102
fs.DurationVar(&s.SafetyOptions.MachineHealthTimeout.Duration, "machine-health-timeout", s.SafetyOptions.MachineHealthTimeout.Duration, "Timeout (in durartion) used while re-joining (in case of temporary health issues) of machine before it is declared as failed.")
102103
fs.DurationVar(&s.SafetyOptions.MachineDrainTimeout.Duration, "machine-drain-timeout", drain.DefaultMachineDrainTimeout, "Timeout (in durartion) used while draining of machine before deletion, beyond which MCM forcefully deletes machine.")
104+
fs.DurationVar(&s.SafetyOptions.MachineInPlaceUpdateTimeout.Duration, "machine-inplace-update-timeout", s.SafetyOptions.MachineInPlaceUpdateTimeout.Duration, "Timeout (in duration) used while updating a machine in-place, beyond which it is declared as failed.")
103105
fs.Int32Var(&s.SafetyOptions.MaxEvictRetries, "machine-max-evict-retries", drain.DefaultMaxEvictRetries, "Maximum number of times evicts would be attempted on a pod before it is forcibly deleted during draining of a machine.")
104106
fs.DurationVar(&s.SafetyOptions.PvDetachTimeout.Duration, "machine-pv-detach-timeout", s.SafetyOptions.PvDetachTimeout.Duration, "Timeout (in duration) used while waiting for detach of PV while evicting/deleting pods")
105107
fs.DurationVar(&s.SafetyOptions.PvReattachTimeout.Duration, "machine-pv-reattach-timeout", s.SafetyOptions.PvReattachTimeout.Duration, "Timeout (in duration) used while waiting for reattach of PV onto a different node")

pkg/util/provider/drain/drain.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ import (
2727
"errors"
2828
"fmt"
2929
"io"
30-
"k8s.io/apimachinery/pkg/labels"
31-
"k8s.io/client-go/tools/cache"
3230
"regexp"
3331
"sort"
3432
"strings"
@@ -41,11 +39,13 @@ import (
4139
storagev1 "k8s.io/api/storage/v1"
4240
apierrors "k8s.io/apimachinery/pkg/api/errors"
4341
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
42+
"k8s.io/apimachinery/pkg/labels"
4443
utilerrors "k8s.io/apimachinery/pkg/util/errors"
4544
"k8s.io/apimachinery/pkg/util/wait"
4645
"k8s.io/client-go/kubernetes"
4746
corelisters "k8s.io/client-go/listers/core/v1"
4847
policyv1listers "k8s.io/client-go/listers/policy/v1"
48+
"k8s.io/client-go/tools/cache"
4949
"k8s.io/klog/v2"
5050

5151
"github.com/gardener/machine-controller-manager/pkg/util/provider/driver"

pkg/util/provider/machineutils/utils.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
package machineutils
77

88
import (
9-
"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
109
"time"
1110

1211
v1 "k8s.io/api/core/v1"
12+
13+
"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
1314
)
1415

1516
const (
@@ -69,6 +70,9 @@ const (
6970

7071
// MachineLabelKey defines the labels which contains the name of the machine of a node
7172
MachineLabelKey = "node.gardener.cloud/machine-name"
73+
74+
// LabelKeyMachineSetScaleUpDisabled is the label key that indicates scaling up of the machine set is disabled.
75+
LabelKeyMachineSetScaleUpDisabled = "node.machine.sapcloud.io/scale-up-disabled"
7276
)
7377

7478
// RetryPeriod is an alias for specifying the retry period

pkg/util/provider/options/types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ type SafetyOptions struct {
8787
// Timeout (in duration) used while draining of machine before deletion,
8888
// beyond which it forcefully deletes machine
8989
MachineDrainTimeout metav1.Duration
90+
// Timeout (in duration) used while in-place updating of a machine,
91+
// beyond which it is declared as failed
92+
MachineInPlaceUpdateTimeout metav1.Duration
9093
// Maximum number of times evicts would be attempted on a pod before it is forcibly deleted
9194
// during draining of a machine.
9295
MaxEvictRetries int32

0 commit comments

Comments
 (0)