Skip to content

Commit 341fad1

Browse files
authored
Fix edge-case where scale-from-zero of MCD is mistaken for a rolling update (#956)
* Fix edge-case where scale-from-zero is mistaken for rolling update * Address review comments * Address review comments of PR #954 * Address review comments * Fix unit test
1 parent f17b00e commit 341fad1

File tree

7 files changed

+147
-25
lines changed

7 files changed

+147
-25
lines changed

Makefile

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,8 @@ add-license-headers: $(GO_ADD_LICENSE)
176176

177177
.PHONY: sast
178178
sast: $(GOSEC)
179-
@chmod +xw hack/sast.sh
180179
@./hack/sast.sh
181180

182181
.PHONY: sast-report
183182
sast-report:$(GOSEC)
184-
@chmod +xw hack/sast.sh
185183
@./hack/sast.sh --gosec-report true

pkg/apis/machine/v1alpha1/machinedeployment_types.go

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,9 @@ type RollingUpdateMachineDeployment struct {
132132
// Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%).
133133
// Absolute number is calculated from percentage by rounding down.
134134
// This can not be 0 if MaxSurge is 0.
135-
// By default, a fixed value of 1 is used.
136-
// Example: when this is set to 30%, the old MC can be scaled down to 70% of desired machines
137-
// immediately when the rolling update starts. Once new machines are ready, old MC
138-
// can be scaled down further, followed by scaling up the new MC, ensuring
135+
// Example: when this is set to 30%, the old machine set can be scaled down to 70% of desired machines
136+
// immediately when the rolling update starts. Once new machines are ready, old machine set
137+
// can be scaled down further, followed by scaling up the new machine set, ensuring
139138
// that the total number of machines available at all times during the update is at
140139
// least 70% of desired machines.
141140
// +optional
@@ -146,12 +145,11 @@ type RollingUpdateMachineDeployment struct {
146145
// Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%).
147146
// This can not be 0 if MaxUnavailable is 0.
148147
// Absolute number is calculated from percentage by rounding up.
149-
// By default, a value of 1 is used.
150-
// Example: when this is set to 30%, the new MC can be scaled up immediately when
151-
// the rolling update starts, such that the total number of old and new machines do not exceed
148+
// Example: when this is set to 30%, the new machine set can be scaled up immediately when
149+
// the rolling update starts, such that the total number of old and new machines does not exceed
152150
// 130% of desired machines. Once old machines have been killed,
153-
// new MC can be scaled up further, ensuring that total number of machines running
154-
// at any time during the update is atmost 130% of desired machines.
151+
// new machine set can be scaled up further, ensuring that total number of machines running
152+
// at any time during the update is utmost 130% of desired machines.
155153
// +optional
156154
MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty"`
157155
}

pkg/apis/machine/validation/machinedeployment.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ func canConvertIntOrStringToInt32(val *intstr.IntOrString, replicas int) bool {
3636

3737
func validateUpdateStrategy(spec *machine.MachineDeploymentSpec, fldPath *field.Path) field.ErrorList {
3838
allErrs := field.ErrorList{}
39-
if spec.Strategy.Type != "RollingUpdate" && spec.Strategy.Type != "Recreate" {
39+
if spec.Strategy.Type != machine.RollingUpdateMachineDeploymentStrategyType && spec.Strategy.Type != machine.RecreateMachineDeploymentStrategyType {
4040
allErrs = append(allErrs, field.Required(fldPath.Child("strategy.type"), "Type can either be RollingUpdate or Recreate"))
4141
}
42-
if spec.Strategy.Type == "RollingUpdate" {
42+
if spec.Strategy.Type == machine.RollingUpdateMachineDeploymentStrategyType {
4343
if spec.Strategy.RollingUpdate == nil {
4444
allErrs = append(allErrs, field.Required(fldPath.Child("strategy.rollingUpdate"), "RollingUpdate parameter cannot be nil for rolling update strategy"))
4545
} else {

pkg/controller/controller_utils_test.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,4 +552,61 @@ var _ = Describe("#controllerUtils", func() {
552552
}),
553553
)
554554
})
555+
Describe("##FilterActiveMachineSets", func() {
556+
testMachineSet := &machinev1.MachineSet{
557+
ObjectMeta: metav1.ObjectMeta{
558+
Name: "MachineSet-test",
559+
Namespace: testNamespace,
560+
Labels: map[string]string{
561+
"test-label": "test-label",
562+
},
563+
Annotations: map[string]string{
564+
"deployment.kubernetes.io/revision": "1",
565+
},
566+
UID: "1234567",
567+
OwnerReferences: []metav1.OwnerReference{
568+
{
569+
Kind: "MachineDeployment",
570+
Name: "MachineDeployment-test",
571+
UID: "1234567",
572+
Controller: nil,
573+
},
574+
},
575+
Generation: 5,
576+
},
577+
TypeMeta: metav1.TypeMeta{
578+
Kind: "MachineSet",
579+
APIVersion: "machine.sapcloud.io/v1alpha1",
580+
},
581+
Spec: machinev1.MachineSetSpec{
582+
Replicas: 0,
583+
Template: machinev1.MachineTemplateSpec{
584+
ObjectMeta: metav1.ObjectMeta{
585+
Labels: map[string]string{
586+
"test-label": "test-label",
587+
},
588+
},
589+
},
590+
Selector: &metav1.LabelSelector{
591+
MatchLabels: map[string]string{
592+
"test-label": "test-label",
593+
},
594+
},
595+
},
596+
}
597+
It("should return an empty list when machine sets have 0 replicas", func() {
598+
testMachineSet.Spec.Replicas = 0
599+
testMachineSets := []*machinev1.MachineSet{
600+
testMachineSet,
601+
}
602+
Expect(FilterActiveMachineSets(testMachineSets)).To(HaveLen(0))
603+
})
604+
It("should return expected machine sets when replicas are positive", func() {
605+
testMachineSet.Spec.Replicas = 1
606+
testMachineSets := []*machinev1.MachineSet{
607+
testMachineSet,
608+
}
609+
Expect(FilterActiveMachineSets(testMachineSets)).To(HaveLen(1))
610+
})
611+
})
555612
})

pkg/controller/deployment_sync.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,7 @@ func calculateDeploymentStatus(allISs []*v1alpha1.MachineSet, newIS *v1alpha1.Ma
666666

667667
// isScalingEvent checks whether the provided deployment has been updated with a scaling event
668668
// by looking at the desired-replicas annotation in the active machine sets of the deployment, and returns if there was scale-out or not.
669+
// However, when there are no active machine sets, but the replica count in the machine deployment's spec > 0, it is recognized as a scale-out event.
669670
//
670671
// rsList should come from getReplicaSetsForDeployment(d).
671672
// machineMap should come from getmachineMapForDeployment(d, rsList).
@@ -674,8 +675,16 @@ func (dc *controller) isScalingEvent(ctx context.Context, d *v1alpha1.MachineDep
674675
if err != nil {
675676
return false, err
676677
}
678+
if newIS == nil {
679+
return false, nil
680+
}
677681
allISs := append(oldISs, newIS)
678-
for _, is := range FilterActiveMachineSets(allISs) {
682+
activeMachineSets := FilterActiveMachineSets(allISs)
683+
//if this is a scale from zero scenario, return true
684+
if len(activeMachineSets) == 0 && d.Spec.Replicas > 0 {
685+
return true, nil
686+
}
687+
for _, is := range activeMachineSets {
679688
desired, ok := GetDesiredReplicasAnnotation(is)
680689
if !ok {
681690
continue

pkg/test/integration/common/framework.go

Lines changed: 69 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"context"
99
"encoding/json"
1010
"fmt"
11+
"github.com/gardener/machine-controller-manager/pkg/controller"
1112
"io"
1213
"log"
1314
"os"
@@ -953,27 +954,74 @@ func (c *IntegrationTestFramework) ControllerTests() {
953954
// Testcase #02 | machine deployment
954955
ginkgo.Describe("machine deployment resource", func() {
955956
var initialNodes int16 // initialization should be part of creation test logic
956-
ginkgo.Context("creation with replicas=3", func() {
957-
ginkgo.It("should not lead to errors and add 3 more nodes to target cluster", func() {
957+
ginkgo.Context("creation with replicas=0, scale up with replicas=1", func() {
958+
ginkgo.It("should not lead to errors and add 1 more node to target cluster", func() {
959+
var machineDeployment *v1alpha1.MachineDeployment
958960
//probe initialnodes before continuing
959961
initialNodes = c.TargetCluster.GetNumberOfNodes()
960962

961963
ginkgo.By("Checking for errors")
962-
gomega.Expect(c.ControlCluster.CreateMachineDeployment(controlClusterNamespace)).To(gomega.BeNil())
964+
gomega.Expect(c.ControlCluster.CreateMachineDeployment(controlClusterNamespace, 0)).To(gomega.BeNil())
963965

964-
ginkgo.By("Waiting until number of ready nodes are 3 more than initial")
966+
ginkgo.By("Waiting for Machine Set to be created")
967+
gomega.Eventually(func() int { return c.getNumberOfMachineSets(ctx, controlClusterNamespace) }, c.timeout, c.pollingInterval).Should(gomega.BeNumerically("==", 1))
968+
969+
ginkgo.By("Updating machineDeployment replicas to 1")
970+
retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
971+
machineDeployment, _ = c.ControlCluster.McmClient.
972+
MachineV1alpha1().
973+
MachineDeployments(controlClusterNamespace).
974+
Get(ctx, "test-machine-deployment", metav1.GetOptions{})
975+
machineDeployment.Spec.Replicas = 1
976+
_, updateErr := c.ControlCluster.McmClient.
977+
MachineV1alpha1().
978+
MachineDeployments(controlClusterNamespace).
979+
Update(ctx, machineDeployment, metav1.UpdateOptions{})
980+
return updateErr
981+
})
982+
gomega.Expect(retryErr).NotTo(gomega.HaveOccurred())
983+
984+
//check machineDeploymentStatus to make sure correct condition is reflected
985+
ginkgo.By("Checking if machineDeployment's status has been updated with correct conditions")
986+
gomega.Eventually(func() bool {
987+
var err error
988+
machineDeployment, err = c.ControlCluster.McmClient.
989+
MachineV1alpha1().
990+
MachineDeployments(controlClusterNamespace).
991+
Get(ctx, "test-machine-deployment", metav1.GetOptions{})
992+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
993+
if len(machineDeployment.Status.Conditions) != 2 {
994+
return false
995+
}
996+
isMCDAvailable := false
997+
hasMCDProgressed := false
998+
for _, condition := range machineDeployment.Status.Conditions {
999+
if condition.Type == v1alpha1.MachineDeploymentAvailable && condition.Status == v1alpha1.ConditionTrue && condition.Reason == controller.MinimumReplicasAvailable {
1000+
isMCDAvailable = true
1001+
}
1002+
if condition.Type == v1alpha1.MachineDeploymentProgressing && condition.Status == v1alpha1.ConditionTrue && condition.Reason == controller.NewISAvailableReason {
1003+
hasMCDProgressed = true
1004+
}
1005+
}
1006+
return hasMCDProgressed && isMCDAvailable && machineDeployment.Status.AvailableReplicas == 1
1007+
}, c.timeout, c.pollingInterval).Should(gomega.BeTrue())
1008+
1009+
//check number of ready nodes
1010+
ginkgo.By("Checking number of ready nodes==1")
9651011
gomega.Eventually(
9661012
c.TargetCluster.GetNumberOfNodes,
967-
c.timeout, c.pollingInterval).
968-
Should(gomega.BeNumerically("==", initialNodes+3))
1013+
c.timeout,
1014+
c.pollingInterval).
1015+
Should(gomega.BeNumerically("==", initialNodes+1))
9691016
gomega.Eventually(
9701017
c.TargetCluster.GetNumberOfReadyNodes,
971-
c.timeout, c.pollingInterval).
972-
Should(gomega.BeNumerically("==", initialNodes+3))
1018+
c.timeout,
1019+
c.pollingInterval).
1020+
Should(gomega.BeNumerically("==", initialNodes+1))
9731021
})
9741022
})
9751023
ginkgo.Context("scale-up with replicas=6", func() {
976-
ginkgo.It("should not lead to errors and add futher 3 nodes to target cluster", func() {
1024+
ginkgo.It("should not lead to errors and add further 5 nodes to target cluster", func() {
9771025
retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
9781026
machineDployment, _ := c.ControlCluster.McmClient.
9791027
MachineV1alpha1().
@@ -1407,6 +1455,18 @@ func (c *IntegrationTestFramework) Cleanup() {
14071455

14081456
}
14091457

1458+
func (c *IntegrationTestFramework) getNumberOfMachineSets(ctx context.Context, namespace string) int {
1459+
machineSets, err := c.ControlCluster.McmClient.MachineV1alpha1().MachineSets(namespace).List(ctx, metav1.ListOptions{})
1460+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
1461+
count := 0
1462+
for _, machineSet := range machineSets.Items {
1463+
if machineSet.OwnerReferences[0].Name == "test-machine-deployment" {
1464+
count++
1465+
}
1466+
}
1467+
return count
1468+
}
1469+
14101470
func checkMcmRepoAvailable() {
14111471
ginkgo.By("Checking Machine-Controller-Manager repo is available at: " + mcmRepoPath)
14121472
_, err := os.Stat(mcmRepoPath)

pkg/test/integration/common/helpers/machine_resources.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func (c *Cluster) CreateMachine(namespace string) error {
4040
}
4141

4242
// CreateMachineDeployment creates a test-machine-deployment with 3 replicas and returns error if it occurs
43-
func (c *Cluster) CreateMachineDeployment(namespace string) error {
43+
func (c *Cluster) CreateMachineDeployment(namespace string, replicas int32) error {
4444
labels := map[string]string{"test-label": "test-label"}
4545
_, err := c.McmClient.
4646
MachineV1alpha1().
@@ -53,7 +53,7 @@ func (c *Cluster) CreateMachineDeployment(namespace string) error {
5353
Namespace: namespace,
5454
},
5555
Spec: v1alpha1.MachineDeploymentSpec{
56-
Replicas: 3,
56+
Replicas: replicas,
5757
MinReadySeconds: 500,
5858
Strategy: v1alpha1.MachineDeploymentStrategy{
5959
Type: v1alpha1.RollingUpdateMachineDeploymentStrategyType,

0 commit comments

Comments
 (0)