Skip to content

Commit 9bded0e

Browse files
authored
Modify Machine Creation flow to make sure node label is updated before initialization of VM. Modify Deletion flow to call DeleteMachine even if VM is not found. (#940)
* Change order in which labels are updated and machine is initialized * Change how node label is updated in getVMStatus. Proceed to drain if VM not found * Remove error log when initializeMachine is unimplemented * Change where initializeMachine is called in triggerCreationFlow * Correct GetVMStatus flow from previous commit * Remove unnecessary comments * Initialize VM and update labels in the same reconciliation * Fix make check errors * Address review comments * Address review comments part 2 * Add comment for future changes to triggerCreationFlow to clean up dirty code * Correct err variable assignment * Shift comment to the right place
1 parent da00fec commit 9bded0e

File tree

4 files changed

+139
-126
lines changed

4 files changed

+139
-126
lines changed

pkg/util/provider/driver/fake.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,17 +92,14 @@ func (d *FakeDriver) DeleteMachine(_ context.Context, deleteMachineRequest *Dele
9292

9393
// GetMachineStatus makes a gRPC call to the driver to check existance of machine
9494
func (d *FakeDriver) GetMachineStatus(_ context.Context, _ *GetMachineStatusRequest) (*GetMachineStatusResponse, error) {
95-
switch {
96-
case !d.VMExists:
95+
if !d.VMExists {
9796
errMessage := "Fake plugin is returning no VM instances backing this machine object"
9897
return nil, status.Error(codes.NotFound, errMessage)
99-
case d.Err != nil:
100-
return nil, d.Err
10198
}
10299
return &GetMachineStatusResponse{
103100
ProviderID: d.ProviderID,
104101
NodeName: d.NodeName,
105-
}, nil
102+
}, d.Err
106103
}
107104

108105
// ListMachines have to list machines

pkg/util/provider/machinecontroller/machine.go

Lines changed: 48 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,6 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
377377
Secret: createMachineRequest.Secret,
378378
},
379379
)
380-
381380
if err != nil {
382381
// VM with required name is not found.
383382
machineErr, ok := status.FromError(err)
@@ -387,7 +386,6 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
387386
return machineutils.MediumRetry, err
388387
}
389388
klog.Warningf("For machine %q, obtained VM error status as: %s", machineName, machineErr)
390-
391389
// Decoding machine error code
392390
switch machineErr.Code() {
393391
case codes.NotFound, codes.Unimplemented:
@@ -404,13 +402,10 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
404402
klog.Errorf("Error while creating machine %s: %s", machine.Name, err.Error())
405403
return c.machineCreateErrorHandler(ctx, machine, createMachineResponse, err)
406404
}
407-
408405
nodeName = createMachineResponse.NodeName
409406
providerID = createMachineResponse.ProviderID
410-
411407
// Creation was successful
412408
klog.V(2).Infof("Created new VM for machine: %q with ProviderID: %q and backing node: %q", machine.Name, providerID, getNodeName(machine))
413-
414409
// If a node obj already exists by the same nodeName, treat it as a stale node and trigger machine deletion.
415410
// TODO: there is a case with Azure where the VM may join the cluster before the CreateMachine call is completed,
416411
// and that would make an otherwise healthy node, be marked as stale.
@@ -497,6 +492,10 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
497492
case codes.Uninitialized:
498493
uninitializedMachine = true
499494
klog.Infof("VM instance associated with machine %s was created but not initialized.", machine.Name)
495+
//clean me up. I'm dirty.
496+
//TODO@thiyyakat add a pointer to a boolean variable indicating whether initialization has happened successfully.
497+
nodeName = getMachineStatusResponse.NodeName
498+
providerID = getMachineStatusResponse.ProviderID
500499

501500
default:
502501
updateRetryPeriod, updateErr := c.machineStatusUpdate(
@@ -527,43 +526,23 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
527526
nodeName = getMachineStatusResponse.NodeName
528527
providerID = getMachineStatusResponse.ProviderID
529528
}
530-
529+
//Update labels, providerID
530+
var clone *v1alpha1.Machine
531+
clone, err = c.updateLabels(ctx, createMachineRequest.Machine, nodeName, providerID)
532+
//initialize VM if not initialized
531533
if uninitializedMachine {
532-
retryPeriod, err := c.initializeMachine(ctx, createMachineRequest.Machine, createMachineRequest.MachineClass, createMachineRequest.Secret)
534+
var retryPeriod machineutils.RetryPeriod
535+
retryPeriod, err = c.initializeMachine(ctx, clone, createMachineRequest.MachineClass, createMachineRequest.Secret)
533536
if err != nil {
534537
return retryPeriod, err
535538
}
539+
// Return error even when machine object is updated
540+
err = fmt.Errorf("machine creation in process. Machine initialization (if required) is successful")
541+
return machineutils.ShortRetry, err
536542
}
537-
538-
_, machineNodeLabelPresent := createMachineRequest.Machine.Labels[v1alpha1.NodeLabelKey]
539-
_, machinePriorityAnnotationPresent := createMachineRequest.Machine.Annotations[machineutils.MachinePriority]
540-
541-
if !machineNodeLabelPresent || !machinePriorityAnnotationPresent || machine.Spec.ProviderID == "" {
542-
clone := machine.DeepCopy()
543-
if clone.Labels == nil {
544-
clone.Labels = make(map[string]string)
545-
}
546-
clone.Labels[v1alpha1.NodeLabelKey] = nodeName
547-
if clone.Annotations == nil {
548-
clone.Annotations = make(map[string]string)
549-
}
550-
if clone.Annotations[machineutils.MachinePriority] == "" {
551-
clone.Annotations[machineutils.MachinePriority] = "3"
552-
}
553-
554-
clone.Spec.ProviderID = providerID
555-
_, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{})
556-
if err != nil {
557-
klog.Warningf("Machine UPDATE failed for %q. Retrying, error: %s", machine.Name, err)
558-
} else {
559-
klog.V(2).Infof("Machine labels/annotations UPDATE for %q", machine.Name)
560-
561-
// Return error even when machine object is updated
562-
err = fmt.Errorf("Machine creation in process. Machine UPDATE successful")
563-
}
543+
if err != nil {
564544
return machineutils.ShortRetry, err
565545
}
566-
567546
if machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff {
568547
clone := machine.DeepCopy()
569548
clone.Status.LastOperation = v1alpha1.LastOperation{
@@ -577,23 +556,49 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
577556
TimeoutActive: true,
578557
LastUpdateTime: metav1.Now(),
579558
}
580-
581559
_, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{})
582560
if err != nil {
583561
klog.Warningf("Machine/status UPDATE failed for %q. Retrying, error: %s", machine.Name, err)
584562
} else {
585563
klog.V(2).Infof("Machine/status UPDATE for %q during creation", machine.Name)
586-
587564
// Return error even when machine object is updated
588-
err = fmt.Errorf("Machine creation in process. Machine/Status UPDATE successful")
565+
err = fmt.Errorf("machine creation in process. Machine/Status UPDATE successful")
589566
}
590-
591567
return machineutils.ShortRetry, err
592568
}
593-
594569
return machineutils.LongRetry, nil
595570
}
596571

572+
func (c *controller) updateLabels(ctx context.Context, machine *v1alpha1.Machine, nodeName, providerID string) (clone *v1alpha1.Machine, err error) {
573+
machineNodeLabelPresent := metav1.HasLabel(machine.ObjectMeta, v1alpha1.NodeLabelKey)
574+
machinePriorityAnnotationPresent := metav1.HasAnnotation(machine.ObjectMeta, machineutils.MachinePriority)
575+
clone = machine.DeepCopy()
576+
if !machineNodeLabelPresent || !machinePriorityAnnotationPresent || machine.Spec.ProviderID == "" {
577+
if clone.Labels == nil {
578+
clone.Labels = make(map[string]string)
579+
}
580+
clone.Labels[v1alpha1.NodeLabelKey] = nodeName
581+
if clone.Annotations == nil {
582+
clone.Annotations = make(map[string]string)
583+
}
584+
if clone.Annotations[machineutils.MachinePriority] == "" {
585+
clone.Annotations[machineutils.MachinePriority] = "3"
586+
}
587+
clone.Spec.ProviderID = providerID
588+
var updatedMachine *v1alpha1.Machine
589+
updatedMachine, err = c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{})
590+
if err != nil {
591+
klog.Warningf("Machine labels/annotations UPDATE failed for %q. Will retry after VM initialization (if required), error: %s", machine.Name, err)
592+
clone = machine.DeepCopy()
593+
} else {
594+
clone = updatedMachine
595+
klog.V(2).Infof("Machine labels/annotations UPDATE for %q", clone.Name)
596+
err = fmt.Errorf("machine creation in process. Machine labels/annotations update is successful")
597+
}
598+
}
599+
return clone, err
600+
}
601+
597602
func (c *controller) initializeMachine(ctx context.Context, machine *v1alpha1.Machine, machineClass *v1alpha1.MachineClass, secret *corev1.Secret) (machineutils.RetryPeriod, error) {
598603
req := &driver.InitializeMachineRequest{
599604
Machine: machine,
@@ -608,15 +613,11 @@ func (c *controller) initializeMachine(ctx context.Context, machine *v1alpha1.Ma
608613
klog.Errorf("Cannot decode Driver error for machine %q: %s. Unexpected behaviour as Driver errors are expected to be of type status.Status", machine.Name, err)
609614
return machineutils.LongRetry, err
610615
}
611-
klog.Errorf("Error occurred while initializing VM instance for machine %q: %s", machine.Name, err)
612-
switch errStatus.Code() {
613-
case codes.Unimplemented:
616+
if errStatus.Code() == codes.Unimplemented {
614617
klog.V(3).Infof("Provider does not support Driver.InitializeMachine - skipping VM instance initialization for %q.", machine.Name)
615618
return 0, nil
616-
case codes.NotFound:
617-
klog.Warningf("No VM instance found for machine %q. Skipping VM instance initialization.", machine.Name)
618-
return 0, nil
619619
}
620+
klog.Errorf("Error occurred while initializing VM instance for machine %q: %s", machine.Name, err)
620621
updateRetryPeriod, updateErr := c.machineStatusUpdate(
621622
ctx,
622623
machine,
@@ -633,14 +634,11 @@ func (c *controller) initializeMachine(ctx context.Context, machine *v1alpha1.Ma
633634
},
634635
machine.Status.LastKnownState,
635636
)
636-
637637
if updateErr != nil {
638638
return updateRetryPeriod, updateErr
639639
}
640-
641640
return machineutils.ShortRetry, err
642641
}
643-
644642
klog.V(3).Infof("VM instance %q for machine %q was initialized", resp.ProviderID, machine.Name)
645643
return 0, nil
646644
}
@@ -661,7 +659,7 @@ func (c *controller) triggerDeletionFlow(ctx context.Context, deleteMachineReque
661659
return c.setMachineTerminationStatus(ctx, deleteMachineRequest)
662660

663661
case strings.Contains(machine.Status.LastOperation.Description, machineutils.GetVMStatus):
664-
return c.getVMStatus(
662+
return c.updateMachineStatusAndNodeLabel(
665663
ctx,
666664
&driver.GetMachineStatusRequest{
667665
Machine: deleteMachineRequest.Machine,

pkg/util/provider/machinecontroller/machine_test.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ var _ = Describe("machine", func() {
545545
ProviderID: "fakeID",
546546
},
547547
}, nil, nil, nil, map[string]string{v1alpha1.NodeLabelKey: "fakeNode-0"}, true, metav1.Now()),
548-
err: fmt.Errorf("Machine creation in process. Machine UPDATE successful"),
548+
err: fmt.Errorf("machine creation in process. Machine initialization (if required) is successful"),
549549
retry: machineutils.ShortRetry,
550550
},
551551
}),
@@ -623,7 +623,7 @@ var _ = Describe("machine", func() {
623623
true,
624624
metav1.Now(),
625625
),
626-
err: fmt.Errorf("Machine creation in process. Machine/Status UPDATE successful"),
626+
err: fmt.Errorf("machine creation in process. Machine/Status UPDATE successful"),
627627
retry: machineutils.ShortRetry,
628628
},
629629
}),
@@ -1016,7 +1016,7 @@ var _ = Describe("machine", func() {
10161016
true,
10171017
metav1.Now(),
10181018
),
1019-
err: fmt.Errorf("Machine creation in process. Machine/Status UPDATE successful"),
1019+
err: fmt.Errorf("machine creation in process. Machine/Status UPDATE successful"),
10201020
retry: machineutils.ShortRetry,
10211021
},
10221022
}),
@@ -1068,6 +1068,7 @@ var _ = Describe("machine", func() {
10681068
Kind: "MachineClass",
10691069
Name: "machineClass",
10701070
},
1071+
ProviderID: "fakeID",
10711072
},
10721073
}, &v1alpha1.MachineStatus{
10731074
CurrentStatus: v1alpha1.CurrentStatus{
@@ -1079,7 +1080,7 @@ var _ = Describe("machine", func() {
10791080
State: v1alpha1.MachineStateFailed,
10801081
Type: v1alpha1.MachineOperationCreate,
10811082
},
1082-
}, nil, nil, nil, true, metav1.Now()),
1083+
}, nil, nil, map[string]string{v1alpha1.NodeLabelKey: "fakeNode-0"}, true, metav1.Now()),
10831084
err: status.Error(codes.Uninitialized, "VM instance could not be initialized"),
10841085
retry: machineutils.ShortRetry,
10851086
},
@@ -1503,7 +1504,7 @@ var _ = Describe("machine", func() {
15031504
},
15041505
},
15051506
expect: expect{
1506-
err: fmt.Errorf("Machine deletion in process. VM with matching ID found"),
1507+
err: fmt.Errorf("machine deletion in process. VM with matching ID found"),
15071508
retry: machineutils.ShortRetry,
15081509
machine: newMachine(
15091510
&v1alpha1.MachineTemplateSpec{

0 commit comments

Comments
 (0)