Skip to content

Commit 281f5b2

Browse files
mdbooth and EmilienM
authored and committed
Fix panic when OpenStack server is deleted by an external agent
The panic happened because we did not correctly handle the fact that GetInstanceStatus returns a nil server (rather than a 404 error) when the server does not exist. We had the same oversight in the OpenStackServer controller; there, it looks like the bug would have resulted in recreating the server.
1 parent 5ee7040 commit 281f5b2

File tree

4 files changed

+218
-21
lines changed

4 files changed

+218
-21
lines changed

api/v1beta1/conditions_consts.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ const (
4444
OpenStackErrorReason = "OpenStackError"
4545
// DependencyFailedReason indicates that a dependent object failed.
4646
DependencyFailedReason = "DependencyFailed"
47+
48+
// ServerUnexpectedDeletedMessage is the message used when the server is unexpectedly deleted via an external agent.
49+
ServerUnexpectedDeletedMessage = "The server was unexpectedly deleted"
4750
)
4851

4952
const (

controllers/openstackmachine_controller.go

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,11 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
365365
return ctrl.Result{}, err
366366
}
367367

368+
result := r.reconcileMachineState(scope, openStackMachine, machine, machineServer)
369+
if result != nil {
370+
return *result, nil
371+
}
372+
368373
computeService, err := compute.NewService(scope)
369374
if err != nil {
370375
return ctrl.Result{}, err
@@ -377,6 +382,12 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
377382
return ctrl.Result{}, err
378383
}
379384

385+
if instanceStatus == nil {
386+
msg := "server has been unexpectedly deleted"
387+
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeletedReason, clusterv1.ConditionSeverityError, msg)
388+
openStackMachine.SetFailure(capoerrors.DeprecatedCAPIUpdateMachineError, errors.New(msg))
389+
}
390+
380391
instanceNS, err := instanceStatus.NetworkStatus()
381392
if err != nil {
382393
return ctrl.Result{}, fmt.Errorf("get network status: %w", err)
@@ -393,22 +404,15 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
393404
})
394405
openStackMachine.Status.Addresses = addresses
395406

396-
result := r.reconcileMachineState(scope, openStackMachine, machine, machineServer)
397-
if result != nil {
398-
return *result, nil
399-
}
400-
401-
if !util.IsControlPlaneMachine(machine) {
402-
scope.Logger().Info("Not a Control plane machine, no floating ip reconcile needed, Reconciled Machine create successfully")
403-
return ctrl.Result{}, nil
404-
}
407+
if util.IsControlPlaneMachine(machine) {
408+
err = r.reconcileAPIServerLoadBalancer(scope, openStackCluster, openStackMachine, instanceStatus, instanceNS, clusterResourceName)
409+
if err != nil {
410+
return ctrl.Result{}, err
411+
}
405412

406-
err = r.reconcileAPIServerLoadBalancer(scope, openStackCluster, openStackMachine, instanceStatus, instanceNS, clusterResourceName)
407-
if err != nil {
408-
return ctrl.Result{}, err
413+
conditions.MarkTrue(openStackMachine, infrav1.APIServerIngressReadyCondition)
409414
}
410415

411-
conditions.MarkTrue(openStackMachine, infrav1.APIServerIngressReadyCondition)
412416
scope.Logger().Info("Reconciled Machine create successfully")
413417
return ctrl.Result{}, nil
414418
}

controllers/openstackserver_controller.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -433,9 +433,18 @@ func (r *OpenStackServerReconciler) getOrCreateServer(ctx context.Context, logge
433433

434434
if openStackServer.Status.InstanceID != nil {
435435
instanceStatus, err = computeService.GetInstanceStatus(*openStackServer.Status.InstanceID)
436-
if err != nil {
437-
logger.Info("Unable to get OpenStack instance", "name", openStackServer.Name)
438-
conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, "%s", err.Error())
436+
if err != nil || instanceStatus == nil {
437+
logger.Info("Unable to get OpenStack instance", "name", openStackServer.Name, "id", *openStackServer.Status.InstanceID)
438+
var msg string
439+
var reason string
440+
if err != nil {
441+
msg = err.Error()
442+
reason = infrav1.OpenStackErrorReason
443+
} else {
444+
msg = infrav1.ServerUnexpectedDeletedMessage
445+
reason = infrav1.InstanceNotFoundReason
446+
}
447+
conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, reason, clusterv1.ConditionSeverityError, msg)
439448
return nil, err
440449
}
441450
}
@@ -450,11 +459,6 @@ func (r *OpenStackServerReconciler) getOrCreateServer(ctx context.Context, logge
450459
logger.Info("Server already exists", "name", openStackServer.Name, "id", instanceStatus.ID())
451460
return instanceStatus, nil
452461
}
453-
if openStackServer.Status.InstanceID != nil {
454-
logger.Info("Not reconciling server in failed state. The previously existing OpenStack instance is no longer available")
455-
conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, clusterv1.ConditionSeverityError, "virtual machine no longer exists")
456-
return nil, nil
457-
}
458462

459463
logger.Info("Server does not exist, creating Server", "name", openStackServer.Name)
460464
instanceSpec, err := r.serverToInstanceSpec(ctx, openStackServer)
@@ -468,6 +472,8 @@ func (r *OpenStackServerReconciler) getOrCreateServer(ctx context.Context, logge
468472
openStackServer.Status.InstanceState = &infrav1.InstanceStateError
469473
return nil, fmt.Errorf("create OpenStack instance: %w", err)
470474
}
475+
// We reached a point where a server was created with no error but we can't predict its state yet which is why we don't update conditions yet.
476+
// The actual state of the server is checked in the next reconcile loops.
471477
return instanceStatus, nil
472478
}
473479
return instanceStatus, nil

controllers/openstackserver_controller_test.go

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,11 @@ import (
3131
"github.com/gophercloud/gophercloud/v2/openstack/networking/v2/ports"
3232
. "github.com/onsi/gomega" //nolint:revive
3333
"go.uber.org/mock/gomock"
34+
corev1 "k8s.io/api/core/v1"
3435
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3536
"k8s.io/utils/ptr"
37+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
38+
"sigs.k8s.io/cluster-api/util/conditions"
3639

3740
infrav1alpha1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha1"
3841
infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1beta1"
@@ -548,3 +551,184 @@ func Test_OpenStackServerReconcileCreate(t *testing.T) {
548551
})
549552
}
550553
}
554+
555+
func TestOpenStackServerReconciler_getOrCreateServer(t *testing.T) {
556+
tests := []struct {
557+
name string
558+
openStackServer *infrav1alpha1.OpenStackServer
559+
setupMocks func(r *recorders)
560+
wantServer *servers.Server
561+
wantErr bool
562+
wantCondition *clusterv1.Condition
563+
}{
564+
{
565+
name: "instanceID set in status but server not found",
566+
openStackServer: &infrav1alpha1.OpenStackServer{
567+
Status: infrav1alpha1.OpenStackServerStatus{
568+
InstanceID: ptr.To(instanceUUID),
569+
},
570+
},
571+
setupMocks: func(r *recorders) {
572+
r.compute.GetServer(instanceUUID).Return(nil, gophercloud.ErrUnexpectedResponseCode{Actual: 404})
573+
},
574+
wantErr: false,
575+
wantCondition: &clusterv1.Condition{
576+
Type: infrav1.InstanceReadyCondition,
577+
Status: corev1.ConditionFalse,
578+
Reason: infrav1.InstanceNotFoundReason,
579+
Message: infrav1.ServerUnexpectedDeletedMessage,
580+
},
581+
},
582+
{
583+
name: "instanceID set in status but server not found with error",
584+
openStackServer: &infrav1alpha1.OpenStackServer{
585+
Status: infrav1alpha1.OpenStackServerStatus{
586+
InstanceID: ptr.To(instanceUUID),
587+
},
588+
},
589+
setupMocks: func(r *recorders) {
590+
r.compute.GetServer(instanceUUID).Return(nil, fmt.Errorf("error"))
591+
},
592+
wantErr: true,
593+
wantCondition: &clusterv1.Condition{
594+
Type: infrav1.InstanceReadyCondition,
595+
Status: corev1.ConditionFalse,
596+
Reason: infrav1.OpenStackErrorReason,
597+
Message: "get server \"" + instanceUUID + "\" detail failed: error",
598+
},
599+
},
600+
{
601+
name: "instanceStatus is nil but server found with machine name",
602+
openStackServer: &infrav1alpha1.OpenStackServer{
603+
ObjectMeta: metav1.ObjectMeta{
604+
Name: openStackServerName,
605+
},
606+
Status: infrav1alpha1.OpenStackServerStatus{},
607+
},
608+
setupMocks: func(r *recorders) {
609+
r.compute.ListServers(servers.ListOpts{
610+
Name: "^" + openStackServerName + "$",
611+
}).Return([]servers.Server{{ID: instanceUUID}}, nil)
612+
},
613+
wantErr: false,
614+
wantServer: &servers.Server{
615+
ID: instanceUUID,
616+
},
617+
},
618+
{
619+
name: "instanceStatus is nil and server not found and then created",
620+
openStackServer: &infrav1alpha1.OpenStackServer{
621+
ObjectMeta: metav1.ObjectMeta{
622+
Name: openStackServerName,
623+
},
624+
Status: infrav1alpha1.OpenStackServerStatus{
625+
Resolved: &infrav1alpha1.ResolvedServerSpec{
626+
ImageID: imageUUID,
627+
FlavorID: flavorUUID,
628+
Ports: defaultResolvedPorts,
629+
},
630+
},
631+
},
632+
setupMocks: func(r *recorders) {
633+
r.compute.ListServers(servers.ListOpts{
634+
Name: "^" + openStackServerName + "$",
635+
}).Return([]servers.Server{}, nil)
636+
r.compute.CreateServer(gomock.Any(), gomock.Any()).Return(&servers.Server{ID: instanceUUID}, nil)
637+
},
638+
wantErr: false,
639+
wantServer: &servers.Server{
640+
ID: instanceUUID,
641+
},
642+
// It's off but no condition is set because the server creation was kicked off but we
643+
// don't know the result yet in this function.
644+
},
645+
{
646+
name: "instanceStatus is nil and server not found and then created with error",
647+
openStackServer: &infrav1alpha1.OpenStackServer{
648+
ObjectMeta: metav1.ObjectMeta{
649+
Name: openStackServerName,
650+
},
651+
Status: infrav1alpha1.OpenStackServerStatus{
652+
Resolved: &infrav1alpha1.ResolvedServerSpec{
653+
ImageID: imageUUID,
654+
FlavorID: flavorUUID,
655+
Ports: defaultResolvedPorts,
656+
},
657+
},
658+
},
659+
setupMocks: func(r *recorders) {
660+
r.compute.ListServers(servers.ListOpts{
661+
Name: "^" + openStackServerName + "$",
662+
}).Return([]servers.Server{}, nil)
663+
r.compute.CreateServer(gomock.Any(), gomock.Any()).Return(nil, fmt.Errorf("error"))
664+
},
665+
wantErr: true,
666+
wantCondition: &clusterv1.Condition{
667+
Type: infrav1.InstanceReadyCondition,
668+
Status: corev1.ConditionFalse,
669+
Reason: infrav1.InstanceCreateFailedReason,
670+
Message: "error creating Openstack instance: " + "error",
671+
},
672+
},
673+
}
674+
675+
for _, tt := range tests {
676+
t.Run(tt.name, func(t *testing.T) {
677+
g := NewGomegaWithT(t)
678+
log := testr.New(t)
679+
680+
mockCtrl := gomock.NewController(t)
681+
defer mockCtrl.Finish()
682+
683+
mockScopeFactory := scope.NewMockScopeFactory(mockCtrl, "")
684+
scopeWithLogger := scope.NewWithLogger(mockScopeFactory, log)
685+
686+
computeRecorder := mockScopeFactory.ComputeClient.EXPECT()
687+
imageRecorder := mockScopeFactory.ImageClient.EXPECT()
688+
networkRecorder := mockScopeFactory.NetworkClient.EXPECT()
689+
volumeRecorder := mockScopeFactory.VolumeClient.EXPECT()
690+
691+
recorders := &recorders{
692+
compute: computeRecorder,
693+
image: imageRecorder,
694+
network: networkRecorder,
695+
volume: volumeRecorder,
696+
}
697+
698+
if tt.setupMocks != nil {
699+
tt.setupMocks(recorders)
700+
}
701+
702+
computeService, err := compute.NewService(scopeWithLogger)
703+
g.Expect(err).ToNot(HaveOccurred())
704+
705+
reconciler := OpenStackServerReconciler{}
706+
status, err := reconciler.getOrCreateServer(ctx, log, tt.openStackServer, computeService, []string{portUUID})
707+
708+
// Check error result
709+
if tt.wantErr {
710+
g.Expect(err).To(HaveOccurred())
711+
} else {
712+
g.Expect(err).ToNot(HaveOccurred())
713+
}
714+
715+
// Check instance status
716+
if tt.wantServer != nil {
717+
g.Expect(status.ID()).To(Equal(tt.wantServer.ID))
718+
}
719+
720+
// Check the condition is set correctly
721+
if tt.wantCondition != nil {
722+
// print openstackServer conditions
723+
for _, condition := range tt.openStackServer.Status.Conditions {
724+
t.Logf("Condition: %s, Status: %s, Reason: %s", condition.Type, condition.Status, condition.Reason)
725+
}
726+
conditionType := conditions.Get(tt.openStackServer, tt.wantCondition.Type)
727+
g.Expect(conditionType).ToNot(BeNil())
728+
g.Expect(conditionType.Status).To(Equal(tt.wantCondition.Status))
729+
g.Expect(conditionType.Reason).To(Equal(tt.wantCondition.Reason))
730+
g.Expect(conditionType.Message).To(Equal(tt.wantCondition.Message))
731+
}
732+
})
733+
}
734+
}

0 commit comments

Comments
 (0)