Skip to content

Commit a78807a

Browse files
authored
fix: Namespacesync copies resources after partial copy failure (#1229)
**What problem does this PR solve?**: Backport of #1228 By design, the namespacesync controller does not update resources in the target namespace. However, if it only copies some resources before being interrupted (i.e. the controller restarts), it will never copy the remaining resources. This PR adds a (failing) test that simulates this scenario, and then adds a fix. **Which issue(s) this PR fixes**: Fixes https://jira.nutanix.com/browse/NCN-108612 **How Has This Been Tested?**: <!-- Please describe the tests that you ran to verify your changes. Provide output from the tests and any manual steps needed to replicate the tests. --> **Special notes for your reviewer**: <!-- Use this to provide any additional information to the reviewers. This may include: - Best way to review the PR. - Where the author wants the most review attention on. - etc. --> **What problem does this PR solve?**: **Which issue(s) this PR fixes**: Fixes # **How Has This Been Tested?**: <!-- Please describe the tests that you ran to verify your changes. Provide output from the tests and any manual steps needed to replicate the tests. --> **Special notes for your reviewer**: <!-- Use this to provide any additional information to the reviewers. This may include: - Best way to review the PR. - Where the author wants the most review attention on. - etc. --> Signed-off-by: Daniel Lipovetsky <daniel.lipovetsky@nutanix.com>
1 parent bdbfe2c commit a78807a

File tree

4 files changed

+206
-70
lines changed

4 files changed

+206
-70
lines changed

pkg/controllers/namespacesync/controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ func (r *Reconciler) Reconcile(
139139
scc,
140140
namespace,
141141
)
142-
if client.IgnoreAlreadyExists(err) != nil {
142+
if err != nil {
143143
// TODO Record an Event.
144144
return ctrl.Result{}, fmt.Errorf(
145145
"failed to copy source ClusterClass %s or its referenced Templates to namespace %s: %w",

pkg/controllers/namespacesync/controller_test.go

Lines changed: 65 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010

1111
. "github.com/onsi/gomega"
1212
corev1 "k8s.io/api/core/v1"
13+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1314
"k8s.io/apiserver/pkg/storage/names"
1415
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1516
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -21,7 +22,7 @@ func TestReconcileExistingNamespaceWithUpdatedLabels(t *testing.T) {
2122
g := NewWithT(t)
2223
timeout := 5 * time.Second
2324

24-
sourceClusterClassName, cleanup, err := createUniqueClusterClassAndTemplates(
25+
sourceClusterClassName, _, cleanup, err := createUniqueClusterClassAndTemplates(
2526
sourceClusterClassNamespace,
2627
)
2728
g.Expect(err).ToNot(HaveOccurred())
@@ -53,7 +54,7 @@ func TestReconcileNewNamespaces(t *testing.T) {
5354
g := NewWithT(t)
5455
timeout := 5 * time.Second
5556

56-
sourceClusterClassName, cleanup, err := createUniqueClusterClassAndTemplates(
57+
sourceClusterClassName, _, cleanup, err := createUniqueClusterClassAndTemplates(
5758
sourceClusterClassNamespace,
5859
)
5960
g.Expect(err).ToNot(HaveOccurred())
@@ -84,7 +85,7 @@ func TestReconcileNewClusterClass(t *testing.T) {
8485
targetNamespaces, err := createTargetNamespaces(3)
8586
g.Expect(err).ToNot(HaveOccurred())
8687

87-
sourceClusterClassName, cleanup, err := createUniqueClusterClassAndTemplates(
88+
sourceClusterClassName, _, cleanup, err := createUniqueClusterClassAndTemplates(
8889
sourceClusterClassNamespace,
8990
)
9091
g.Expect(err).ToNot(HaveOccurred())
@@ -108,7 +109,7 @@ func TestReconcileNewClusterClass(t *testing.T) {
108109
func TestSourceClusterClassNamespaceEmpty(t *testing.T) {
109110
g := NewWithT(t)
110111

111-
_, cleanup, err := createUniqueClusterClassAndTemplates(
112+
_, _, cleanup, err := createUniqueClusterClassAndTemplates(
112113
sourceClusterClassNamespace,
113114
)
114115
g.Expect(err).ToNot(HaveOccurred())
@@ -128,6 +129,60 @@ func TestSourceClusterClassNamespaceEmpty(t *testing.T) {
128129
g.Expect(ns).To(BeEmpty())
129130
}
130131

132+
func TestReconcileAfterPartialFailureToCopy(t *testing.T) {
133+
g := NewWithT(t)
134+
timeout := 5 * time.Second
135+
prefix := "partial-failure"
136+
137+
sourceClusterClassName, sourceTemplates, sourceCleanup, err := createClusterClassAndTemplates(
138+
prefix,
139+
sourceClusterClassNamespace,
140+
)
141+
g.Expect(err).ToNot(HaveOccurred())
142+
defer func() {
143+
g.Expect(sourceCleanup()).To(Succeed())
144+
}()
145+
146+
// Create a namespace with the same ClusterClass and templates.
147+
targetNamespace, err := env.CreateNamespace(ctx, "target", map[string]string{})
148+
g.Expect(err).ToNot(HaveOccurred())
149+
targetClusterClassName, targetTemplates, _, err := createClusterClassAndTemplates(
150+
prefix,
151+
targetNamespace.Name,
152+
)
153+
g.Expect(err).ToNot(HaveOccurred())
154+
155+
// Verify that the target ClusterClass and templates match the source.
156+
g.Expect(targetClusterClassName).To(Equal(sourceClusterClassName))
157+
for i := range targetTemplates {
158+
g.Expect(targetTemplates[i].GetName()).To(Equal(sourceTemplates[i].GetName()))
159+
}
160+
161+
// Simulate a partial copy failure by removing the ClusterClass--the last object copied--from the target namespace.
162+
g.Expect(env.CleanupAndWait(ctx, &clusterv1.ClusterClass{
163+
ObjectMeta: metav1.ObjectMeta{
164+
Name: targetClusterClassName,
165+
Namespace: targetNamespace.Name,
166+
},
167+
})).To(Succeed())
168+
169+
// Label the namespace so that it is considered a target namespace to trigger the reconcile loop.
170+
targetNamespace.Labels[targetNamespaceLabelKey] = ""
171+
err = env.Update(ctx, targetNamespace)
172+
g.Expect(err).ToNot(HaveOccurred())
173+
174+
// Verify that the templates are copied.
175+
g.Eventually(func() error {
176+
return verifyClusterClassAndTemplates(
177+
env.Client,
178+
sourceClusterClassName,
179+
targetNamespace.Name,
180+
)
181+
},
182+
timeout,
183+
).Should(Succeed())
184+
}
185+
131186
func verifyClusterClassAndTemplates(
132187
cli client.Reader,
133188
name,
@@ -151,6 +206,7 @@ func verifyClusterClassAndTemplates(
151206

152207
func createUniqueClusterClassAndTemplates(namespace string) (
153208
clusterClassName string,
209+
templates []client.Object,
154210
cleanup func() error,
155211
err error,
156212
) {
@@ -165,6 +221,7 @@ func createClusterClassAndTemplates(
165221
namespace string,
166222
) (
167223
clusterClassName string,
224+
templates []client.Object,
168225
cleanup func() error,
169226
err error,
170227
) {
@@ -206,7 +263,7 @@ func createClusterClassAndTemplates(
206263

207264
// Create the set of initObjects from the objects above to add to the API server when the test environment starts.
208265

209-
templates := []client.Object{
266+
templates = []client.Object{
210267
bootstrapTemplate,
211268
infraMachineTemplateWorker,
212269
infraMachineTemplateControlPlane,
@@ -216,11 +273,11 @@ func createClusterClassAndTemplates(
216273

217274
for _, obj := range templates {
218275
if err := env.CreateAndWait(ctx, obj); err != nil {
219-
return "", nil, err
276+
return "", nil, nil, err
220277
}
221278
}
222279
if err := env.CreateAndWait(ctx, clusterClass); err != nil {
223-
return "", nil, err
280+
return "", nil, nil, err
224281
}
225282

226283
cleanup = func() error {
@@ -232,7 +289,7 @@ func createClusterClassAndTemplates(
232289
return env.CleanupAndWait(ctx, clusterClass)
233290
}
234291

235-
return clusterClass.Name, cleanup, nil
292+
return clusterClass.Name, templates, cleanup, nil
236293
}
237294

238295
func createTargetNamespaces(number int) ([]*corev1.Namespace, error) {

pkg/controllers/namespacesync/copy.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ func copyClusterClassAndTemplates(
3232
// Copy Template to target namespace
3333
targetTemplate := copyObjectForCreate(sourceTemplate, sourceTemplate.GetName(), namespace)
3434

35-
if err := w.Create(ctx, targetTemplate); err != nil {
35+
//nolint:gocritic // Inline error is checked.
36+
if err := w.Create(ctx, targetTemplate); client.IgnoreAlreadyExists(err) != nil {
3637
return fmt.Errorf(
3738
"failed to create %s %s: %w",
3839
targetTemplate.GetKind(),
@@ -50,7 +51,8 @@ func copyClusterClassAndTemplates(
5051
return fmt.Errorf("error processing references: %w", err)
5152
}
5253

53-
if err := w.Create(ctx, target); err != nil {
54+
//nolint:gocritic // Inline error is checked.
55+
if err := w.Create(ctx, target); client.IgnoreAlreadyExists(err) != nil {
5456
return fmt.Errorf(
5557
"failed to create %s %s: %w",
5658
target.Kind,

pkg/controllers/namespacesync/copy_test.go

Lines changed: 136 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,76 +4,153 @@
44
package namespacesync
55

66
import (
7+
"context"
8+
"errors"
9+
"fmt"
710
"testing"
8-
"time"
911

1012
. "github.com/onsi/gomega"
11-
apierrors "k8s.io/apimachinery/pkg/api/errors"
13+
"k8s.io/apimachinery/pkg/runtime"
1214
"k8s.io/apiserver/pkg/storage/names"
1315
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1416
"sigs.k8s.io/controller-runtime/pkg/client"
17+
"sigs.k8s.io/controller-runtime/pkg/client/fake"
18+
19+
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/internal/test/builder"
20+
)
21+
22+
const (
23+
sourceNamespace = "source-ns"
24+
targetNamespace = "target-ns"
1525
)
1626

17-
func TestDoNotUpdateIfTargetExists(t *testing.T) {
27+
var errFakeCreate = errors.New("fake create error")
28+
29+
type mockWriter struct {
30+
client.Writer
31+
createErrOnKind string
32+
createdObjects []client.Object
33+
}
34+
35+
func (m *mockWriter) Create(
36+
ctx context.Context,
37+
obj client.Object,
38+
opts ...client.CreateOption,
39+
) error {
40+
if m.createErrOnKind != "" && obj.GetObjectKind().GroupVersionKind().Kind == m.createErrOnKind {
41+
return errFakeCreate
42+
}
43+
m.createdObjects = append(m.createdObjects, obj)
44+
// Fake setting of UID to simulate a real API server create.
45+
obj.SetUID("fake-uid")
46+
return nil
47+
}
48+
49+
func TestCopyClusterClassAndTemplates(t *testing.T) {
1850
g := NewWithT(t)
19-
timeout := 50 * time.Second
20-
21-
prefix := names.SimpleNameGenerator.GenerateName("test-")
22-
sourceClusterClassName, cleanup, err := createClusterClassAndTemplates(
23-
prefix,
24-
sourceClusterClassNamespace,
25-
)
26-
g.Expect(err).ToNot(HaveOccurred())
27-
28-
targetNamespaces, err := createTargetNamespaces(3)
29-
g.Expect(err).ToNot(HaveOccurred())
30-
31-
for _, targetNamespace := range targetNamespaces {
32-
g.Eventually(func() error {
33-
return verifyClusterClassAndTemplates(
34-
env.Client,
35-
sourceClusterClassName,
36-
targetNamespace.Name,
37-
)
51+
ctx := context.Background()
52+
53+
testCases := []struct {
54+
name string
55+
createErrOnKind string
56+
expectErr error
57+
expectNumCopies int
58+
}{
59+
{
60+
name: "should succeed if all objects are created",
61+
expectNumCopies: 6, // 1 ClusterClass + 5 templates
62+
},
63+
{
64+
name: "should fail if creating a template fails",
65+
createErrOnKind: "GenericInfrastructureClusterTemplate",
66+
expectErr: errFakeCreate,
67+
expectNumCopies: 0, // The first template create will fail.
68+
},
69+
{
70+
name: "should fail if creating the clusterclass fails",
71+
createErrOnKind: "ClusterClass",
72+
expectErr: errFakeCreate,
73+
expectNumCopies: 5, // All 5 templates are created before ClusterClass.
3874
},
39-
timeout,
40-
).Should(Succeed())
4175
}
4276

43-
// Delete source class
44-
g.Expect(cleanup()).To(Succeed())
45-
46-
// Create source class again
47-
sourceClusterClassName, cleanup, err = createClusterClassAndTemplates(
48-
prefix,
49-
sourceClusterClassNamespace,
50-
)
51-
g.Expect(err).ToNot(HaveOccurred())
52-
defer func() {
53-
g.Expect(cleanup()).To(Succeed())
54-
}()
55-
56-
source := &clusterv1.ClusterClass{}
57-
err = env.Client.Get(
58-
ctx,
59-
client.ObjectKey{
60-
Namespace: sourceClusterClassNamespace,
61-
Name: sourceClusterClassName,
62-
},
63-
source,
64-
)
65-
g.Expect(err).ToNot(HaveOccurred())
66-
67-
// Verify that the copy function returns an error because the target ClusterClass and Templates
68-
// already exist.
69-
for _, targetNamespace := range targetNamespaces {
70-
err = copyClusterClassAndTemplates(
71-
ctx,
72-
env.Client,
73-
env.Client,
74-
source,
75-
targetNamespace.Name,
76-
)
77-
g.Expect(apierrors.IsAlreadyExists(err)).To(BeTrue())
77+
for _, tc := range testCases {
78+
t.Run(tc.name, func(t *testing.T) {
79+
sourceClusterClass, sourceTemplates := newTestClusterClassAndTemplates(
80+
sourceNamespace,
81+
names.SimpleNameGenerator.GenerateName("test-cc-"),
82+
)
83+
initObjs := []runtime.Object{sourceClusterClass}
84+
for _, template := range sourceTemplates {
85+
initObjs = append(initObjs, template)
86+
}
87+
88+
fakeReader := fake.NewClientBuilder().WithRuntimeObjects(initObjs...).Build()
89+
mockWriter := &mockWriter{
90+
createErrOnKind: tc.createErrOnKind,
91+
}
92+
93+
err := copyClusterClassAndTemplates(
94+
ctx,
95+
mockWriter,
96+
fakeReader,
97+
sourceClusterClass,
98+
targetNamespace,
99+
)
100+
101+
if tc.expectErr != nil {
102+
g.Expect(err).To(HaveOccurred())
103+
g.Expect(err).To(MatchError(ContainSubstring(tc.expectErr.Error())))
104+
} else {
105+
g.Expect(err).ToNot(HaveOccurred())
106+
}
107+
108+
g.Expect(len(mockWriter.createdObjects)).To(Equal(tc.expectNumCopies))
109+
110+
for _, obj := range mockWriter.createdObjects {
111+
g.Expect(obj.GetNamespace()).To(Equal(targetNamespace))
112+
g.Expect(obj.GetOwnerReferences()).To(BeEmpty())
113+
g.Expect(obj.GetUID()).ToNot(BeEmpty())
114+
g.Expect(obj.GetResourceVersion()).To(BeEmpty())
115+
}
116+
})
117+
}
118+
}
119+
120+
// newTestClusterClassAndTemplates is a helper to generate a valid ClusterClass with all its referenced templates.
121+
func newTestClusterClassAndTemplates(
122+
namespace,
123+
prefix string,
124+
) (*clusterv1.ClusterClass, []client.Object) {
125+
bootstrapTemplate := builder.BootstrapTemplate(namespace, prefix).Build()
126+
infraMachineTemplateControlPlane := builder.InfrastructureMachineTemplate(
127+
namespace,
128+
fmt.Sprintf("%s-control-plane", prefix),
129+
).Build()
130+
infraMachineTemplateWorker := builder.InfrastructureMachineTemplate(
131+
namespace,
132+
fmt.Sprintf("%s-worker", prefix),
133+
).Build()
134+
controlPlaneTemplate := builder.ControlPlaneTemplate(namespace, prefix).Build()
135+
infraClusterTemplate := builder.InfrastructureClusterTemplate(namespace, prefix).Build()
136+
machineDeploymentClass := builder.MachineDeploymentClass(fmt.Sprintf("%s-worker", prefix)).
137+
WithBootstrapTemplate(bootstrapTemplate).
138+
WithInfrastructureTemplate(infraMachineTemplateWorker).
139+
Build()
140+
clusterClass := builder.ClusterClass(namespace, prefix).
141+
WithInfrastructureClusterTemplate(infraClusterTemplate).
142+
WithControlPlaneTemplate(controlPlaneTemplate).
143+
WithControlPlaneInfrastructureMachineTemplate(infraMachineTemplateControlPlane).
144+
WithWorkerMachineDeploymentClasses(*machineDeploymentClass).
145+
Build()
146+
147+
templates := []client.Object{
148+
bootstrapTemplate,
149+
infraMachineTemplateWorker,
150+
infraMachineTemplateControlPlane,
151+
controlPlaneTemplate,
152+
infraClusterTemplate,
78153
}
154+
155+
return clusterClass, templates
79156
}

0 commit comments

Comments
 (0)