Skip to content

Commit 5185bca

Browse files
committed
fix: prevent adding failed images to workqueue repeatedly
Signed-off-by: zeroalphat <taichi-takemura@cybozu.co.jp>
1 parent 7cc3e84 commit 5185bca

File tree

4 files changed

+44
-20
lines changed

4 files changed

+44
-20
lines changed

api/v1/nodeimageset_types.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,11 @@ type ContainerImageStatus struct {
8484
}
8585

8686
const (
87-
WaitingForImageDownload = "WaitingForImageDownload"
88-
ImageDownloaded = "ImageDownloaded"
89-
ImageDownloadInProgress = "ImageDownloadInProgress"
90-
ImageDownloadFailed = "ImageDownloadFailed"
87+
WaitingForImageDownload = "WaitingForImageDownload"
88+
ImageDownloaded = "ImageDownloaded"
89+
ImageDownloadInProgress = "ImageDownloadInProgress"
90+
ImageDownloadFailed = "ImageDownloadFailed"
91+
ImageDownloadTemporarilyFailed = "ImageDownloadTemporarilyFailed"
9192
)
9293

9394
const (

internal/controller/nodeimageset_controller.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,12 @@ func (r *NodeImageSetReconciler) collectPendingImages(ctx context.Context, nodeI
142142

143143
var images []string
144144
for _, image := range nodeImageSet.Spec.Images {
145-
status, _, err := r.ImagePuller.GetImageStatus(ctx, nodeImageSet.Name, image)
145+
status, _, err := r.ImagePuller.GetImageStatus(ctx, nodeImageSet.Name, image, nodeImageSet.Spec.RegistryPolicy)
146146
if err != nil {
147147
log.FromContext(ctx).Error(err, "failed to get image status", "image", image)
148148
continue
149149
}
150-
if status == ofenv1.WaitingForImageDownload || status == ofenv1.ImageDownloadFailed {
150+
if status == ofenv1.WaitingForImageDownload {
151151
images = append(images, image)
152152
}
153153
}
@@ -173,7 +173,7 @@ func (r *NodeImageSetReconciler) updateStatus(ctx context.Context, nodeImageSet
173173

174174
for _, image := range nodeImageSet.Spec.Images {
175175
var errMsg string
176-
status, errMsg, err := r.ImagePuller.GetImageStatus(ctx, nodeImageSet.Name, image)
176+
status, errMsg, err := r.ImagePuller.GetImageStatus(ctx, nodeImageSet.Name, image, nodeImageSet.Spec.RegistryPolicy)
177177
if err != nil {
178178
return ctrl.Result{}, fmt.Errorf("failed to get image status for %s: %w", image, err)
179179
}
@@ -242,7 +242,7 @@ func (r *NodeImageSetReconciler) calculateImageStatus(ctx context.Context, nis *
242242
failed := 0
243243

244244
for _, image := range nis.Spec.Images {
245-
status, _, err := r.ImagePuller.GetImageStatus(ctx, nis.Name, image)
245+
status, _, err := r.ImagePuller.GetImageStatus(ctx, nis.Name, image, nis.Spec.RegistryPolicy)
246246
if err != nil {
247247
log.FromContext(ctx).Error(err, "failed to get image status", "image", image)
248248
continue
@@ -252,6 +252,9 @@ func (r *NodeImageSetReconciler) calculateImageStatus(ctx context.Context, nis *
252252
downloadedImage++
253253
case ofenv1.ImageDownloadFailed:
254254
failed++
255+
case ofenv1.ImageDownloadTemporarilyFailed:
256+
// Do not reflect ImageDownloadTemporarilyFailed in the status.
257+
// It's a temporary error that resolves once the image is cached in the registry mirror.
255258
case ofenv1.ImageDownloadInProgress:
256259
default:
257260
}

internal/imgmanager/image_puller.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,18 +201,13 @@ func (p *ImagePuller) PullImage(ctx context.Context, nodeImageSetName, ref strin
201201

202202
imageStatus.StopPulling()
203203
if err != nil {
204-
if errdefs.IsNotFound(err) && registryPolicy == ofenv1.RegistryPolicyMirrorOnly {
205-
// If the image is not found and the policy is MirrorOnly, we treat it as a non-fatal error.
206-
p.logger.Info("image not found in mirror registry, skipping pull", "image", ref)
207-
} else {
208-
imageStatus.SetError(err)
209-
}
204+
imageStatus.SetError(err)
210205
}
211206

212207
return err
213208
}
214209

215-
func (p *ImagePuller) GetImageStatus(ctx context.Context, nodeImageSetName, imageName string) (string, string, error) {
210+
func (p *ImagePuller) GetImageStatus(ctx context.Context, nodeImageSetName, imageName string, registryPolicy ofenv1.RegistryPolicy) (string, string, error) {
216211
value, ok := p.status.Load(nodeImageSetName)
217212
if !ok {
218213
return "", "", nil
@@ -238,6 +233,12 @@ func (p *ImagePuller) GetImageStatus(ctx context.Context, nodeImageSetName, imag
238233
}
239234
imageErr := imageStatus.GetError()
240235
if imageErr != nil {
236+
// When using mirror-only registry policy, treat image not found errors as temporary failures.
237+
// This handles cases where the image hasn't been cached in the registry mirror yet due to
238+
// timing issues during the image pull process.
239+
if errdefs.IsNotFound(imageErr) && registryPolicy == ofenv1.RegistryPolicyMirrorOnly {
240+
return ofenv1.ImageDownloadTemporarilyFailed, imageErr.Error(), nil
241+
}
241242
return ofenv1.ImageDownloadFailed, imageErr.Error(), nil
242243
}
243244

internal/imgmanager/image_puller_test.go

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
eventtypes "github.com/containerd/containerd/api/events"
1111
"github.com/containerd/containerd/v2/core/events"
12+
"github.com/containerd/errdefs"
1213
"github.com/containerd/typeurl/v2"
1314
"github.com/go-logr/logr"
1415
"github.com/stretchr/testify/assert"
@@ -261,14 +262,14 @@ func TestImagePullerGetImageStatusStates(t *testing.T) {
261262
puller.NewNodeImageSetStatus(testNodeImageSetName)
262263

263264
// Test WaitingForImageDownload state (initial state)
264-
status, message, err := puller.GetImageStatus(ctx, testNodeImageSetName, testImageName)
265+
status, message, err := puller.GetImageStatus(ctx, testNodeImageSetName, testImageName, ofenv1.RegistryPolicyDefault)
265266
assert.NoError(t, err)
266267
assert.Equal(t, ofenv1.WaitingForImageDownload, status)
267268
assert.Empty(t, message)
268269

269270
// Test ImageDownloaded state
270271
fakeClient.SetImageExists(testImageName, true)
271-
status, message, err = puller.GetImageStatus(ctx, testNodeImageSetName, testImageName)
272+
status, message, err = puller.GetImageStatus(ctx, testNodeImageSetName, testImageName, ofenv1.RegistryPolicyDefault)
272273
assert.NoError(t, err)
273274
assert.Equal(t, ofenv1.ImageDownloaded, status)
274275
assert.Empty(t, message)
@@ -281,7 +282,7 @@ func TestImagePullerGetImageStatusStates(t *testing.T) {
281282
imageStatus := nodeStatus.GetOrCreateImageStatus(testImageName)
282283
imageStatus.SetImagePulling(true)
283284

284-
status, message, err = puller.GetImageStatus(ctx, testNodeImageSetName, testImageName)
285+
status, message, err = puller.GetImageStatus(ctx, testNodeImageSetName, testImageName, ofenv1.RegistryPolicyDefault)
285286
assert.NoError(t, err)
286287
assert.Equal(t, ofenv1.ImageDownloadInProgress, status)
287288
assert.Empty(t, message)
@@ -291,10 +292,28 @@ func TestImagePullerGetImageStatusStates(t *testing.T) {
291292
testErr := fmt.Errorf("pull failed")
292293
imageStatus.SetError(testErr)
293294

294-
status, message, err = puller.GetImageStatus(ctx, testNodeImageSetName, testImageName)
295+
status, message, err = puller.GetImageStatus(ctx, testNodeImageSetName, testImageName, ofenv1.RegistryPolicyDefault)
295296
assert.NoError(t, err)
296297
assert.Equal(t, ofenv1.ImageDownloadFailed, status)
297298
assert.Equal(t, "pull failed", message)
299+
300+
// Test ImageDownloadFailed state with a temporary error
301+
imageStatus.ClearError()
302+
imageStatus.SetImagePulling(false)
303+
imageStatus.SetError(errdefs.ErrNotFound)
304+
status, message, err = puller.GetImageStatus(ctx, testNodeImageSetName, testImageName, ofenv1.RegistryPolicyDefault)
305+
assert.NoError(t, err)
306+
assert.Equal(t, ofenv1.ImageDownloadFailed, status)
307+
assert.Equal(t, "not found", message)
308+
309+
// Test ImageDownloadTemporarilyFailed state
310+
imageStatus.ClearError()
311+
imageStatus.SetImagePulling(false)
312+
imageStatus.SetError(errdefs.ErrNotFound)
313+
status, message, err = puller.GetImageStatus(ctx, testNodeImageSetName, testImageName, ofenv1.RegistryPolicyMirrorOnly)
314+
assert.NoError(t, err)
315+
assert.Equal(t, ofenv1.ImageDownloadTemporarilyFailed, status)
316+
assert.Equal(t, "not found", message)
298317
}
299318

300319
func TestImagePullerGetImageStatusNonexistentNodeImageSet(t *testing.T) {
@@ -306,7 +325,7 @@ func TestImagePullerGetImageStatusNonexistentNodeImageSet(t *testing.T) {
306325

307326
ctx := context.Background()
308327

309-
status, message, err := puller.GetImageStatus(ctx, "nonexistent", testImageName)
328+
status, message, err := puller.GetImageStatus(ctx, "nonexistent", testImageName, ofenv1.RegistryPolicyDefault)
310329
assert.NoError(t, err)
311330
assert.Empty(t, status)
312331
assert.Empty(t, message)

0 commit comments

Comments
 (0)