Skip to content

Commit 063fe18

Browse files
committed
feat: improve S3 image sync with optimized data handling
1 parent cbb05aa commit 063fe18

File tree

2 files changed

+65
-40
lines changed

2 files changed

+65
-40
lines changed

internal/sync/images.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ func push(ctx context.Context, image *structs.Image, desc *remote.Descriptor, ds
3232
return backoff.RetryNotify(func() error {
3333
if strings.HasPrefix(dst, "r2:") {
3434
return pushR2(ctx, image, desc, dst, tag)
35-
} else if strings.HasPrefix(dst, "s3:") {
35+
}
36+
37+
if strings.HasPrefix(dst, "s3:") {
3638
return pushS3(ctx, image, desc, dst, tag)
3739
}
3840

internal/sync/s3.go

Lines changed: 62 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ package sync
33
import (
44
"bytes"
55
"context"
6+
"crypto/sha256"
67
"fmt"
78
"io"
89
"net/http"
10+
"os"
911
"path/filepath"
1012
"strings"
1113
"time"
@@ -69,12 +71,12 @@ func pushS3WithSession(ctx context.Context, s3Session *s3.S3, bucket *string, im
6971
"v2",
7072
acl,
7173
aws.String("application/json"),
72-
[]byte{}, // No content is needed, we just need to return a 200.
74+
bytes.NewReader([]byte{}), // No content is needed, we just need to return a 200.
7375
); err != nil {
7476
return err
7577
}
7678

77-
baseDir := filepath.Join("v2", image.GetName())
79+
baseDir := filepath.Join("v2", image.GetSourceRepository())
7880

7981
i, err := desc.Image()
8082
if err != nil {
@@ -93,7 +95,7 @@ func pushS3WithSession(ctx context.Context, s3Session *s3.S3, bucket *string, im
9395
filepath.Join(baseDir, "blobs", cnfHash.String()),
9496
acl,
9597
aws.String("application/vnd.docker.container.image.v1+json"),
96-
cnf,
98+
bytes.NewReader(cnf),
9799
); err != nil {
98100
return err
99101
}
@@ -105,46 +107,56 @@ func pushS3WithSession(ctx context.Context, s3Session *s3.S3, bucket *string, im
105107
return err
106108
}
107109

110+
// Blobs can be huge and we need an io.ReadSeeker, so we can't read them all into memory.
111+
tmpDir, err := os.MkdirTemp(os.TempDir(), "docker-sync")
112+
if err != nil {
113+
return err
114+
}
115+
108116
// Layers are synced first to avoid making a tag available before all its blobs are available.
109117
for _, layer := range l {
110-
digest, err := layer.Digest()
111-
if err != nil {
112-
return err
113-
}
114-
115-
mediaType, err := layer.MediaType()
116-
if err != nil {
117-
return err
118-
}
118+
if err := func() error {
119+
digest, err := layer.Digest()
120+
if err != nil {
121+
return err
122+
}
119123

120-
var r io.ReadCloser
124+
mediaType, err := layer.MediaType()
125+
if err != nil {
126+
return err
127+
}
121128

122-
if strings.HasSuffix(string(mediaType), ".gzip") {
123-
r, err = layer.Compressed()
129+
r, err := layer.Compressed()
124130
if err != nil {
125131
return err
126132
}
127-
} else {
128-
r, err = layer.Uncompressed()
133+
134+
tmpFile, err := os.Create(filepath.Join(tmpDir, "blob"))
129135
if err != nil {
130136
return err
131137
}
132-
}
138+
defer os.Remove(tmpFile.Name())
139+
defer tmpFile.Close()
133140

134-
b, err := io.ReadAll(r)
135-
if err != nil {
136-
return err
137-
}
141+
if _, err := io.Copy(tmpFile, r); err != nil {
142+
return err
143+
}
144+
tmpFile.Seek(0, io.SeekStart)
138145

139-
if err := syncObject(
140-
ctx,
141-
s3Session,
142-
bucket,
143-
filepath.Join(baseDir, "blobs", digest.String()),
144-
acl,
145-
aws.String(string(mediaType)),
146-
b,
147-
); err != nil {
146+
if err := syncObject(
147+
ctx,
148+
s3Session,
149+
bucket,
150+
filepath.Join(baseDir, "blobs", digest.String()),
151+
acl,
152+
aws.String(string(mediaType)),
153+
tmpFile,
154+
); err != nil {
155+
return err
156+
}
157+
158+
return nil
159+
}(); err != nil {
148160
return err
149161
}
150162
}
@@ -160,7 +172,7 @@ func pushS3WithSession(ctx context.Context, s3Session *s3.S3, bucket *string, im
160172
filepath.Join(baseDir, "manifests", tag),
161173
acl,
162174
mediaType,
163-
manifest,
175+
bytes.NewReader(manifest),
164176
); err != nil {
165177
return err
166178
}
@@ -172,15 +184,22 @@ func pushS3WithSession(ctx context.Context, s3Session *s3.S3, bucket *string, im
172184
filepath.Join(baseDir, "manifests", desc.Digest.String()),
173185
acl,
174186
mediaType,
175-
manifest,
187+
bytes.NewReader(manifest),
176188
); err != nil {
177189
return err
178190
}
179191

180192
return nil
181193
}
182194

183-
func syncObject(ctx context.Context, s3Session *s3.S3, bucket *string, key string, acl *string, contentType *string, b []byte) error {
195+
func syncObject(ctx context.Context, s3Session *s3.S3, bucket *string, key string, acl *string, contentType *string, r io.ReadSeeker) error {
196+
h := sha256.New()
197+
if _, err := io.Copy(h, r); err != nil {
198+
return err
199+
}
200+
calculatedDigest := fmt.Sprintf("sha256:%x", h.Sum(nil))
201+
r.Seek(0, io.SeekStart)
202+
184203
head, err := s3Session.HeadObject(&s3.HeadObjectInput{
185204
Bucket: bucket,
186205
Key: &key,
@@ -191,24 +210,28 @@ func syncObject(ctx context.Context, s3Session *s3.S3, bucket *string, key strin
191210
}
192211
}
193212

213+
headMetadataDigest, digestPresent := head.Metadata["calculatedDigest"]
214+
194215
if head == nil ||
195-
head.ContentLength == nil ||
196-
*head.ContentLength != int64(len(b)) ||
197216
head.ContentType == nil ||
198-
*head.ContentType != *contentType {
217+
*head.ContentType != *contentType ||
218+
!digestPresent ||
219+
*headMetadataDigest != calculatedDigest {
199220
log.Info().
200221
Str("bucket", *bucket).
201222
Str("key", key).
202223
Str("contentType", *contentType).
203-
Int64("contentLength", int64(len(b))).
204224
Msg("Syncing object")
205225

206226
if _, err := s3Session.PutObject(&s3.PutObjectInput{
207227
Bucket: bucket,
208228
Key: &key,
209-
Body: bytes.NewReader(b),
229+
Body: r,
210230
ACL: acl,
211231
ContentType: contentType,
232+
Metadata: map[string]*string{
233+
"calculatedDigest": aws.String(calculatedDigest),
234+
},
212235
}); err != nil {
213236
return err
214237
}

0 commit comments

Comments
 (0)