-
Notifications
You must be signed in to change notification settings - Fork 5
feat(s3-perf-test): Create s3-perf-test image #88
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
on: | ||
workflow_dispatch: | ||
inputs: | ||
commit: | ||
description: 'Commit to build' | ||
required: true | ||
default: 'master' | ||
push: | ||
paths: | ||
- "s3-perf-test/**" | ||
- ".github/workflows/s3-perf-test.yml" | ||
- ".github/workflows/build.yml" | ||
|
||
|
||
jobs: | ||
build: | ||
uses: ./.github/workflows/build.yml | ||
secrets: inherit | ||
with: | ||
image-name: s3-perf-test | ||
folder: s3-perf-test | ||
build-args: "--build-arg COMMIT=${{ github.event.inputs.commit }}" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
FROM golang:1.23.2-bookworm AS builder | ||
ARG TARGETOS | ||
ARG TARGETARCH | ||
|
||
WORKDIR /app | ||
|
||
COPY . . | ||
|
||
RUN go build -o /s3-perf-test main.go | ||
|
||
FROM debian:bookworm-slim | ||
|
||
RUN apt-get update && apt-get install -y ca-certificates | ||
|
||
RUN update-ca-certificates | ||
|
||
COPY --from=builder /s3-perf-test . | ||
|
||
ENTRYPOINT [ "/s3-perf-test" ] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
module github.com/coreweave/ml-containers/s3-perf-test | ||
|
||
go 1.23.2 | ||
|
||
require ( | ||
github.com/hashicorp/go-cleanhttp v0.5.2 | ||
github.com/jedib0t/go-pretty/v6 v6.6.7 | ||
github.com/minio/minio-go/v7 v7.0.87 | ||
) | ||
|
||
require ( | ||
github.com/dustin/go-humanize v1.0.1 // indirect | ||
github.com/go-ini/ini v1.67.0 // indirect | ||
github.com/goccy/go-json v0.10.5 // indirect | ||
github.com/google/uuid v1.6.0 // indirect | ||
github.com/klauspost/compress v1.17.11 // indirect | ||
github.com/klauspost/cpuid/v2 v2.2.9 // indirect | ||
github.com/mattn/go-runewidth v0.0.16 // indirect | ||
github.com/minio/crc64nvme v1.0.1 // indirect | ||
github.com/minio/md5-simd v1.1.2 // indirect | ||
github.com/rivo/uniseg v0.4.7 // indirect | ||
github.com/rs/xid v1.6.0 // indirect | ||
golang.org/x/crypto v0.33.0 // indirect | ||
golang.org/x/net v0.35.0 // indirect | ||
golang.org/x/sys v0.30.0 // indirect | ||
golang.org/x/text v0.22.0 // indirect | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= | ||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= | ||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= | ||
github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= | ||
github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= | ||
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= | ||
github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= | ||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= | ||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= | ||
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= | ||
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= | ||
github.com/jedib0t/go-pretty/v6 v6.6.7 h1:m+LbHpm0aIAPLzLbMfn8dc3Ht8MW7lsSO4MPItz/Uuo= | ||
github.com/jedib0t/go-pretty/v6 v6.6.7/go.mod h1:YwC5CE4fJ1HFUDeivSV1r//AmANFHyqczZk+U6BDALU= | ||
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= | ||
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= | ||
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= | ||
github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= | ||
github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= | ||
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= | ||
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= | ||
github.com/minio/crc64nvme v1.0.1 h1:DHQPrYPdqK7jQG/Ls5CTBZWeex/2FMS3G5XGkycuFrY= | ||
github.com/minio/crc64nvme v1.0.1/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg= | ||
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= | ||
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= | ||
github.com/minio/minio-go/v7 v7.0.87 h1:nkr9x0u53PespfxfUqxP3UYWiE2a41gaofgNnC4Y8WQ= | ||
github.com/minio/minio-go/v7 v7.0.87/go.mod h1:33+O8h0tO7pCeCWwBVa07RhVVfB/3vS4kEX7rwYKmIg= | ||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | ||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | ||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= | ||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= | ||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= | ||
github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= | ||
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= | ||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= | ||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= | ||
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= | ||
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= | ||
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= | ||
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= | ||
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= | ||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= | ||
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= | ||
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= | ||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= | ||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,258 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"io" | ||
"log/slog" | ||
"math/rand" | ||
"os" | ||
"strconv" | ||
"sync" | ||
"time" | ||
|
||
"github.com/hashicorp/go-cleanhttp" | ||
"github.com/jedib0t/go-pretty/v6/table" | ||
"github.com/minio/minio-go/v7" | ||
"github.com/minio/minio-go/v7/pkg/credentials" | ||
) | ||
|
||
func main() { | ||
accessKeyID := os.Getenv("S3_ACCESS_KEY_ID") | ||
secretKey := os.Getenv("S3_ACCESS_KEY_SECRET_KEY") | ||
region := os.Getenv("S3_REGION") | ||
endpoint := os.Getenv("S3_ENDPOINT") | ||
gomaxprocs := os.Getenv("GOMAXPROCS") | ||
bucket := os.Getenv("S3_BUCKET") | ||
prefix := os.Getenv("S3_PREFIX") | ||
objectCount := os.Getenv("MAX_OBJECTS") | ||
testDuration := os.Getenv("TEST_DURATION") | ||
useSSL := os.Getenv("USE_SSL") | ||
|
||
if accessKeyID == "" || secretKey == "" { | ||
slog.Error("S3_ACCESS_KEY_ID and S3_ACCESS_KEY_SECRET_KEY are required to run") | ||
|
||
os.Exit(1) | ||
} | ||
|
||
if region == "" || endpoint == "" { | ||
slog.Error("S3_REGION and S3_ENDPOINT are required to run") | ||
|
||
os.Exit(1) | ||
} | ||
|
||
if bucket == "" { | ||
slog.Error("S3_BUCKET is required to run") | ||
|
||
os.Exit(1) | ||
} | ||
|
||
threads := 1 | ||
maxObjects := 1000 | ||
var err error | ||
|
||
if gomaxprocs != "" { | ||
threads, err = strconv.Atoi(gomaxprocs) | ||
if err != nil { | ||
slog.Error("failed to convert GOMAXPROCS to int", "error", err) | ||
|
||
os.Exit(1) | ||
} | ||
} | ||
|
||
if objectCount != "" { | ||
maxObjects, err = strconv.Atoi(objectCount) | ||
if err != nil { | ||
slog.Error("failed to convert MAX_OBJECTS to int", "error", err) | ||
|
||
os.Exit(1) | ||
} | ||
} | ||
|
||
duration := time.Minute * 10 | ||
|
||
if testDuration != "" { | ||
duration, err = time.ParseDuration(testDuration) | ||
if err != nil { | ||
slog.Error("failed to parse TEST_DURATION", "error", err) | ||
|
||
os.Exit(1) | ||
} | ||
} | ||
|
||
ssl := true | ||
if useSSL != "" { | ||
ssl, err = strconv.ParseBool(useSSL) | ||
if err != nil { | ||
slog.Error("failed to parse USE_SSL", "error", err) | ||
|
||
os.Exit(1) | ||
} | ||
} | ||
|
||
ctx, cancel := context.WithCancel(context.Background()) | ||
transport := cleanhttp.DefaultPooledTransport() | ||
|
||
s3Client, err := minio.New(endpoint, &minio.Options{ | ||
Creds: credentials.NewStaticV4(accessKeyID, secretKey, ""), | ||
Secure: ssl, | ||
Transport: transport, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. curious, why this http transport implementation? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cleanhttp just sets sane defaults on the normal golang net/http. I then took some known good defaults for specifically load testing and then overwrite the defaults that the cleanhttp doesn't set/sets to something I don't want. Essentially its just a quicker short hand, but realistically not needed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. makes sense, thx! |
||
Region: region, | ||
BucketLookup: minio.BucketLookupDNS, | ||
}) | ||
if err != nil { | ||
slog.Error("failed to create s3 client", "error", err) | ||
|
||
os.Exit(1) | ||
} | ||
|
||
var objects []string | ||
var sizes []int64 | ||
|
||
objChan := s3Client.ListObjects(ctx, bucket, minio.ListObjectsOptions{ | ||
WithMetadata: true, | ||
Prefix: prefix, | ||
MaxKeys: maxObjects, | ||
}) | ||
|
||
for obj := range objChan { | ||
if obj.Err != nil { | ||
slog.Error("error listing objects", "error", err) | ||
|
||
os.Exit(1) | ||
} | ||
|
||
objects = append(objects, obj.Key) | ||
sizes = append(sizes, obj.Size) | ||
} | ||
|
||
rng := rand.New(rand.NewSource(time.Now().Unix())) | ||
timer := time.NewTimer(duration) | ||
resultChan := make(chan *testResult, threads) | ||
wg := &sync.WaitGroup{} | ||
|
||
params := &testParams{ | ||
Bucket: bucket, | ||
RNG: rng, | ||
Objects: objects, | ||
ObjectSizes: sizes, | ||
S3Client: s3Client, | ||
} | ||
|
||
start := time.Now().UTC() | ||
|
||
for i := range threads { | ||
wg.Add(1) | ||
go func() { | ||
result := runTest(ctx, params, i) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. something that's nice is the ability for all the tests on different nodes to coordinate enough to gang-schedule themselves at approximately the exact same time in the cluster (esp. for various types of workloads) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah sort of like a sync barrier almost? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
yes, precisely that (we use NFS for coordination, since it needs to be low-dep / cluster agnostic ideally) |
||
resultChan <- result | ||
wg.Done() | ||
}() | ||
} | ||
|
||
<-timer.C | ||
|
||
slog.Info("test done") | ||
|
||
// Tell our tests that we're done and wait | ||
cancel() | ||
wg.Wait() | ||
|
||
aggregatedBytesRead := 0 | ||
aggregatedRequestsSent := 0 | ||
var averageTTLB int64 | ||
|
||
for range threads { | ||
result := <-resultChan | ||
|
||
aggregatedBytesRead += int(result.TotalBytesRead) | ||
aggregatedRequestsSent += int(result.TotalRequestsSent) | ||
averageTTLB += result.TotalTTLBMS | ||
} | ||
|
||
averageTTLB = averageTTLB / int64(aggregatedRequestsSent) | ||
|
||
timeSpent := time.Since(start) | ||
|
||
t := table.NewWriter() | ||
t.SetOutputMirror(os.Stdout) | ||
t.AppendHeader(table.Row{"", "RESULTS"}) | ||
t.AppendRow(table.Row{"Time Spent", timeSpent.String()}) | ||
t.AppendRow(table.Row{"Total Bytes Read", fmt.Sprintf("%d", aggregatedBytesRead)}) | ||
t.AppendRow(table.Row{"Total Requests Sent", fmt.Sprintf("%d", aggregatedRequestsSent)}) | ||
t.AppendRow(table.Row{"Average TTLB", fmt.Sprintf("%d", averageTTLB)}) | ||
|
||
t.Render() | ||
} | ||
|
||
type testParams struct { | ||
Bucket string | ||
RNG *rand.Rand | ||
Objects []string | ||
ObjectSizes []int64 | ||
S3Client *minio.Client | ||
} | ||
|
||
type testResult struct { | ||
TotalBytesRead int64 | ||
TotalRequestsSent int64 | ||
TotalTTLBMS int64 | ||
} | ||
|
||
func runTest(ctx context.Context, params *testParams, id int) *testResult { | ||
ll := slog.With("ID", id) | ||
|
||
ll.Info("starting test") | ||
|
||
result := &testResult{} | ||
|
||
testCtx := context.Background() | ||
|
||
for { | ||
select { | ||
case <-ctx.Done(): | ||
ll.Info("context cancelled, stopping test") | ||
|
||
return result | ||
default: | ||
// Get our random object | ||
randObjIndex := params.RNG.Int() % len(params.Objects) | ||
obj := params.Objects[randObjIndex] | ||
size := params.ObjectSizes[randObjIndex] | ||
|
||
// Get a random 16KiB offset to read | ||
maxOffset := size / (16 * 1024) | ||
randObjOffset := params.RNG.Int() % int(maxOffset) | ||
|
||
rangeStart := int64(randObjOffset * (16 * 1024)) | ||
rangeEnd := min(int64((rangeStart + (16 * 1024))), size) | ||
|
||
result.TotalRequestsSent++ | ||
|
||
start := time.Now().UTC() | ||
|
||
reqOpts := minio.GetObjectOptions{} | ||
reqOpts.SetRange(rangeStart, rangeEnd) | ||
|
||
resp, err := params.S3Client.GetObject(testCtx, params.Bucket, obj, reqOpts) | ||
|
||
if err != nil { | ||
ll.Error("failed to fetch range", "error", err) | ||
|
||
continue | ||
} | ||
|
||
amount, err := io.Copy(io.Discard, resp) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if developing write workloads as well, I've found pooling/buffer reuse helpful |
||
resp.Close() | ||
|
||
result.TotalBytesRead += amount | ||
result.TotalTTLBMS += time.Since(start).Milliseconds() | ||
|
||
if err != nil { | ||
ll.Error("failed to discard response body", "error", err) | ||
|
||
continue | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
curious: why the minio client?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Far more convenient essentially! No code for endpoint resolvers, pointers, etc. Per is more or less the same since they both use the underlying net/http.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
right, makes sense!