Skip to content

feat(s3-perf-test): Create s3-perf-test image #88

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/workflows/s3-perf-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
on:
workflow_dispatch:
inputs:
commit:
description: 'Commit to build'
required: true
default: 'master'
push:
paths:
- "s3-perf-test/**"
- ".github/workflows/s3-perf-test.yml"
- ".github/workflows/build.yml"


jobs:
build:
uses: ./.github/workflows/build.yml
secrets: inherit
with:
image-name: s3-perf-test
folder: s3-perf-test
build-args: "--build-arg COMMIT=${{ github.event.inputs.commit }}"
19 changes: 19 additions & 0 deletions s3-perf-test/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
FROM golang:1.23.2-bookworm AS builder
ARG TARGETOS
ARG TARGETARCH

WORKDIR /app

COPY . .

RUN go build -o /s3-perf-test main.go

FROM debian:bookworm-slim

RUN apt-get update && apt-get install -y ca-certificates

RUN update-ca-certificates

COPY --from=builder /s3-perf-test .

ENTRYPOINT [ "/s3-perf-test" ]
27 changes: 27 additions & 0 deletions s3-perf-test/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
module github.com/coreweave/ml-containers/s3-perf-test

go 1.23.2

require (
github.com/hashicorp/go-cleanhttp v0.5.2
github.com/jedib0t/go-pretty/v6 v6.6.7
github.com/minio/minio-go/v7 v7.0.87
)

require (
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/go-ini/ini v1.67.0 // indirect
github.com/goccy/go-json v0.10.5 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/minio/crc64nvme v1.0.1 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/rs/xid v1.6.0 // indirect
golang.org/x/crypto v0.33.0 // indirect
golang.org/x/net v0.35.0 // indirect
golang.org/x/sys v0.30.0 // indirect
golang.org/x/text v0.22.0 // indirect
)
46 changes: 46 additions & 0 deletions s3-perf-test/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A=
github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/jedib0t/go-pretty/v6 v6.6.7 h1:m+LbHpm0aIAPLzLbMfn8dc3Ht8MW7lsSO4MPItz/Uuo=
github.com/jedib0t/go-pretty/v6 v6.6.7/go.mod h1:YwC5CE4fJ1HFUDeivSV1r//AmANFHyqczZk+U6BDALU=
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/minio/crc64nvme v1.0.1 h1:DHQPrYPdqK7jQG/Ls5CTBZWeex/2FMS3G5XGkycuFrY=
github.com/minio/crc64nvme v1.0.1/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
github.com/minio/minio-go/v7 v7.0.87 h1:nkr9x0u53PespfxfUqxP3UYWiE2a41gaofgNnC4Y8WQ=
github.com/minio/minio-go/v7 v7.0.87/go.mod h1:33+O8h0tO7pCeCWwBVa07RhVVfB/3vS4kEX7rwYKmIg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
258 changes: 258 additions & 0 deletions s3-perf-test/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
package main

import (
"context"
"fmt"
"io"
"log/slog"
"math/rand"
"os"
"strconv"
"sync"
"time"

"github.com/hashicorp/go-cleanhttp"
"github.com/jedib0t/go-pretty/v6/table"
"github.com/minio/minio-go/v7"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious: why the minio client?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Far more convenient essentially! No code for endpoint resolvers, pointers, etc. Per is more or less the same since they both use the underlying net/http.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, makes sense!

"github.com/minio/minio-go/v7/pkg/credentials"
)

func main() {
accessKeyID := os.Getenv("S3_ACCESS_KEY_ID")
secretKey := os.Getenv("S3_ACCESS_KEY_SECRET_KEY")
region := os.Getenv("S3_REGION")
endpoint := os.Getenv("S3_ENDPOINT")
gomaxprocs := os.Getenv("GOMAXPROCS")
bucket := os.Getenv("S3_BUCKET")
prefix := os.Getenv("S3_PREFIX")
objectCount := os.Getenv("MAX_OBJECTS")
testDuration := os.Getenv("TEST_DURATION")
useSSL := os.Getenv("USE_SSL")

if accessKeyID == "" || secretKey == "" {
slog.Error("S3_ACCESS_KEY_ID and S3_ACCESS_KEY_SECRET_KEY are required to run")

os.Exit(1)
}

if region == "" || endpoint == "" {
slog.Error("S3_REGION and S3_ENDPOINT are required to run")

os.Exit(1)
}

if bucket == "" {
slog.Error("S3_BUCKET is required to run")

os.Exit(1)
}

threads := 1
maxObjects := 1000
var err error

if gomaxprocs != "" {
threads, err = strconv.Atoi(gomaxprocs)
if err != nil {
slog.Error("failed to convert GOMAXPROCS to int", "error", err)

os.Exit(1)
}
}

if objectCount != "" {
maxObjects, err = strconv.Atoi(objectCount)
if err != nil {
slog.Error("failed to convert MAX_OBJECTS to int", "error", err)

os.Exit(1)
}
}

duration := time.Minute * 10

if testDuration != "" {
duration, err = time.ParseDuration(testDuration)
if err != nil {
slog.Error("failed to parse TEST_DURATION", "error", err)

os.Exit(1)
}
}

ssl := true
if useSSL != "" {
ssl, err = strconv.ParseBool(useSSL)
if err != nil {
slog.Error("failed to parse USE_SSL", "error", err)

os.Exit(1)
}
}

ctx, cancel := context.WithCancel(context.Background())
transport := cleanhttp.DefaultPooledTransport()

s3Client, err := minio.New(endpoint, &minio.Options{
Creds: credentials.NewStaticV4(accessKeyID, secretKey, ""),
Secure: ssl,
Transport: transport,
Copy link

@codyohl codyohl Mar 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious, why this http transport implementation?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cleanhttp just sets sane defaults on the normal golang net/http. I then took some known good defaults for specifically load testing and then overwrite the defaults that the cleanhttp doesn't set/sets to something I don't want. Essentially its just a quicker short hand, but realistically not needed.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense, thx!

Region: region,
BucketLookup: minio.BucketLookupDNS,
})
if err != nil {
slog.Error("failed to create s3 client", "error", err)

os.Exit(1)
}

var objects []string
var sizes []int64

objChan := s3Client.ListObjects(ctx, bucket, minio.ListObjectsOptions{
WithMetadata: true,
Prefix: prefix,
MaxKeys: maxObjects,
})

for obj := range objChan {
if obj.Err != nil {
slog.Error("error listing objects", "error", err)

os.Exit(1)
}

objects = append(objects, obj.Key)
sizes = append(sizes, obj.Size)
}

rng := rand.New(rand.NewSource(time.Now().Unix()))
timer := time.NewTimer(duration)
resultChan := make(chan *testResult, threads)
wg := &sync.WaitGroup{}

params := &testParams{
Bucket: bucket,
RNG: rng,
Objects: objects,
ObjectSizes: sizes,
S3Client: s3Client,
}

start := time.Now().UTC()

for i := range threads {
wg.Add(1)
go func() {
result := runTest(ctx, params, i)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

something that's nice is the ability for all the tests on different nodes to coordinate enough to gang-schedule themselves at approximately the exact same time in the cluster (esp. for various types of workloads)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah sort of like a sync barrier almost?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah sort of like a sync barrier almost?

yes, precisely that (we use NFS for coordination, since it needs to be low-dep / cluster agnostic ideally)

resultChan <- result
wg.Done()
}()
}

<-timer.C

slog.Info("test done")

// Tell our tests that we're done and wait
cancel()
wg.Wait()

aggregatedBytesRead := 0
aggregatedRequestsSent := 0
var averageTTLB int64

for range threads {
result := <-resultChan

aggregatedBytesRead += int(result.TotalBytesRead)
aggregatedRequestsSent += int(result.TotalRequestsSent)
averageTTLB += result.TotalTTLBMS
}

averageTTLB = averageTTLB / int64(aggregatedRequestsSent)

timeSpent := time.Since(start)

t := table.NewWriter()
t.SetOutputMirror(os.Stdout)
t.AppendHeader(table.Row{"", "RESULTS"})
t.AppendRow(table.Row{"Time Spent", timeSpent.String()})
t.AppendRow(table.Row{"Total Bytes Read", fmt.Sprintf("%d", aggregatedBytesRead)})
t.AppendRow(table.Row{"Total Requests Sent", fmt.Sprintf("%d", aggregatedRequestsSent)})
t.AppendRow(table.Row{"Average TTLB", fmt.Sprintf("%d", averageTTLB)})

t.Render()
}

type testParams struct {
Bucket string
RNG *rand.Rand
Objects []string
ObjectSizes []int64
S3Client *minio.Client
}

type testResult struct {
TotalBytesRead int64
TotalRequestsSent int64
TotalTTLBMS int64
}

func runTest(ctx context.Context, params *testParams, id int) *testResult {
ll := slog.With("ID", id)

ll.Info("starting test")

result := &testResult{}

testCtx := context.Background()

for {
select {
case <-ctx.Done():
ll.Info("context cancelled, stopping test")

return result
default:
// Get our random object
randObjIndex := params.RNG.Int() % len(params.Objects)
obj := params.Objects[randObjIndex]
size := params.ObjectSizes[randObjIndex]

// Get a random 16KiB offset to read
maxOffset := size / (16 * 1024)
randObjOffset := params.RNG.Int() % int(maxOffset)

rangeStart := int64(randObjOffset * (16 * 1024))
rangeEnd := min(int64((rangeStart + (16 * 1024))), size)

result.TotalRequestsSent++

start := time.Now().UTC()

reqOpts := minio.GetObjectOptions{}
reqOpts.SetRange(rangeStart, rangeEnd)

resp, err := params.S3Client.GetObject(testCtx, params.Bucket, obj, reqOpts)

if err != nil {
ll.Error("failed to fetch range", "error", err)

continue
}

amount, err := io.Copy(io.Discard, resp)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if developing write workloads as well, I've found pooling/buffer reuse helpful

resp.Close()

result.TotalBytesRead += amount
result.TotalTTLBMS += time.Since(start).Milliseconds()

if err != nil {
ll.Error("failed to discard response body", "error", err)

continue
}
}
}
}
Loading