-
Notifications
You must be signed in to change notification settings - Fork 7
Add benchmark for Cilium custom resource stale list #147
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,290 @@ | ||
| // Copyright (c) Microsoft Corporation. | ||
| // Licensed under the MIT License. | ||
|
|
||
| package bench | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "sync/atomic" | ||
| "time" | ||
|
|
||
| internaltypes "github.com/Azure/kperf/contrib/internal/types" | ||
| "github.com/Azure/kperf/contrib/internal/utils" | ||
| "github.com/google/uuid" | ||
| "golang.org/x/sync/errgroup" | ||
| "k8s.io/klog/v2" | ||
|
|
||
| "github.com/urfave/cli" | ||
| ) | ||
|
|
||
| const ( | ||
| numCRApplyWorkers = 50 | ||
| maxNumCRApplyAttempts = 5 | ||
| kubectlApplyTimeout = 30 * time.Second | ||
| progressReportInterval = 10 * time.Second | ||
| installCiliumCRDsFlag = "install-cilium-crds" | ||
| numCEPFlag = "num-cilium-endpoints" | ||
| numCIDFlag = "num-cilium-identities" | ||
| ) | ||
|
|
||
| var benchCiliumCustomResourceListCase = cli.Command{ | ||
| Name: "cilium_cr_list", | ||
| Usage: ` | ||
|
|
||
| Simulate workload with stale list requests for Cilium custom resources. | ||
|
|
||
| This benchmark MUST be run in a cluster *without* Cilium installed, so Cilium doesn't | ||
| delete or modify the synthetic CiliumEndpoint and CiliumIdentity resources created in this test. | ||
| `, | ||
| Flags: append( | ||
| []cli.Flag{ | ||
| cli.BoolTFlag{ | ||
| Name: installCiliumCRDsFlag, | ||
| Usage: "Install Cilium CRDs if they don't already exist (default: true)", | ||
| }, | ||
| cli.IntFlag{ | ||
| Name: numCIDFlag, | ||
| Usage: "Number of CiliumIdentities to generate (default: 1000)", | ||
| Value: 1000, | ||
| }, | ||
| cli.IntFlag{ | ||
| Name: numCEPFlag, | ||
| Usage: "Number of CiliumEndpoints to generate (default: 1000)", | ||
| Value: 1000, | ||
| }, | ||
| }, | ||
| commonFlags..., | ||
| ), | ||
| Action: func(cliCtx *cli.Context) error { | ||
| _, err := renderBenchmarkReportInterceptor(ciliumCustomResourceListRun)(cliCtx) | ||
| return err | ||
| }, | ||
| } | ||
|
|
||
| // ciliumCustomResourceListRun runs a benchmark that: | ||
| // (1) creates many Cilium custom resources (CiliumIdentity and CiliumEndpoint). | ||
| // (2) executes stale list requests against those resources. | ||
| // This simulates a "worst case" scenario in which Cilium performs many expensive list requests. | ||
| func ciliumCustomResourceListRun(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) { | ||
| ctx := context.Background() | ||
|
|
||
| rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx, "loadprofile/cilium_cr_list.yaml") | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| defer func() { _ = rgCfgFileDone() }() | ||
|
|
||
| kubeCfgPath := cliCtx.GlobalString("kubeconfig") | ||
| kr := utils.NewKubectlRunner(kubeCfgPath, "") | ||
|
|
||
| if cliCtx.BoolT(installCiliumCRDsFlag) { | ||
| if err := installCiliumCRDs(ctx, kr); err != nil { | ||
| return nil, fmt.Errorf("failed to install Cilium CRDs: %w", err) | ||
| } | ||
| } | ||
|
|
||
| numCID := cliCtx.Int(numCIDFlag) | ||
| numCEP := cliCtx.Int(numCEPFlag) | ||
| if err := loadCiliumData(ctx, kr, numCID, numCEP); err != nil { | ||
| return nil, fmt.Errorf("failed to load Cilium data: %w", err) | ||
| } | ||
|
|
||
| rgResult, err := utils.DeployRunnerGroup(ctx, | ||
| cliCtx.GlobalString("kubeconfig"), | ||
| cliCtx.GlobalString("runner-image"), | ||
| rgCfgFile, | ||
| cliCtx.GlobalString("runner-flowcontrol"), | ||
| cliCtx.GlobalString("rg-affinity"), | ||
| ) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to deploy runner group: %w", err) | ||
| } | ||
|
|
||
| return &internaltypes.BenchmarkReport{ | ||
| Description: fmt.Sprintf(`Deploy %d CiliumIdentities and %d CiliumEndpoints, then run stale list requests against them`, numCID, numCEP), | ||
| LoadSpec: *rgSpec, | ||
| Result: *rgResult, | ||
| Info: map[string]interface{}{ | ||
| "numCiliumIdentities": numCID, | ||
| "numCiliumEndpoints": numCEP, | ||
| }, | ||
| }, nil | ||
| } | ||
|
|
||
| var ciliumCRDs = []string{ | ||
| "https://raw.githubusercontent.com/cilium/cilium/refs/tags/v1.16.6/pkg/k8s/apis/cilium.io/client/crds/v2/ciliumendpoints.yaml", | ||
| "https://raw.githubusercontent.com/cilium/cilium/refs/tags/v1.16.6/pkg/k8s/apis/cilium.io/client/crds/v2/ciliumidentities.yaml", | ||
| } | ||
|
|
||
| func installCiliumCRDs(ctx context.Context, kr *utils.KubectlRunner) error { | ||
| klog.V(0).Info("Installing Cilium CRDs...") | ||
| for _, crdURL := range ciliumCRDs { | ||
| err := kr.Apply(ctx, kubectlApplyTimeout, crdURL) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to apply CRD %s: %v", crdURL, err) | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func loadCiliumData(ctx context.Context, kr *utils.KubectlRunner, numCID int, numCEP int) error { | ||
| totalNumResources := numCID + numCEP | ||
| klog.V(0).Infof("Loading Cilium data (%d CiliumIdentities and %d CiliumEndpoints)...", numCID, numCEP) | ||
|
|
||
| // Parallelize kubectl apply to speed it up. Achieves ~80 inserts/sec. | ||
| taskChan := make(chan string, numCRApplyWorkers*2) | ||
| var appliedCount atomic.Uint64 | ||
| g, ctx := errgroup.WithContext(ctx) | ||
| for i := 0; i < numCRApplyWorkers; i++ { | ||
| g.Go(func() error { | ||
| for { | ||
| select { | ||
| case <-ctx.Done(): | ||
| return ctx.Err() | ||
| case ciliumResourceData, ok := <-taskChan: | ||
| if !ok { | ||
| return nil // taskChan closed | ||
| } | ||
| var err error | ||
| for i := 0; i < maxNumCRApplyAttempts; i++ { | ||
| err = kr.ServerSideApplyWithData(ctx, kubectlApplyTimeout, ciliumResourceData) | ||
| if err == nil { | ||
| appliedCount.Add(1) | ||
| break | ||
| } else if i < maxNumCRApplyAttempts-1 { | ||
| klog.Warningf("Failed to apply cilium resource data, will retry: %s", err) | ||
| } | ||
| } | ||
| if err != nil { // last retry failed, so give up. | ||
| return fmt.Errorf("failed to apply cilium resource data: %w", err) | ||
| } | ||
| } | ||
| } | ||
| }) | ||
| } | ||
|
|
||
| // Report progress periodically. | ||
| reporterDoneChan := make(chan struct{}) | ||
| g.Go(func() error { | ||
| timer := time.NewTicker(progressReportInterval) | ||
| defer timer.Stop() | ||
| for { | ||
| select { | ||
| case <-reporterDoneChan: | ||
| return nil | ||
| case <-ctx.Done(): | ||
| return ctx.Err() | ||
| case <-timer.C: | ||
| c := appliedCount.Load() | ||
| percent := int(float64(c) / float64(totalNumResources) * 100) | ||
| klog.V(0).Infof("Applied %d/%d cilium resources (%d%%)", c, totalNumResources, percent) | ||
| } | ||
| } | ||
| }) | ||
|
|
||
| // Generate CiliumIdentity and CiliumEndpoint CRs to be applied by the worker goroutines. | ||
| g.Go(func() error { | ||
| for i := 0; i < numCID; i++ { | ||
| select { | ||
| case <-ctx.Done(): | ||
| return ctx.Err() | ||
| case taskChan <- generateCiliumIdentity(): | ||
| } | ||
| } | ||
|
|
||
| for i := 0; i < numCEP; i++ { | ||
| select { | ||
| case <-ctx.Done(): | ||
| return ctx.Err() | ||
| case taskChan <- generateCiliumEndpoint(): | ||
| } | ||
| } | ||
|
|
||
| close(taskChan) // signal to consumer goroutines that we're done. | ||
| close(reporterDoneChan) | ||
| return nil | ||
| }) | ||
|
|
||
| if err := g.Wait(); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| klog.V(0).Infof("Loaded %d CiliumIdentities and %d CiliumEndpoints\n", numCID, numCEP) | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| func generateCiliumIdentity() string { | ||
| identityName := uuid.New().String() | ||
| return fmt.Sprintf(` | ||
| apiVersion: cilium.io/v2 | ||
| kind: CiliumIdentity | ||
| metadata: | ||
| name: "%s" | ||
| security-labels: | ||
| k8s:io.cilium.k8s.namespace.labels.control-plane: "true" | ||
| k8s:io.cilium.k8s.namespace.labels.kubernetes.azure.com/managedby: aks | ||
| k8s:io.cilium.k8s.namespace.labels.kubernetes.io/cluster-service: "true" | ||
| k8s:io.cilium.k8s.namespace.labels.kubernetes.io/metadata.name: kube-system | ||
| k8s:io.cilium.k8s.policy.cluster: default | ||
| k8s:io.cilium.k8s.policy.serviceaccount: coredns | ||
| k8s:io.kubernetes.pod.namespace: kube-system | ||
| k8s:k8s-app: kube-dns | ||
| k8s:kubernetes.azure.com/managedby: aks | ||
| k8s:version: v20`, identityName) | ||
| } | ||
|
|
||
| func generateCiliumEndpoint() string { | ||
| cepName := uuid.New().String() | ||
| return fmt.Sprintf(` | ||
| apiVersion: cilium.io/v2 | ||
| kind: CiliumEndpoint | ||
| metadata: | ||
| name: "%s" | ||
| status: | ||
| encryption: {} | ||
| external-identifiers: | ||
| container-id: 790d85075c394a8384f8b1a0fec62e2316c2556d175dab0c1fe676e5a6d92f33 | ||
| k8s-namespace: kube-system | ||
| k8s-pod-name: coredns-54b69f46b8-dbcdl | ||
| pod-name: kube-system/coredns-54b69f46b8-dbcdl | ||
| id: 1453 | ||
| identity: | ||
| id: 0000001 | ||
| labels: | ||
| - k8s:io.cilium.k8s.namespace.labels.control-plane=true | ||
| - k8s:io.cilium.k8s.namespace.labels.kubernetes.azure.com/managedby=aks | ||
| - k8s:io.cilium.k8s.namespace.labels.kubernetes.io/cluster-service=true | ||
| - k8s:io.cilium.k8s.namespace.labels.kubernetes.io/metadata.name=kube-system | ||
| - k8s:io.cilium.k8s.policy.cluster=default | ||
| - k8s:io.cilium.k8s.policy.serviceaccount=coredns | ||
| - k8s:io.kubernetes.pod.namespace=kube-system | ||
| - k8s:k8s-app=kube-dns | ||
| - k8s:kubernetes.azure.com/managedby=aks | ||
| - k8s:version=v20 | ||
| named-ports: | ||
| - name: dns | ||
| port: 53 | ||
| protocol: UDP | ||
| - name: dns-tcp | ||
| port: 53 | ||
| protocol: TCP | ||
| - name: metrics | ||
| port: 9153 | ||
| protocol: TCP | ||
| networking: | ||
| addressing: | ||
| - ipv4: 10.244.1.38 | ||
| node: 10.224.0.4 | ||
| policy: | ||
| egress: | ||
| enforcing: false | ||
| state: <status disabled> | ||
| ingress: | ||
| enforcing: false | ||
| state: <status disabled> | ||
| state: ready | ||
| visibility-policy-status: <status disabled> | ||
| `, cepName) | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
37 changes: 37 additions & 0 deletions
37
contrib/internal/manifests/loadprofile/cilium_cr_list.yaml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| # count defines how many runners in the group. | ||
| count: 10 | ||
|
|
||
| # This simulates worst-case behavior for Cilium: | ||
| # every agent hammering apiserver with stale LIST requests for CiliumIdentity and CiliumEndpoint. | ||
| # The request rate is much, much higher than what we'd expect to see in production with | ||
| # client-go exponential backoff configured, so if apiserver can survive this onslaught it can | ||
| # survive anything Cilium throws at it. | ||
| loadProfile: | ||
| version: 1 | ||
| description: cilium list profile | ||
| spec: | ||
| rate: 20 # 20 req/sec * 10 runners = 200 req/sec | ||
| total: 12000 # run for ~10 minutes, 600 seconds * 20/sec = 12000 | ||
| # 5k node cluster, one cilium agent per node | ||
| # divided by the number of runners. | ||
| conns: 500 | ||
| client: 500 | ||
|
|
||
| contentType: json | ||
| disableHTTP2: false | ||
|
|
||
| # 50/50 mix of ciliumidentity and ciliumendpoint queries. | ||
| # We're simulating with CilumEndpointSlice disabled here, on the assumption that CES will always | ||
| # have lower count than CEP, so if we can survive with CEP only then we're in good shape. | ||
| requests: | ||
| - staleList: | ||
| group: cilium.io | ||
| version: v2 | ||
| resource: ciliumidentities | ||
| shares: 1000 # Has 50% chance = 1000 / (1000 + 1000) | ||
| - staleList: | ||
| group: cilium.io | ||
| version: v2 | ||
| resource: ciliumendpoints | ||
| namespace: "default" | ||
| shares: 1000 # Has 50% chance = 1000 / (1000 + 1000) | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.