From 94874685f1fb1c287edb27d91baa32201156ce5b Mon Sep 17 00:00:00 2001 From: James Masson Date: Thu, 2 May 2024 15:55:00 +0100 Subject: [PATCH] feat(CAF-565): K8s support for testing with Java media driver --- Dockerfile | 2 +- README.md | 39 ++-- scripts/k8s-remote-testing.sh | 17 +- scripts/k8s/aeron-echo-dpdk/aeronmd-dpdk.yml | 36 ++++ scripts/k8s/aeron-echo-dpdk/echo-client.yml | 6 +- scripts/k8s/aeron-echo-dpdk/kustomization.yml | 9 +- scripts/k8s/aeron-echo-dpdk/settings.yml | 17 ++ scripts/k8s/aeron-echo-java/aeronmd-java.yml | 19 ++ scripts/k8s/aeron-echo-java/echo-client.yml | 19 ++ scripts/k8s/aeron-echo-java/kustomization.yml | 18 ++ scripts/k8s/aeron-echo-java/settings.yml | 15 ++ scripts/k8s/base/base-containers.yml | 62 +++++++ .../{aeron-echo-dpdk => base}/echo-server.yml | 2 +- scripts/k8s/base/k8s-benchmark.yml | 175 +----------------- scripts/k8s/base/kustomization.yml | 11 ++ scripts/k8s/base/settings.yml | 10 +- scripts/k8s/k8s-benchmark-entrypoint.sh | 54 ------ scripts/k8s/k8s-common | 133 +++++++++++++ scripts/k8s/k8s-echo-client.sh | 30 +++ scripts/k8s/k8s-echo-server.sh | 27 +++ scripts/k8s/k8s-java-media-driver.sh | 26 +++ 21 files changed, 465 insertions(+), 262 deletions(-) create mode 100644 scripts/k8s/aeron-echo-dpdk/aeronmd-dpdk.yml create mode 100644 scripts/k8s/aeron-echo-dpdk/settings.yml create mode 100644 scripts/k8s/aeron-echo-java/aeronmd-java.yml create mode 100644 scripts/k8s/aeron-echo-java/echo-client.yml create mode 100644 scripts/k8s/aeron-echo-java/kustomization.yml create mode 100644 scripts/k8s/aeron-echo-java/settings.yml create mode 100644 scripts/k8s/base/base-containers.yml rename scripts/k8s/{aeron-echo-dpdk => base}/echo-server.yml (68%) delete mode 100755 scripts/k8s/k8s-benchmark-entrypoint.sh create mode 100644 scripts/k8s/k8s-common create mode 100755 scripts/k8s/k8s-echo-client.sh create mode 100755 scripts/k8s/k8s-echo-server.sh create mode 100755 scripts/k8s/k8s-java-media-driver.sh diff --git 
a/Dockerfile b/Dockerfile index c43364be..bf65cae5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,7 @@ RUN apt-get update &&\ jq \ lsb-release \ python3-pip \ + numactl \ hwloc &&\ pip3 install --upgrade --user hdr-plot @@ -30,5 +31,4 @@ RUN mkdir -p ${BENCHMARKS_PATH} &&\ tar -C ${BENCHMARKS_PATH} -xf /root/benchmarks.tar &&\ rm -f /root/benchmarks.tar -ENTRYPOINT [ "/opt/aeron-benchmarks/scripts/k8s/k8s-benchmark-entrypoint.sh" ] WORKDIR ${BENCHMARKS_PATH}/scripts diff --git a/README.md b/README.md index e7b75334..ebced912 100644 --- a/README.md +++ b/README.md @@ -236,19 +236,16 @@ will produce plots in which the histograms are grouped by test scenario by defau You will need the following Docker containers built & injected into a repository that you can use. -### Containers +The tests currently support Aeron Echo testing with either Java or C-DPDK media drivers. + +### Components & Containers **Benchmarks:** -This wraps the code in *this* repository. +This is the code in *this* repository. It must be built as a Docker container. -eg. -``` -docker build -t :aeron-benchmarks . -docker push :aeron-benchmarks -``` -**Aeron DPDK Media driver:** +**Optional: Aeron DPDK Media driver** Premium feature. @@ -256,21 +253,33 @@ If required/activated in your test configuration - see https://github.com/real-l This is expected to reside in a container called in an accessible repository. -**Aeron C Media driver:** +**Optional: Aeron C Media driver:** Support coming soon -### Settings +### Running the tests + -1. Update `scripts/k8s/base/settings.yml` with configuration from your test environment. +1. Build the benchmarks container and push it to a repo that your K8s nodes can pull from: + ``` + docker build -t :aeron-benchmarks . + docker push :aeron-benchmarks + ``` +2. 
Update the following files with configuration from your test environment - you can skip scenario config you don't plan to test + * `scripts/k8s/base/settings.yml` + * `scripts/k8s/aeron-echo-dpdk/settings.yml` + * `scripts/k8s/aeron-echo-java/settings.yml` -2. Make sure your test environment is the active `kubecontext` +3. Make sure your test environment is the active `kubecontext` -3. If you are attempting to run DPDK tests, make sure you have a DPDK enabled Pod/Host. Setting this up is outside the scope of this documentation, please see https://github.com/AdaptiveConsulting/k8s-dpdk-mgr for an example of how to do this. +4. If you are attempting to run DPDK tests, make sure you have a DPDK enabled Pod/Host. Setting this up is outside the scope of this documentation, please see https://github.com/AdaptiveConsulting/k8s-dpdk-mgr for an example of how to do this. -4. Ensure you are permissioned to write to a K8s namespace, by default the tooling will use the `default` namespace. +5. Ensure you are permissioned to write to a K8s namespace, by default the tooling will use the `default` namespace. -5. Run `./scripts/k8s-remote-testing.sh ( -n my_namespace )` +6. Run: + ``` + ./scripts/k8s-remote-testing.sh (-t aeron-echo-java | aeron-echo-dpdk ) ( -n my_namespace ) + ``` ## Other benchmarks (single machine) Set of latency benchmarks testing round trip time (RTT) between threads or processes (IPC) via FIFO data structures and messaging systems. 
diff --git a/scripts/k8s-remote-testing.sh b/scripts/k8s-remote-testing.sh index cf038db9..42971ea2 100755 --- a/scripts/k8s-remote-testing.sh +++ b/scripts/k8s-remote-testing.sh @@ -9,7 +9,7 @@ function f_log() { function f_show_help() { f_log "Supported arguments are:" - echo "${0} (-n|--namespace) '' (-t|--test) 'aeron-echo-dpdk'" + echo "${0} (-n|--namespace) '' (-t|--test) 'aeron-echo-dpdk|aeron-echo-java'" } while [[ $# -gt 0 ]] @@ -23,9 +23,10 @@ do ;; -t|--test) TEST_TO_RUN="${2}" - if [[ "${TEST_TO_RUN}" != "aeron-echo-dpdk" ]] - then - f_log "Error: only supported test is 'aeron-echo-dpdk' at the moment" + if [[ "${TEST_TO_RUN}" == "aeron-echo-dpdk" || "${TEST_TO_RUN}" == "aeron-echo-java" ]] + then true + else + f_log "Error: only supported tests are 'aeron-echo-dpdk' or 'aeron-echo-java' at the moment" exit 1 fi shift @@ -33,7 +34,7 @@ do ;; -h|--help) f_show_help - EXIT + exit 1 ;; *) echo "Error, unknown argument: ${option}" @@ -45,7 +46,7 @@ done # Standard vars K8S_NAMESPACE="${K8S_NAMESPACE:-default}" -TEST_TO_RUN="${TEST_TO_RUN:-aeron-echo-dpdk}" +TEST_TO_RUN="${TEST_TO_RUN:-aeron-echo-java}" TIMESTAMP="$(date +"%Y-%m-%d-%H-%M-%S")" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" @@ -67,8 +68,8 @@ f_cleanup_k8s f_log "Generating new benchmark setup for: ${TEST_TO_RUN}" kubectl -n "${K8S_NAMESPACE}" apply --wait=true -k "k8s/${TEST_TO_RUN}/" -kubectl -n "${K8S_NAMESPACE}" wait --for=condition=Ready pod/aeron-benchmark-0 -kubectl -n "${K8S_NAMESPACE}" wait --for=condition=Ready pod/aeron-benchmark-1 +kubectl -n "${K8S_NAMESPACE}" wait --timeout=90s --for=condition=Ready pod/aeron-benchmark-0 +kubectl -n "${K8S_NAMESPACE}" wait --timeout=90s --for=condition=Ready pod/aeron-benchmark-1 # DPDK Media Driver if [[ "${TEST_TO_RUN}" =~ .*-dpdk$ ]] diff --git a/scripts/k8s/aeron-echo-dpdk/aeronmd-dpdk.yml b/scripts/k8s/aeron-echo-dpdk/aeronmd-dpdk.yml new file mode 100644 index 00000000..f2a788fe --- /dev/null +++ 
b/scripts/k8s/aeron-echo-dpdk/aeronmd-dpdk.yml @@ -0,0 +1,36 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + name: all + labels: + aeronmd: dpdk +spec: + containers: + - name: aeronmd-dpdk + imagePullPolicy: Always + args: + - "--no-telemetry" + - "/opt/aeron-config/low-latency-driver.properties" + securityContext: + capabilities: + # Required for DPDK + add: + - IPC_LOCK + - SYS_RAWIO + - SYS_ADMIN + envFrom: + - configMapRef: + name: aeron-benchmark-envs-dpdk + volumeMounts: + - name: hugepage-2mi + mountPath: /hugepages-2Mi + - name: shm + mountPath: /dev/shm + - name: config-volume + mountPath: /opt/aeron-config + resources: + limits: + cpu: "8" + memory: 8G + hugepages-2Mi: 1Gi diff --git a/scripts/k8s/aeron-echo-dpdk/echo-client.yml b/scripts/k8s/aeron-echo-dpdk/echo-client.yml index 055cc995..8033b365 100644 --- a/scripts/k8s/aeron-echo-dpdk/echo-client.yml +++ b/scripts/k8s/aeron-echo-dpdk/echo-client.yml @@ -6,14 +6,14 @@ metadata: spec: containers: - name: benchmark + command: ["k8s/k8s-echo-client.sh"] + # These are options passed to the benchmark-runner script args: - - "./benchmark-runner" - "--output-file" - "aeron-echo_c-dpdk-k8s" - "--message-rate" - - "100K" + - "101K" - "--message-length" - "288" - "--iterations" - "60" - - "aeron/echo-client" diff --git a/scripts/k8s/aeron-echo-dpdk/kustomization.yml b/scripts/k8s/aeron-echo-dpdk/kustomization.yml index 27ad9b13..53ce59a7 100644 --- a/scripts/k8s/aeron-echo-dpdk/kustomization.yml +++ b/scripts/k8s/aeron-echo-dpdk/kustomization.yml @@ -3,11 +3,16 @@ resources: - ../base patches: - - path: echo-server.yml + # Media driver specific config + - path: aeronmd-dpdk.yml target: kind: Pod - name: aeron-benchmark-0 + # Settings for the benchmark - path: echo-client.yml target: kind: Pod name: aeron-benchmark-1 + # Local environment specific overrides + - target: + kind: Pod + path: settings.yml diff --git a/scripts/k8s/aeron-echo-dpdk/settings.yml b/scripts/k8s/aeron-echo-dpdk/settings.yml new file mode 
100644 index 00000000..327462dd --- /dev/null +++ b/scripts/k8s/aeron-echo-dpdk/settings.yml @@ -0,0 +1,17 @@ +--- +# Set test specific config +# eg: +# * image refs +# * DPDK interface specifics +apiVersion: v1 +kind: Pod +metadata: + name: all +spec: + containers: + - name: aeronmd-dpdk + image: "< Ref to Aeron DPDK media driver container >" + resources: + limits: + # Give us a DPDK NIC through https://github.com/AdaptiveConsulting/k8s-dpdk-mgr + intel.com/aws_dpdk: "1" diff --git a/scripts/k8s/aeron-echo-java/aeronmd-java.yml b/scripts/k8s/aeron-echo-java/aeronmd-java.yml new file mode 100644 index 00000000..cb68d5bd --- /dev/null +++ b/scripts/k8s/aeron-echo-java/aeronmd-java.yml @@ -0,0 +1,19 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + name: all + labels: + aeronmd: java +spec: + containers: + - name: aeronmd-java + imagePullPolicy: Always + command: ["/opt/aeron-benchmarks/scripts/k8s/k8s-java-media-driver.sh"] + volumeMounts: + - name: shm + mountPath: /dev/shm + resources: + limits: + cpu: "8" + memory: 8G diff --git a/scripts/k8s/aeron-echo-java/echo-client.yml b/scripts/k8s/aeron-echo-java/echo-client.yml new file mode 100644 index 00000000..86145fff --- /dev/null +++ b/scripts/k8s/aeron-echo-java/echo-client.yml @@ -0,0 +1,19 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + name: all +spec: + containers: + - name: benchmark + command: ["k8s/k8s-echo-client.sh"] + # These are options passed to the benchmark-runner script + args: + - "--output-file" + - "aeron-echo_java-k8s" + - "--message-rate" + - "101K" + - "--message-length" + - "288" + - "--iterations" + - "60" diff --git a/scripts/k8s/aeron-echo-java/kustomization.yml b/scripts/k8s/aeron-echo-java/kustomization.yml new file mode 100644 index 00000000..a4ceb11f --- /dev/null +++ b/scripts/k8s/aeron-echo-java/kustomization.yml @@ -0,0 +1,18 @@ +# This file contains the overrides for the aeron-echo-java test + +resources: + - ../base +patches: + # Media driver specific config + - path: aeronmd-java.yml + 
target: + kind: Pod + # Settings for the benchmark + - path: echo-client.yml + target: + kind: Pod + name: aeron-benchmark-1 + # Local environment specific settings + - target: + kind: Pod + path: settings.yml diff --git a/scripts/k8s/aeron-echo-java/settings.yml b/scripts/k8s/aeron-echo-java/settings.yml new file mode 100644 index 00000000..2ed95f11 --- /dev/null +++ b/scripts/k8s/aeron-echo-java/settings.yml @@ -0,0 +1,15 @@ +--- +# Set test specific config +# The image ref must be set +apiVersion: v1 +kind: Pod +metadata: + name: all +spec: + # Uncomment me to do host network testing + # dnsPolicy: ClusterFirstWithHostNet + # hostNetwork: true + containers: + - name: aeronmd-java + # This is the benchmarks container with an alternate entrypoint + image: "< Ref to benchmark container >" diff --git a/scripts/k8s/base/base-containers.yml b/scripts/k8s/base/base-containers.yml new file mode 100644 index 00000000..3dc7fdcf --- /dev/null +++ b/scripts/k8s/base/base-containers.yml @@ -0,0 +1,62 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + name: all + labels: + app.kubernetes.io/name: aeron-benchmark +spec: + # The benchmark code runs exactly once and isn't restarted + restartPolicy: Never + terminationGracePeriodSeconds: 5 + volumes: + - name: hugepage-2mi + emptyDir: + medium: HugePages-2Mi + - name: shm + emptyDir: + medium: Memory + sizeLimit: 2Gi + # Common properties files + - name: config-volume + configMap: + name: aeron-benchmark + items: + - key: low-latency-driver.properties + path: low-latency-driver.properties + containers: + # Media driver containers come from the Kustomization files in the scenarios + - name: benchmark + imagePullPolicy: Always + envFrom: + - configMapRef: + name: aeron-benchmark-envs + volumeMounts: + - mountPath: /hugepages-2Mi + name: hugepage-2mi + - mountPath: /dev/shm + name: shm + - name: config-volume + mountPath: /opt/aeron-config + resources: + limits: + cpu: "8" + memory: 10G + hugepages-2Mi: 1Gi + + - name: results + image: 
busybox + command: + - "sleep" + - "infinity" + envFrom: + - configMapRef: + name: aeron-benchmark-envs + volumeMounts: + - name: shm + mountPath: /dev/shm + resources: + # Must set limits, as the whole Pod must have requests = limits to get into the Guaranteed QoS class + limits: + cpu: "1" + memory: 512Mi diff --git a/scripts/k8s/aeron-echo-dpdk/echo-server.yml b/scripts/k8s/base/echo-server.yml similarity index 68% rename from scripts/k8s/aeron-echo-dpdk/echo-server.yml rename to scripts/k8s/base/echo-server.yml index 0c11822c..f118a7a5 100644 --- a/scripts/k8s/aeron-echo-dpdk/echo-server.yml +++ b/scripts/k8s/base/echo-server.yml @@ -6,4 +6,4 @@ metadata: spec: containers: - name: benchmark - args: ["aeron/echo-server"] + command: ["k8s/k8s-echo-server.sh"] diff --git a/scripts/k8s/base/k8s-benchmark.yml b/scripts/k8s/base/k8s-benchmark.yml index 60d86877..10d0d285 100644 --- a/scripts/k8s/base/k8s-benchmark.yml +++ b/scripts/k8s/base/k8s-benchmark.yml @@ -1,5 +1,7 @@ --- # This service is deliberately empty, and updated via an EndpointSlice +# as part of the benchmark process. 
+# DPDK interfaces do not have an IP visible to K8s by default # https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/ apiVersion: v1 kind: Service @@ -18,6 +20,8 @@ kind: ConfigMap metadata: name: aeron-benchmark data: + # Only used with DPDK at present, as the java-md/benchmark code ships with its own + # version of this file low-latency-driver.properties: | aeron.dir.delete.on.start=true aeron.dir.delete.on.shutdown=true @@ -68,177 +72,10 @@ apiVersion: v1 kind: Pod metadata: name: aeron-benchmark-0 -spec: - # The benchmark code runs exactly once and isn't restarted - restartPolicy: Never - terminationGracePeriodSeconds: 5 - volumes: - - name: hugepage-2mi - emptyDir: - medium: HugePages-2Mi - - name: shm - emptyDir: - medium: Memory - sizeLimit: 2Gi - # Common properties files - - name: config-volume - configMap: - name: aeron-benchmark - items: - - key: low-latency-driver.properties - path: low-latency-driver.properties - - # hostNetwork: true - # dnsPolicy: ClusterFirstWithHostNet - containers: - - name: benchmark - imagePullPolicy: Always - envFrom: - - configMapRef: - name: aeron-benchmark-envs - volumeMounts: - - mountPath: /hugepages-2Mi - name: hugepage-2mi - - mountPath: /dev/shm - name: shm - - name: config-volume - mountPath: /opt/aeron-config - resources: - limits: - cpu: "8" - memory: 10G - hugepages-2Mi: 1Gi - - - name: aeronmd-dpdk - imagePullPolicy: Always - args: - - "--no-telemetry" - - "/opt/aeron-config/low-latency-driver.properties" - securityContext: - capabilities: - # Required for DPDK - add: - - IPC_LOCK - - SYS_RAWIO - - SYS_ADMIN - envFrom: - - configMapRef: - name: aeron-benchmark-envs-dpdk - volumeMounts: - - name: hugepage-2mi - mountPath: /hugepages-2Mi - - name: shm - mountPath: /dev/shm - - name: config-volume - mountPath: /opt/aeron-config - resources: - limits: - cpu: "8" - memory: 8G - hugepages-2Mi: 1Gi - - # This is to give a persistant endpoint to retrieve the results from once the benchmark has finished 
and exited - - name: results - image: busybox - command: - - "sleep" - - "infinity" - envFrom: - - configMapRef: - name: aeron-benchmark-envs - volumeMounts: - - name: shm - mountPath: /dev/shm - resources: - # Must set limits, as whole Pod must have requests = limits to get into Guarenteed QoS class - limits: - cpu: "1" - memory: 512Mi - -################################################################################################### +# Populated through base-containers.yml kustomization --- apiVersion: v1 kind: Pod metadata: name: aeron-benchmark-1 -spec: - restartPolicy: Never - terminationGracePeriodSeconds: 5 - volumes: - - name: hugepage-2mi - emptyDir: - medium: HugePages-2Mi - - name: shm - emptyDir: - medium: Memory - sizeLimit: 2Gi - # Common properties files - - name: config-volume - configMap: - name: aeron-benchmark - items: - - key: low-latency-driver.properties - path: low-latency-driver.properties - # hostNetwork: true - # dnsPolicy: ClusterFirstWithHostNet - containers: - - name: benchmark - imagePullPolicy: Always - envFrom: - - configMapRef: - name: aeron-benchmark-envs - volumeMounts: - - mountPath: /hugepages-2Mi - name: hugepage-2mi - - mountPath: /dev/shm - name: shm - - name: config-volume - mountPath: /opt/aeron-config - resources: - limits: - cpu: "8" - memory: 10G - hugepages-2Mi: 1Gi - - - name: aeronmd-dpdk - imagePullPolicy: Always - args: - - "--no-telemetry" - - "/opt/aeron-config/low-latency-driver.properties" - securityContext: - capabilities: - # Required for DPDK - add: - - IPC_LOCK - - SYS_RAWIO - - SYS_ADMIN - envFrom: - - configMapRef: - name: aeron-benchmark-envs-dpdk - volumeMounts: - - name: hugepage-2mi - mountPath: /hugepages-2Mi - - name: shm - mountPath: /dev/shm - - name: config-volume - mountPath: /opt/aeron-config - resources: - limits: - cpu: "8" - memory: 8G - hugepages-2Mi: 1Gi - - # This is to give a persistant endpoint to retrieve the results from once the benchmark has finished and exited - - name: results 
- image: busybox - command: - - "sleep" - - "infinity" - volumeMounts: - - name: shm - mountPath: /dev/shm - resources: - # Must set limits, as whole Pod must have requests = limits to get into Guarenteed QoS class - limits: - cpu: "1" - memory: 512Mi +# Populated through base-containers.yml kustomization diff --git a/scripts/k8s/base/kustomization.yml b/scripts/k8s/base/kustomization.yml index 36aea702..778ee663 100644 --- a/scripts/k8s/base/kustomization.yml +++ b/scripts/k8s/base/kustomization.yml @@ -3,6 +3,17 @@ resources: - k8s-benchmark.yml patches: + # Standard config for both the pods + - path: base-containers.yml + target: + kind: Pod + + # Aeron echo specific config + - path: echo-server.yml + target: + kind: Pod + name: aeron-benchmark-0 + - path: settings.yml target: kind: Pod diff --git a/scripts/k8s/base/settings.yml b/scripts/k8s/base/settings.yml index 20da04f2..f890c8c7 100644 --- a/scripts/k8s/base/settings.yml +++ b/scripts/k8s/base/settings.yml @@ -15,13 +15,5 @@ spec: containers: - name: benchmark - image: + image: "< Ref to benchmark container >" imagePullPolicy: Always - - - name: aeronmd-dpdk - image: - imagePullPolicy: Always - resources: - limits: - # Give us a DPDK NIC through https://github.com/AdaptiveConsulting/k8s-dpdk-mgr - intel.com/aws_dpdk: "1" diff --git a/scripts/k8s/k8s-benchmark-entrypoint.sh b/scripts/k8s/k8s-benchmark-entrypoint.sh deleted file mode 100755 index d1e08043..00000000 --- a/scripts/k8s/k8s-benchmark-entrypoint.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -# This runs on the K8s node to perform the benchmarking - -set -eo pipefail - -echo '***********************************' -echo "*** Running $* on ${HOSTNAME} ***" -echo '***********************************' - -# Create the results dir -mkdir -p "${TEST_OUTPUT_PATH:? Please set TEST_OUTPUT_PATH}" -cd "${BENCHMARKS_PATH:? 
Please set BENCHMARKS_PATH}/scripts" - -# Collecting environment info -./collect-environment-info "${TEST_OUTPUT_PATH}" - -# Verify we can get DNS records for the pods -until host "${NODE0_ADDRESS:? Please set NODE0_ADDRESS}" -do - echo "waiting for DNS for ${NODE0_ADDRESS:? Please set NODE0_ADDRESS}" - sleep 5 -done -until host "${NODE1_ADDRESS:? Please set NODE1_ADDRESS}" -do - echo "waiting for DNS for ${NODE1_ADDRESS:? Please set NODE1_ADDRESS}" - sleep 5 -done - -echo '*******************************' -echo "JVM_OPTS:" -echo "${JVM_OPTS:? Please set JVM_OPTS}" | sed "s/ /\n/g" -echo '*******************************' -# Run our command args -"$@" - -if [ -z "$(ls -A ${TEST_OUTPUT_PATH})" ]; then - echo "No test output found" -else - parent_dir="$(dirname "${TEST_OUTPUT_PATH}")" - results_dir="$(basename "${TEST_OUTPUT_PATH}")" - - # Check if we've got plottable results - if ls "${TEST_OUTPUT_PATH}" | grep -Eq '.hdr$' - then - echo "Generating summary" - "${BENCHMARKS_PATH}/scripts/aggregate-results" "${TEST_OUTPUT_PATH}" - echo "Generating graph" - "${BENCHMARKS_PATH}/scripts/results-plotter.py" "${TEST_OUTPUT_PATH}" - fi - - echo "Creating results tarball: ${parent_dir}/results.tar.gz" - tar -C "${parent_dir}" -czf "${parent_dir}/results.tar.gz" "${results_dir}" -fi diff --git a/scripts/k8s/k8s-common b/scripts/k8s/k8s-common new file mode 100644 index 00000000..efeb9a70 --- /dev/null +++ b/scripts/k8s/k8s-common @@ -0,0 +1,133 @@ +#!/bin/bash + +# common functions for K8s benchmarking + +##################################################################################### +# Pins a given thread to a given cpu +function f_pin_thread() { + thread_name="$1" + cpu_core="$2" + + if mypid="$(ps --no-headers -eL -o tid,comm | grep " ${thread_name}" | awk '{print $1}')" + then true + else + echo "** No such thread: ${thread_name}" + exit 1 + fi + + echo "** Setting thread ${thread_name} affinity to cpu ${cpu_core} **" + taskset -pc "${cpu_core}" "${mypid}" +} + 
+##################################################################################### +# Wait for a process to start +function f_wait_for_process() { + my_process="${1:? Please supply the process name to wait for}" + until pgrep -l -f "${my_process}" + do + echo "** Waiting for process start: ${my_process} **" + sleep 2 + done + sleep 2 +} + +##################################################################################### +# Waits for all the DNS names to be resolvable +function f_wait_for_dns() { + + # Verify we can get DNS records for the pods + until host "${NODE0_ADDRESS:? Please set NODE0_ADDRESS}" + do + echo "waiting for DNS for ${NODE0_ADDRESS:? Please set NODE0_ADDRESS}" + sleep 5 + done + until host "${NODE1_ADDRESS:? Please set NODE1_ADDRESS}" + do + echo "waiting for DNS for ${NODE1_ADDRESS:? Please set NODE1_ADDRESS}" + sleep 5 + done + +} + +##################################################################################### +# Things to run *before* a benchmark client runs +function f_benchmark_pre(){ + +echo '***********************************' +echo "*** Running $* on ${HOSTNAME} ***" +echo '***********************************' + +# Sets our own bash script affinity to the first core +taskset -cp "${CGROUP_CPUSETS[0]}" $$ + +# Create the results dir +mkdir -p "${TEST_OUTPUT_PATH:? Please set TEST_OUTPUT_PATH}" +cd "${BENCHMARKS_PATH:? Please set BENCHMARKS_PATH}/scripts" + +# Collecting environment info +./collect-environment-info "${TEST_OUTPUT_PATH}" + +# Verify we can get DNS records for the pods +f_wait_for_dns + +echo '*******************************' +echo "JVM_OPTS:" +echo "${JVM_OPTS:? Please set JVM_OPTS}" | sed "s/ /\n/g" +echo '*******************************' + +} + + +##################################################################################### +# Things to run *after* a benchmark client runs +function f_benchmark_post(){ + + if [ -z "$(ls -A "${TEST_OUTPUT_PATH:? 
Needs to be set}")" ]; then + echo "No test output found" + else + parent_dir="$(dirname "${TEST_OUTPUT_PATH}")" + results_dir="$(basename "${TEST_OUTPUT_PATH}")" + + # Check if we've got plottable results + if ls "${TEST_OUTPUT_PATH}" | grep -Eq '.hdr$' + then + echo "Generating summary" + "${BENCHMARKS_PATH}/scripts/aggregate-results" "${TEST_OUTPUT_PATH}" + echo "Generating graph" + "${BENCHMARKS_PATH}/scripts/results-plotter.py" "${TEST_OUTPUT_PATH}" + fi + + echo "Creating results tarball: ${parent_dir}/results.tar.gz" + tar -C "${parent_dir}" -czf "${parent_dir}/results.tar.gz" "${results_dir}" + fi + + +} + +##################################################################################### +# CPUsets vars - this always runs +if test -e "/sys/fs/cgroup/cpuset/cpuset.cpus" +then + echo "** Cgroup cpuset v1 found **" + export CPUSET_FILE="/sys/fs/cgroup/cpuset/cpuset.cpus" +elif test -e "/sys/fs/cgroup/cpuset.cpus.effective" +then + echo "** Cgroup cpuset v2 found **" + export CPUSET_FILE="/sys/fs/cgroup/cpuset.cpus.effective" +else + echo "** WARNING: Cgroup cpuset lookup requested, but no v1 or v2 cpuset found **" +fi + +# List of CPUs in our Cgroup cpuset +CGROUP_CPUSETS_STRING="$(awk '/-/{for (i=$1; i<=$2; i++)printf "%s%s",i,ORS;next} 1' ORS=' ' RS=, FS=- ${CPUSET_FILE})" +# Convert to an array +CGROUP_CPUSETS=(${CGROUP_CPUSETS_STRING}) +# Check we've got at least 4 cpus +if [[ "${#CGROUP_CPUSETS[@]}" -lt 4 ]] +then + echo "** FATAL: Cgroup cpuset only contains ${#CGROUP_CPUSETS[@]} cpus, need a minimum of 4 **" + return 1 +fi + +echo "** Container CPUset: ${CGROUP_CPUSETS_STRING} **" +echo "***********************************" diff --git a/scripts/k8s/k8s-echo-client.sh b/scripts/k8s/k8s-echo-client.sh new file mode 100755 index 00000000..f497b5b1 --- /dev/null +++ b/scripts/k8s/k8s-echo-client.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Starts the echo client +# Enabled bash job control! 
+set -emo pipefail + +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +source "${DIR}/k8s-common" + +# Do our benchmark pre-setup +f_benchmark_pre + +# Starts the process with affinity set to the second core in the cpuset +echo "** Starting with echo client base cpu core ${CGROUP_CPUSETS[1]}" +# Pass in the benchmark client args +taskset -c "${CGROUP_CPUSETS[1]}" \ +"${DIR}/../benchmark-runner" \ +"$@" aeron/echo-client & + +# Wait for the Java process to be up +f_wait_for_process 'uk.co.real_logic.benchmarks.remote.LoadTestRig' + +# Sets the affinities for high performance threads +f_pin_thread "load-test-rig" "${CGROUP_CPUSETS[2]}" + +# Wait for all background tasks +fg + +# Do our post-benchmark work +f_benchmark_post diff --git a/scripts/k8s/k8s-echo-server.sh b/scripts/k8s/k8s-echo-server.sh new file mode 100755 index 00000000..06c2e80d --- /dev/null +++ b/scripts/k8s/k8s-echo-server.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Starts the echo server +# Enabled bash job control! +set -emo pipefail + +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +source "${DIR}/k8s-common" + +# Do benchmark pre-work +f_benchmark_pre + +# Starts the echo server with a restricted CPU affinity +echo "** Starting with base cpu core ${CGROUP_CPUSETS[1]}" +taskset -c "${CGROUP_CPUSETS[1]}" "${DIR}/../aeron/echo-server" & + +# Wait for Java process to be up +f_wait_for_process 'uk.co.real_logic.benchmarks.aeron.remote.EchoNode' + +# Sets the affinity of the main echo thread +f_pin_thread "echo" "${CGROUP_CPUSETS[2]}" + +# Wait for all background tasks +fg + +# Do our post-benchmark work +f_benchmark_post diff --git a/scripts/k8s/k8s-java-media-driver.sh b/scripts/k8s/k8s-java-media-driver.sh new file mode 100755 index 00000000..6d161e80 --- /dev/null +++ b/scripts/k8s/k8s-java-media-driver.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Starts the java media driver +# Enabled bash job control! 
+set -emo pipefail + +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +source "${DIR}/k8s-common" + +# Sets our own bash script affinity to the first core +taskset -cp "${CGROUP_CPUSETS[0]}" $$ + +# Starts the media driver with affinity set to the first core +echo "** Starting with base cpu core ${CGROUP_CPUSETS[0]}" +taskset -c "${CGROUP_CPUSETS[0]}" "${DIR}/../aeron/media-driver" & + +# Wait for Java process to be up +f_wait_for_process 'io.aeron.driver.MediaDriver' + +# Sets the affinities for the rest of the threads +f_pin_thread "driver-conducto" "${CGROUP_CPUSETS[1]}" +f_pin_thread "sender" "${CGROUP_CPUSETS[2]}" +f_pin_thread "receiver" "${CGROUP_CPUSETS[3]}" + +# Wait for all background tasks +fg