From 15993d6192287502154b117877ccc2c54c3a9e72 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 6 May 2025 15:44:42 +0100 Subject: [PATCH 1/9] chore: run black fmt on /tests Run formatter on python. Signed-off-by: Egor Lazarchuk --- tests/framework/ab_test.py | 2 +- tests/host_tools/fcmetrics.py | 4 ++-- .../build/test_seccomp_no_redundant_rules.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/framework/ab_test.py b/tests/framework/ab_test.py index 2ef3e2350a7..66690f910fa 100644 --- a/tests/framework/ab_test.py +++ b/tests/framework/ab_test.py @@ -160,7 +160,7 @@ def git_ab_test_host_command( def set_did_not_grow_comparator( - set_generator: Callable[[CommandReturn], set] + set_generator: Callable[[CommandReturn], set], ) -> Callable[[CommandReturn, CommandReturn], bool]: """Factory function for comparators to use with git_ab_test_command that converts the command output to sets (using the given callable) and then checks that the "B" set is a subset of the "A" set diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 47661d5b27d..7c984dca3dd 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -3,8 +3,8 @@ """Provides: - - Mechanism to collect and export Firecracker metrics every 60seconds to CloudWatch - - Utility functions to validate Firecracker metrics format and to validate Firecracker device metrics. +- Mechanism to collect and export Firecracker metrics every 60seconds to CloudWatch +- Utility functions to validate Firecracker metrics format and to validate Firecracker device metrics. """ import datetime diff --git a/tests/integration_tests/build/test_seccomp_no_redundant_rules.py b/tests/integration_tests/build/test_seccomp_no_redundant_rules.py index a61b86b5547..cd090e5afa8 100644 --- a/tests/integration_tests/build/test_seccomp_no_redundant_rules.py +++ b/tests/integration_tests/build/test_seccomp_no_redundant_rules.py @@ -1,7 +1,8 @@ # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """A test that fails if it can definitely prove a seccomp rule redundant -(although it passing does not guarantee the converse, that all rules are definitely needed).""" +(although it passing does not guarantee the converse, that all rules are definitely needed). +""" import platform from pathlib import Path From 7442f6150255edde25dcad6e941afccb871977f6 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 2 May 2025 16:01:40 +0100 Subject: [PATCH 2/9] feat(ci): store integration test metrics in metrics.json For each test store metrics it emits into a metrics.json file. This makes it easier to look at the metrics. 
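A minimal sketch of the flow this patch adds, assuming a made-up test and metric name; the fixture API and the metrics.json shape come from the diff below:

```py
# Hypothetical test using the `metrics` fixture from tests/conftest.py.
# MetricsWrapper buffers every put_metric() call in a plain dict and, when a
# results_dir is available, dumps it to <results_dir>/metrics.json on teardown.
def test_boot_time_example(metrics):
    metrics.put_metric("boot_time_us", 12345, "Microseconds")
    metrics.put_metric("boot_time_us", 13000, "Microseconds")
    # After the fixture flushes, metrics.json would contain roughly:
    # {"boot_time_us": {"unit": "Microseconds", "values": [12345, 13000]}}
```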
Signed-off-by: Egor Lazarchuk --- tests/conftest.py | 26 +++++++++++++----- tests/host_tools/metrics.py | 53 +++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6dedf6804dc..31ef78f38b1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -172,7 +172,7 @@ def pytest_runtest_logreport(report): @pytest.fixture() -def metrics(request): +def metrics(results_dir, request): """Fixture to pass the metrics scope We use a fixture instead of the @metrics_scope decorator as that conflicts @@ -188,6 +188,8 @@ def metrics(request): metrics_logger.set_property(prop_name, prop_val) yield metrics_logger metrics_logger.flush() + if results_dir: + metrics_logger.store_data(results_dir) @pytest.fixture @@ -391,17 +393,29 @@ def results_dir(request): """ Fixture yielding the path to a directory into which the test can dump its results - Directories are unique per test, and named after the test name. Everything the tests puts - into its directory will to be uploaded to S3. Directory will be placed inside defs.TEST_RESULTS_DIR. + Directories are unique per test, and their names include test name and test parameters. + Everything the tests puts into its directory will to be uploaded to S3. + Directory will be placed inside defs.TEST_RESULTS_DIR. For example ```py - def test_my_file(results_dir): + @pytest.mark.parametrize("p", ["a", "b"]) + def test_my_file(p, results_dir): (results_dir / "output.txt").write_text("Hello World") ``` - will result in `defs.TEST_RESULTS_DIR`/test_my_file/output.txt. + will result in: + - `defs.TEST_RESULTS_DIR`/test_my_file/test_my_file[a]/output.txt. + - `defs.TEST_RESULTS_DIR`/test_my_file/test_my_file[b]/output.txt. + + When this fixture is called with DoctestItem as a request.node + during doc tests, it will return None. 
""" - results_dir = defs.TEST_RESULTS_DIR / request.node.originalname + try: + results_dir = ( + defs.TEST_RESULTS_DIR / request.node.originalname / request.node.name + ) + except AttributeError: + return None results_dir.mkdir(parents=True, exist_ok=True) return results_dir diff --git a/tests/host_tools/metrics.py b/tests/host_tools/metrics.py index 685b1fa1962..9bf3d5f19c2 100644 --- a/tests/host_tools/metrics.py +++ b/tests/host_tools/metrics.py @@ -46,52 +46,59 @@ import json import os import socket +from pathlib import Path from urllib.parse import urlparse from aws_embedded_metrics.constants import DEFAULT_NAMESPACE from aws_embedded_metrics.logger.metrics_logger_factory import create_metrics_logger -class MetricsWrapperDummy: - """Send metrics to /dev/null""" +class MetricsWrapper: + """A convenient metrics logger""" + + def __init__(self, logger): + self.data = {} + self.logger = logger def set_dimensions(self, *args, **kwargs): """Set dimensions""" + if self.logger: + self.logger.set_dimensions(*args, **kwargs) - def put_metric(self, *args, **kwargs): + def put_metric(self, name, data, unit): """Put a datapoint with given dimensions""" + if name not in self.data: + self.data[name] = {"unit": unit, "values": []} + self.data[name]["values"].append(data) + + if self.logger: + self.logger.put_metric(name, data, unit) def set_property(self, *args, **kwargs): """Set a property""" + if self.logger: + self.logger.set_property(*args, **kwargs) def flush(self): """Flush any remaining metrics""" + if self.logger: + asyncio.run(self.logger.flush()) - -class MetricsWrapper: - """A convenient metrics logger""" - - def __init__(self, logger): - self.logger = logger - - def __getattr__(self, attr): - """Dispatch methods to logger instance""" - if attr not in self.__dict__: - return getattr(self.logger, attr) - return getattr(self, attr) - - def flush(self): - """Flush any remaining metrics""" - asyncio.run(self.logger.flush()) + def store_data(self, dir_path): + """Store data into a file""" + metrics_path = Path(dir_path / "metrics.json") + with open(metrics_path, "w", encoding="utf-8") as f: + json.dump(self.data, f) def get_metrics_logger(): """Get a new metrics logger object""" # if no metrics namespace, don't output metrics - if "AWS_EMF_NAMESPACE" not in os.environ: - return MetricsWrapperDummy() - logger = create_metrics_logger() - logger.reset_dimensions(False) + if "AWS_EMF_NAMESPACE" in os.environ: + logger = create_metrics_logger() + logger.reset_dimensions(False) + else: + logger = None return MetricsWrapper(logger) From 78df75104f48df407a0f036e4600e10c75c87814 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 2 May 2025 13:22:59 +0100 Subject: [PATCH 3/9] feat(ci): save block perf tests data Save logs and json files for block perf tests. 
For log parsing, specify the root_dir for `glob.glob` otherwise it wll struggle with directory names like: `test_block_performance[vmlinux-5.10.233-Sync-bs4096-randread-1vcpu]` Signed-off-by: Egor Lazarchuk --- .../performance/test_block_ab.py | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/tests/integration_tests/performance/test_block_ab.py b/tests/integration_tests/performance/test_block_ab.py index f38872cdc94..2068549b3d8 100644 --- a/tests/integration_tests/performance/test_block_ab.py +++ b/tests/integration_tests/performance/test_block_ab.py @@ -5,7 +5,6 @@ import concurrent import glob import os -import shutil from pathlib import Path import pytest @@ -45,7 +44,7 @@ def prepare_microvm_for_test(microvm): check_output("echo 3 > /proc/sys/vm/drop_caches") -def run_fio(microvm, mode, block_size, fio_engine="libaio"): +def run_fio(microvm, mode, block_size, test_output_dir, fio_engine="libaio"): """Run a fio test in the specified mode with block size bs.""" cmd = ( CmdBuilder("fio") @@ -71,16 +70,11 @@ def run_fio(microvm, mode, block_size, fio_engine="libaio"): .with_arg(f"--write_bw_log={mode}") .with_arg(f"--write_lat_log={mode}") .with_arg("--log_avg_msec=1000") + .with_arg("--output-format=json+") + .with_arg("--output=/tmp/fio.json") .build() ) - logs_path = Path(microvm.jailer.chroot_base_with_id()) / "fio_output" - - if logs_path.is_dir(): - shutil.rmtree(logs_path) - - logs_path.mkdir() - prepare_microvm_for_test(microvm) # Start the CPU load monitor. @@ -97,17 +91,23 @@ def run_fio(microvm, mode, block_size, fio_engine="libaio"): assert rc == 0, stderr assert stderr == "" - microvm.ssh.scp_get("/tmp/*.log", logs_path) - microvm.ssh.check_output("rm /tmp/*.log") + microvm.ssh.scp_get("/tmp/fio.json", test_output_dir) + microvm.ssh.scp_get("/tmp/*.log", test_output_dir) - return logs_path, cpu_load_future.result() + return cpu_load_future.result() -def process_fio_log_files(logs_glob): - """Parses all fio log files matching the given glob and yields tuples of same-timestamp read and write metrics""" +def process_fio_log_files(root_dir, logs_glob): + """ + Parses all fio log files in the root_dir matching the given glob and + yields tuples of same-timestamp read and write metrics + """ + # We specify `root_dir` for `glob.glob` because otherwise it will + # struggle with directory with names like: + # test_block_performance[vmlinux-5.10.233-Sync-bs4096-randread-1vcpu] data = [ - Path(pathname).read_text("UTF-8").splitlines() - for pathname in glob.glob(logs_glob) + Path(root_dir / pathname).read_text("UTF-8").splitlines() + for pathname in glob.glob(logs_glob, root_dir=root_dir) ] assert data, "no log files found!" @@ -134,13 +134,13 @@ def process_fio_log_files(logs_glob): def emit_fio_metrics(logs_dir, metrics): """Parses the fio logs in `{logs_dir}/*_[clat|bw].*.log and emits their contents as CloudWatch metrics""" - for bw_read, bw_write in process_fio_log_files(f"{logs_dir}/*_bw.*.log"): + for bw_read, bw_write in process_fio_log_files(logs_dir, "*_bw.*.log"): if bw_read: metrics.put_metric("bw_read", sum(bw_read), "Kilobytes/Second") if bw_write: metrics.put_metric("bw_write", sum(bw_write), "Kilobytes/Second") - for lat_read, lat_write in process_fio_log_files(f"{logs_dir}/*_clat.*.log"): + for lat_read, lat_write in process_fio_log_files(logs_dir, "*_clat.*.log"): # latency values in fio logs are in nanoseconds, but cloudwatch only supports # microseconds as the more granular unit, so need to divide by 1000. 
for value in lat_read: @@ -164,6 +164,7 @@ def test_block_performance( fio_engine, io_engine, metrics, + results_dir, ): """ Execute block device emulation benchmarking scenarios. @@ -192,9 +193,9 @@ def test_block_performance( vm.pin_threads(0) - logs_dir, cpu_util = run_fio(vm, fio_mode, fio_block_size, fio_engine) + cpu_util = run_fio(vm, fio_mode, fio_block_size, results_dir, fio_engine) - emit_fio_metrics(logs_dir, metrics) + emit_fio_metrics(results_dir, metrics) for thread_name, values in cpu_util.items(): for value in values: @@ -213,6 +214,7 @@ def test_block_vhost_user_performance( fio_mode, fio_block_size, metrics, + results_dir, ): """ Execute block device emulation benchmarking scenarios. @@ -242,9 +244,9 @@ def test_block_vhost_user_performance( next_cpu = vm.pin_threads(0) vm.disks_vhost_user["scratch"].pin(next_cpu) - logs_dir, cpu_util = run_fio(vm, fio_mode, fio_block_size) + cpu_util = run_fio(vm, fio_mode, fio_block_size, results_dir) - emit_fio_metrics(logs_dir, metrics) + emit_fio_metrics(results_dir, metrics) for thread_name, values in cpu_util.items(): for value in values: From c086ad2cb9ce2e451618c2c998b7fa050c3baf5f Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 2 May 2025 14:20:04 +0100 Subject: [PATCH 4/9] feat(ci): save network perf tests data Save logs and json files for network perf tests. Signed-off-by: Egor Lazarchuk --- .../integration_tests/performance/test_network_ab.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/integration_tests/performance/test_network_ab.py b/tests/integration_tests/performance/test_network_ab.py index 3a50d864544..3355d54c2bc 100644 --- a/tests/integration_tests/performance/test_network_ab.py +++ b/tests/integration_tests/performance/test_network_ab.py @@ -2,7 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 """Tests the network latency of a Firecracker guest.""" +import json import re +from pathlib import Path import pytest @@ -95,6 +97,7 @@ def test_network_tcp_throughput( payload_length, mode, metrics, + results_dir, ): """ Iperf between guest and host in both directions for TCP workload. @@ -133,4 +136,13 @@ def test_network_tcp_throughput( ) data = test.run_test(network_microvm.vcpus_count + 2) + for i, g2h in enumerate(data["g2h"]): + Path(results_dir / f"g2h_{i}.json").write_text( + json.dumps(g2h), encoding="utf-8" + ) + for i, h2g in enumerate(data["h2g"]): + Path(results_dir / f"h2g_{i}.json").write_text( + json.dumps(h2g), encoding="utf-8" + ) + emit_iperf3_metrics(metrics, data, warmup_sec) From a473ac81d9742a5f767772022b7a618c4cad9fd2 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 2 May 2025 15:35:10 +0100 Subject: [PATCH 5/9] feat(ci): save vsock perf tests data Save logs and json files for vsock perf tests. 
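Like the network patch above, this dumps each iperf3 stream result as g2h_<i>.json / h2g_<i>.json into the per-test results directory; a hedged sketch of reading those dumps back offline (the path and the iperf3 field layout are assumptions):

```py
import json
from pathlib import Path

# Hypothetical offline pass over the per-stream dumps written below: each
# g2h_<i>.json / h2g_<i>.json is one iperf3 client result serialized with
# json.dumps(). The totals field assumes iperf3's standard --json layout.
for dump in sorted(Path("test_results").rglob("g2h_*.json")):
    report = json.loads(dump.read_text(encoding="utf-8"))
    print(dump, report["end"]["sum_received"]["bits_per_second"])
```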
Signed-off-by: Egor Lazarchuk --- .../performance/test_vsock_ab.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/performance/test_vsock_ab.py b/tests/integration_tests/performance/test_vsock_ab.py index bcee05528af..bad4436e568 100644 --- a/tests/integration_tests/performance/test_vsock_ab.py +++ b/tests/integration_tests/performance/test_vsock_ab.py @@ -2,7 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 """Tests the VSOCK throughput of Firecracker uVMs.""" +import json import os +from pathlib import Path import pytest @@ -73,7 +75,14 @@ def guest_command(self, port_offset): @pytest.mark.parametrize("payload_length", ["64K", "1024K"], ids=["p64K", "p1024K"]) @pytest.mark.parametrize("mode", ["g2h", "h2g", "bd"]) def test_vsock_throughput( - microvm_factory, guest_kernel_acpi, rootfs, vcpus, payload_length, mode, metrics + microvm_factory, + guest_kernel_acpi, + rootfs, + vcpus, + payload_length, + mode, + metrics, + results_dir, ): """ Test vsock throughput for multiple vm configurations. @@ -107,4 +116,13 @@ def test_vsock_throughput( test = VsockIPerf3Test(vm, mode, payload_length) data = test.run_test(vm.vcpus_count + 2) + for i, g2h in enumerate(data["g2h"]): + Path(results_dir / f"g2h_{i}.json").write_text( + json.dumps(g2h), encoding="utf-8" + ) + for i, h2g in enumerate(data["h2g"]): + Path(results_dir / f"h2g_{i}.json").write_text( + json.dumps(h2g), encoding="utf-8" + ) + emit_iperf3_metrics(metrics, data, VsockIPerf3Test.WARMUP_SEC) From 25b527df2c69bfd5364161c94611881e4b20792a Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 2 May 2025 17:39:43 +0100 Subject: [PATCH 6/9] feat(devtool): compress test_results for CI uploads Now `test_results` will contain a lot of files and directories with all test metrics and data files. To speed up upload and download of test_results, compress it's content if tests are run in CI. Also add an `--no-archive` option to skip archiving if needed. Signed-off-by: Egor Lazarchuk --- tools/devtool | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/devtool b/tools/devtool index f86263c4731..b9543aa6b54 100755 --- a/tools/devtool +++ b/tools/devtool @@ -681,6 +681,7 @@ unapply_performance_tweaks() { cmd_test() { do_ab_test=0 do_build=1 + do_archive=1 do_kvm_check=1 # Parse any command line args. while [ $# -gt 0 ]; do @@ -703,6 +704,9 @@ cmd_test() { "--no-build") do_build=0 ;; + "--no-archive") + do_archive=0 + ;; "--no-kvm-check") do_kvm_check=0 ;; @@ -802,6 +806,14 @@ cmd_test() { # do not leave behind env.list file rm env.list + # archive everything in the `test_result` to speed up upload/download + # to s3 if we are in CI + if [ $do_archive != 0 ] && [ -n "$BUILDKITE" ] && [ "$BUILDKITE" = "true" ]; then + tar -czf data.tar.gz -C test_results . 
+ rm -r test_results/* + mv data.tar.gz test_results + fi + return $ret } From e138d4372f7dd78645857d88cc7ce48c25fbc4d3 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Thu, 8 May 2025 14:06:57 +0100 Subject: [PATCH 7/9] feat(ci): make CI output directory configurable Allow to change the output directory of the CI run with --json-report-file option Signed-off-by: Egor Lazarchuk --- tests/conftest.py | 8 ++++---- tests/framework/defs.py | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 31ef78f38b1..541e4c11bf7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -389,7 +389,7 @@ def io_engine(request): @pytest.fixture -def results_dir(request): +def results_dir(request, pytestconfig): """ Fixture yielding the path to a directory into which the test can dump its results @@ -411,9 +411,9 @@ def test_my_file(p, results_dir): during doc tests, it will return None. """ try: - results_dir = ( - defs.TEST_RESULTS_DIR / request.node.originalname / request.node.name - ) + report_file = pytestconfig.getoption("--json-report-file") + parent = Path(report_file).parent.absolute() + results_dir = parent / request.node.originalname / request.node.name except AttributeError: return None results_dir.mkdir(parents=True, exist_ok=True) diff --git a/tests/framework/defs.py b/tests/framework/defs.py index aec3568a85d..827c9ee3caa 100644 --- a/tests/framework/defs.py +++ b/tests/framework/defs.py @@ -23,9 +23,6 @@ # Default test session artifacts path LOCAL_BUILD_PATH = FC_WORKSPACE_DIR / "build/" -# Absolute path to the test results folder -TEST_RESULTS_DIR = FC_WORKSPACE_DIR / "test_results" - DEFAULT_BINARY_DIR = ( LOCAL_BUILD_PATH / "cargo_target" From cfae289127035a0c1dcbe5cbd33fdfdc3bd0c5e6 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 6 May 2025 13:13:14 +0100 Subject: [PATCH 8/9] feat(a/b): store test results for A/B runs Currently when A/B is run, only results for B test are available in the `test_results` dir because this dir is shared for both runs and the last one overwrites the data. Now we store results into separate dirs. Signed-off-by: Egor Lazarchuk --- tools/ab_test.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/ab_test.py b/tools/ab_test.py index 2ac15f18a62..470a1ef16ff 100755 --- a/tools/ab_test.py +++ b/tools/ab_test.py @@ -19,6 +19,7 @@ between the two runs, performing statistical regression test across all the list- valued properties collected. 
""" + import argparse import json import os @@ -180,13 +181,17 @@ def uninteresting_dimensions(processed_emf): return uninteresting -def collect_data(binary_dir: Path, pytest_opts: str): - """Executes the specified test using the provided firecracker binaries""" +def collect_data(tag: str, binary_dir: Path, pytest_opts: str): + """ + Executes the specified test using the provided firecracker binaries and + stores results into the `test_results/tag` directory + """ binary_dir = binary_dir.resolve() print(f"Collecting samples with {binary_dir}") + test_report_path = f"test_results/{tag}/test-report.json" subprocess.run( - f"./tools/test.sh --binary-dir={binary_dir} {pytest_opts} -m ''", + f"./tools/test.sh --binary-dir={binary_dir} {pytest_opts} -m '' --json-report-file=../{test_report_path}", env=os.environ | { "AWS_EMF_ENVIRONMENT": "local", @@ -195,9 +200,8 @@ def collect_data(binary_dir: Path, pytest_opts: str): check=True, shell=True, ) - return load_data_series( - Path("test_results/test-report.json"), binary_dir, reemit=True - ) + + return load_data_series(Path(test_report_path), binary_dir, reemit=True) def analyze_data( @@ -327,7 +331,7 @@ def analyze_data( f"for metric \033[1m{metric}\033[0m with \033[0;31m\033[1mp={result.pvalue}\033[0m. " f"This means that observing a change of this magnitude or worse, assuming that performance " f"characteristics did not change across the tested commits, has a probability of {result.pvalue:.2%}. " - f"Tested Dimensions:\n{json.dumps({k: v for k,v in dimension_set if k not in do_not_print_list}, indent=2, sort_keys=True)}" + f"Tested Dimensions:\n{json.dumps({k: v for k, v in dimension_set if k not in do_not_print_list}, indent=2, sort_keys=True)}" ) messages.append(msg) @@ -346,7 +350,7 @@ def ab_performance_test( """Does an A/B-test of the specified test with the given firecracker/jailer binaries""" return binary_ab_test( - lambda bin_dir, _: collect_data(bin_dir, pytest_opts), + lambda bin_dir, is_a: collect_data(is_a and "A" or "B", bin_dir, pytest_opts), lambda ah, be: analyze_data( ah, be, From 9303fd7731d5b646ad1a6fae40481489ba0e3954 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Mon, 12 May 2025 12:30:41 +0100 Subject: [PATCH 9/9] fix(ci): don't archive results for cross snapshot tests Cross snapshot pipeline run commands after devtool test, so no archiving is needed. Signed-off-by: Egor Lazarchuk --- .buildkite/pipeline_cross.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline_cross.py b/.buildkite/pipeline_cross.py index 2b8c36ec428..e9a7be73891 100755 --- a/.buildkite/pipeline_cross.py +++ b/.buildkite/pipeline_cross.py @@ -29,7 +29,7 @@ ] instances_aarch64 = ["m7g.metal"] commands = [ - "./tools/devtool -y test --no-build -- -m nonci -n4 integration_tests/functional/test_snapshot_phase1.py", + "./tools/devtool -y test --no-build --no-archive -- -m nonci -n4 integration_tests/functional/test_snapshot_phase1.py", # punch holes in mem snapshot tiles and tar them so they are preserved in S3 "find test_results/test_snapshot_phase1 -type f -name mem |xargs -P4 -t -n1 fallocate -d", "mv -v test_results/test_snapshot_phase1 snapshot_artifacts",