Save tests metrics and performance artifacts #5191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged (9 commits) on May 13, 2025
2 changes: 1 addition & 1 deletion .buildkite/pipeline_cross.py
@@ -29,7 +29,7 @@
]
instances_aarch64 = ["m7g.metal"]
commands = [
"./tools/devtool -y test --no-build -- -m nonci -n4 integration_tests/functional/test_snapshot_phase1.py",
"./tools/devtool -y test --no-build --no-archive -- -m nonci -n4 integration_tests/functional/test_snapshot_phase1.py",
# punch holes in mem snapshot tiles and tar them so they are preserved in S3
"find test_results/test_snapshot_phase1 -type f -name mem |xargs -P4 -t -n1 fallocate -d",
"mv -v test_results/test_snapshot_phase1 snapshot_artifacts",
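The new `--no-archive` flag presumably tells devtool not to archive the test results itself; the pipeline then digs holes in the sparse guest-memory snapshot files before moving them to `snapshot_artifacts`, so what gets preserved in S3 stays small. A minimal Python sketch of that hole-punching step, mirroring the `fallocate -d` command above (illustrative only; the directory name is taken from the pipeline snippet):

```py
import subprocess
from pathlib import Path


def dig_holes(results_root: str) -> None:
    """Deallocate zero-filled ranges in every `mem` snapshot file."""
    for mem_file in Path(results_root).rglob("mem"):
        # `fallocate --dig-holes` turns zeroed extents into holes, shrinking
        # the on-disk size of sparse guest memory snapshots before archiving.
        subprocess.run(["fallocate", "--dig-holes", str(mem_file)], check=True)


dig_holes("test_results/test_snapshot_phase1")
```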
28 changes: 21 additions & 7 deletions tests/conftest.py
@@ -172,7 +172,7 @@ def pytest_runtest_logreport(report):


@pytest.fixture()
def metrics(request):
def metrics(results_dir, request):
"""Fixture to pass the metrics scope

We use a fixture instead of the @metrics_scope decorator as that conflicts
@@ -188,6 +188,8 @@ def metrics(request):
metrics_logger.set_property(prop_name, prop_val)
yield metrics_logger
metrics_logger.flush()
if results_dir:
metrics_logger.store_data(results_dir)


@pytest.fixture
@@ -387,21 +389,33 @@ def io_engine(request):


@pytest.fixture
def results_dir(request):
def results_dir(request, pytestconfig):
"""
Fixture yielding the path to a directory into which the test can dump its results

Directories are unique per test, and named after the test name. Everything the tests puts
into its directory will to be uploaded to S3. Directory will be placed inside defs.TEST_RESULTS_DIR.
Directories are unique per test, and their names include the test name and test parameters.
Everything the test puts into its directory will be uploaded to S3.
The directory will be placed inside defs.TEST_RESULTS_DIR.

For example
```py
def test_my_file(results_dir):
@pytest.mark.parametrize("p", ["a", "b"])
def test_my_file(p, results_dir):
(results_dir / "output.txt").write_text("Hello World")
```
will result in `defs.TEST_RESULTS_DIR`/test_my_file/output.txt.
will result in:
- `defs.TEST_RESULTS_DIR`/test_my_file/test_my_file[a]/output.txt.
- `defs.TEST_RESULTS_DIR`/test_my_file/test_my_file[b]/output.txt.

When this fixture is requested during doc tests (request.node is a DoctestItem),
it returns None.
"""
results_dir = defs.TEST_RESULTS_DIR / request.node.originalname
try:
report_file = pytestconfig.getoption("--json-report-file")
parent = Path(report_file).parent.absolute()
results_dir = parent / request.node.originalname / request.node.name
except AttributeError:
return None
results_dir.mkdir(parents=True, exist_ok=True)
return results_dir

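Taken together with the `metrics` fixture change above, every test that requests `results_dir` now gets a per-parametrization directory under the pytest JSON report's parent, and the metrics logger dumps its collected datapoints there as `metrics.json` on teardown. A hypothetical test illustrating the layout (the test name, parameter values, and report path are invented for the example and not part of the PR):

```py
import pytest


@pytest.mark.parametrize("payload", ["64K", "1024K"])
def test_example_throughput(metrics, results_dir, payload):
    # Dimensions/metrics are forwarded to CloudWatch only if AWS_EMF_NAMESPACE
    # is set; the datapoints are always recorded for the metrics.json dump.
    metrics.set_dimensions({"payload": payload})
    metrics.put_metric("throughput", 123.4, "Megabits/Second")
    # Anything written into results_dir is preserved alongside metrics.json.
    (results_dir / "raw_output.json").write_text("{}", encoding="utf-8")


# Assuming --json-report-file=test_results/report.json, artifacts land in:
#   test_results/test_example_throughput/test_example_throughput[64K]/metrics.json
#   test_results/test_example_throughput/test_example_throughput[64K]/raw_output.json
#   test_results/test_example_throughput/test_example_throughput[1024K]/...
```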
2 changes: 1 addition & 1 deletion tests/framework/ab_test.py
@@ -160,7 +160,7 @@ def git_ab_test_host_command(


def set_did_not_grow_comparator(
set_generator: Callable[[CommandReturn], set]
set_generator: Callable[[CommandReturn], set],
) -> Callable[[CommandReturn, CommandReturn], bool]:
"""Factory function for comparators to use with git_ab_test_command that converts the command output to sets
(using the given callable) and then checks that the "B" set is a subset of the "A" set
3 changes: 0 additions & 3 deletions tests/framework/defs.py
@@ -23,9 +23,6 @@
# Default test session artifacts path
LOCAL_BUILD_PATH = FC_WORKSPACE_DIR / "build/"

# Absolute path to the test results folder
TEST_RESULTS_DIR = FC_WORKSPACE_DIR / "test_results"

DEFAULT_BINARY_DIR = (
LOCAL_BUILD_PATH
/ "cargo_target"
4 changes: 2 additions & 2 deletions tests/host_tools/fcmetrics.py
@@ -3,8 +3,8 @@


"""Provides:
- Mechanism to collect and export Firecracker metrics every 60seconds to CloudWatch
- Utility functions to validate Firecracker metrics format and to validate Firecracker device metrics.
- Mechanism to collect and export Firecracker metrics every 60 seconds to CloudWatch
- Utility functions to validate Firecracker metrics format and to validate Firecracker device metrics.
"""

import datetime
53 changes: 30 additions & 23 deletions tests/host_tools/metrics.py
@@ -46,52 +46,59 @@
import json
import os
import socket
from pathlib import Path
from urllib.parse import urlparse

from aws_embedded_metrics.constants import DEFAULT_NAMESPACE
from aws_embedded_metrics.logger.metrics_logger_factory import create_metrics_logger


class MetricsWrapperDummy:
"""Send metrics to /dev/null"""
class MetricsWrapper:
"""A convenient metrics logger"""

def __init__(self, logger):
self.data = {}
self.logger = logger

def set_dimensions(self, *args, **kwargs):
"""Set dimensions"""
if self.logger:
self.logger.set_dimensions(*args, **kwargs)

def put_metric(self, *args, **kwargs):
def put_metric(self, name, data, unit):
"""Put a datapoint with given dimensions"""
if name not in self.data:
self.data[name] = {"unit": unit, "values": []}
self.data[name]["values"].append(data)

if self.logger:
self.logger.put_metric(name, data, unit)

def set_property(self, *args, **kwargs):
"""Set a property"""
if self.logger:
self.logger.set_property(*args, **kwargs)

def flush(self):
"""Flush any remaining metrics"""
if self.logger:
asyncio.run(self.logger.flush())


class MetricsWrapper:
"""A convenient metrics logger"""

def __init__(self, logger):
self.logger = logger

def __getattr__(self, attr):
"""Dispatch methods to logger instance"""
if attr not in self.__dict__:
return getattr(self.logger, attr)
return getattr(self, attr)

def flush(self):
"""Flush any remaining metrics"""
asyncio.run(self.logger.flush())
def store_data(self, dir_path):
"""Store data into a file"""
metrics_path = Path(dir_path / "metrics.json")
with open(metrics_path, "w", encoding="utf-8") as f:
json.dump(self.data, f)


def get_metrics_logger():
"""Get a new metrics logger object"""
# if no metrics namespace, don't output metrics
if "AWS_EMF_NAMESPACE" not in os.environ:
return MetricsWrapperDummy()
logger = create_metrics_logger()
logger.reset_dimensions(False)
if "AWS_EMF_NAMESPACE" in os.environ:
logger = create_metrics_logger()
logger.reset_dimensions(False)
else:
logger = None
return MetricsWrapper(logger)


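With the dummy/real split gone, `MetricsWrapper` always records every `put_metric` call locally and only forwards it to CloudWatch when `AWS_EMF_NAMESPACE` is set; `store_data` then serializes the accumulated `{name: {"unit": ..., "values": [...]}}` map to `metrics.json`. A small sketch of consuming such a file offline (the results path is illustrative):

```py
import json
from pathlib import Path


def load_metrics(results_dir: str) -> dict:
    """Return the {metric_name: {"unit": str, "values": [...]}} map."""
    return json.loads((Path(results_dir) / "metrics.json").read_text("utf-8"))


data = load_metrics("test_results/test_example/test_example[64K]")
for name, series in data.items():
    values = series["values"]
    print(f"{name}: avg={sum(values) / len(values):.2f} {series['unit']}")
```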
@@ -1,7 +1,8 @@
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""A test that fails if it can definitely prove a seccomp rule redundant
(although it passing does not guarantee the converse, that all rules are definitely needed)."""
(although it passing does not guarantee the converse, that all rules are definitely needed).
"""
import platform
from pathlib import Path

46 changes: 24 additions & 22 deletions tests/integration_tests/performance/test_block_ab.py
@@ -5,7 +5,6 @@
import concurrent
import glob
import os
import shutil
from pathlib import Path

import pytest
@@ -45,7 +44,7 @@ def prepare_microvm_for_test(microvm):
check_output("echo 3 > /proc/sys/vm/drop_caches")


def run_fio(microvm, mode, block_size, fio_engine="libaio"):
def run_fio(microvm, mode, block_size, test_output_dir, fio_engine="libaio"):
"""Run a fio test in the specified mode with block size bs."""
cmd = (
CmdBuilder("fio")
@@ -71,16 +70,11 @@ def run_fio(microvm, mode, block_size, fio_engine="libaio"):
.with_arg(f"--write_bw_log={mode}")
.with_arg(f"--write_lat_log={mode}")
.with_arg("--log_avg_msec=1000")
.with_arg("--output-format=json+")
.with_arg("--output=/tmp/fio.json")
.build()
)

logs_path = Path(microvm.jailer.chroot_base_with_id()) / "fio_output"

if logs_path.is_dir():
shutil.rmtree(logs_path)

logs_path.mkdir()

prepare_microvm_for_test(microvm)

# Start the CPU load monitor.
@@ -97,17 +91,23 @@ def run_fio(microvm, mode, block_size, fio_engine="libaio"):
assert rc == 0, stderr
assert stderr == ""

microvm.ssh.scp_get("/tmp/*.log", logs_path)
microvm.ssh.check_output("rm /tmp/*.log")
microvm.ssh.scp_get("/tmp/fio.json", test_output_dir)
microvm.ssh.scp_get("/tmp/*.log", test_output_dir)

return logs_path, cpu_load_future.result()
return cpu_load_future.result()


def process_fio_log_files(logs_glob):
"""Parses all fio log files matching the given glob and yields tuples of same-timestamp read and write metrics"""
def process_fio_log_files(root_dir, logs_glob):
"""
Parses all fio log files in the root_dir matching the given glob and
yields tuples of same-timestamp read and write metrics
"""
# We specify `root_dir` for `glob.glob` because otherwise it will
# struggle with directory names like:
# test_block_performance[vmlinux-5.10.233-Sync-bs4096-randread-1vcpu]
data = [
Path(pathname).read_text("UTF-8").splitlines()
for pathname in glob.glob(logs_glob)
Path(root_dir / pathname).read_text("UTF-8").splitlines()
for pathname in glob.glob(logs_glob, root_dir=root_dir)
]

assert data, "no log files found!"
@@ -134,13 +134,13 @@ def process_fio_log_files(logs_glob):

def emit_fio_metrics(logs_dir, metrics):
"""Parses the fio logs in `{logs_dir}/*_[clat|bw].*.log and emits their contents as CloudWatch metrics"""
for bw_read, bw_write in process_fio_log_files(f"{logs_dir}/*_bw.*.log"):
for bw_read, bw_write in process_fio_log_files(logs_dir, "*_bw.*.log"):
if bw_read:
metrics.put_metric("bw_read", sum(bw_read), "Kilobytes/Second")
if bw_write:
metrics.put_metric("bw_write", sum(bw_write), "Kilobytes/Second")

for lat_read, lat_write in process_fio_log_files(f"{logs_dir}/*_clat.*.log"):
for lat_read, lat_write in process_fio_log_files(logs_dir, "*_clat.*.log"):
# latency values in fio logs are in nanoseconds, but cloudwatch only supports
# microseconds as the more granular unit, so need to divide by 1000.
for value in lat_read:
@@ -164,6 +164,7 @@ def test_block_performance(
fio_engine,
io_engine,
metrics,
results_dir,
):
"""
Execute block device emulation benchmarking scenarios.
@@ -192,9 +193,9 @@

vm.pin_threads(0)

logs_dir, cpu_util = run_fio(vm, fio_mode, fio_block_size, fio_engine)
cpu_util = run_fio(vm, fio_mode, fio_block_size, results_dir, fio_engine)

emit_fio_metrics(logs_dir, metrics)
emit_fio_metrics(results_dir, metrics)

for thread_name, values in cpu_util.items():
for value in values:
@@ -213,6 +214,7 @@ def test_block_vhost_user_performance(
fio_mode,
fio_block_size,
metrics,
results_dir,
):
"""
Execute block device emulation benchmarking scenarios.
@@ -242,9 +244,9 @@
next_cpu = vm.pin_threads(0)
vm.disks_vhost_user["scratch"].pin(next_cpu)

logs_dir, cpu_util = run_fio(vm, fio_mode, fio_block_size)
cpu_util = run_fio(vm, fio_mode, fio_block_size, results_dir)

emit_fio_metrics(logs_dir, metrics)
emit_fio_metrics(results_dir, metrics)

for thread_name, values in cpu_util.items():
for value in values:
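The `root_dir` argument matters because parametrized test directories contain square brackets (e.g. `test_block_performance[vmlinux-5.10.233-Sync-bs4096-randread-1vcpu]`), which `glob` would otherwise parse as a character class. A short sketch of the difference (the path prefix is illustrative; `root_dir` requires Python 3.10+):

```py
import glob

results_dir = (
    "test_results/test_block_performance"
    "/test_block_performance[vmlinux-5.10.233-Sync-bs4096-randread-1vcpu]"
)

# Brackets in the embedded path are treated as a character class, so this
# pattern silently matches nothing even though the log files exist.
broken = glob.glob(f"{results_dir}/*_bw.*.log")

# Keeping the bracketed directory out of the pattern avoids the issue;
# matches are returned relative to root_dir.
working = glob.glob("*_bw.*.log", root_dir=results_dir)
```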
12 changes: 12 additions & 0 deletions tests/integration_tests/performance/test_network_ab.py
@@ -2,7 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
"""Tests the network latency of a Firecracker guest."""

import json
import re
from pathlib import Path

import pytest

@@ -95,6 +97,7 @@ def test_network_tcp_throughput(
payload_length,
mode,
metrics,
results_dir,
):
"""
Iperf between guest and host in both directions for TCP workload.
@@ -133,4 +136,13 @@
)
data = test.run_test(network_microvm.vcpus_count + 2)

for i, g2h in enumerate(data["g2h"]):
Path(results_dir / f"g2h_{i}.json").write_text(
json.dumps(g2h), encoding="utf-8"
)
for i, h2g in enumerate(data["h2g"]):
Path(results_dir / f"h2g_{i}.json").write_text(
json.dumps(h2g), encoding="utf-8"
)

emit_iperf3_metrics(metrics, data, warmup_sec)
20 changes: 19 additions & 1 deletion tests/integration_tests/performance/test_vsock_ab.py
@@ -2,7 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
"""Tests the VSOCK throughput of Firecracker uVMs."""

import json
import os
from pathlib import Path

import pytest

@@ -73,7 +75,14 @@ def guest_command(self, port_offset):
@pytest.mark.parametrize("payload_length", ["64K", "1024K"], ids=["p64K", "p1024K"])
@pytest.mark.parametrize("mode", ["g2h", "h2g", "bd"])
def test_vsock_throughput(
microvm_factory, guest_kernel_acpi, rootfs, vcpus, payload_length, mode, metrics
microvm_factory,
guest_kernel_acpi,
rootfs,
vcpus,
payload_length,
mode,
metrics,
results_dir,
):
"""
Test vsock throughput for multiple vm configurations.
@@ -107,4 +116,13 @@
test = VsockIPerf3Test(vm, mode, payload_length)
data = test.run_test(vm.vcpus_count + 2)

for i, g2h in enumerate(data["g2h"]):
Path(results_dir / f"g2h_{i}.json").write_text(
json.dumps(g2h), encoding="utf-8"
)
for i, h2g in enumerate(data["h2g"]):
Path(results_dir / f"h2g_{i}.json").write_text(
json.dumps(h2g), encoding="utf-8"
)

emit_iperf3_metrics(metrics, data, VsockIPerf3Test.WARMUP_SEC)
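Both throughput tests now persist each iperf3 stream's raw JSON next to the emitted metrics. The duplicated loops could be captured by a small helper along these lines (a sketch, not part of the PR; it assumes the `data` dict shape used above):

```py
import json
from pathlib import Path


def dump_iperf3_streams(data: dict, results_dir: Path) -> None:
    """Write each g2h/h2g iperf3 stream result to its own JSON file."""
    for direction in ("g2h", "h2g"):
        for i, stream in enumerate(data.get(direction, [])):
            (results_dir / f"{direction}_{i}.json").write_text(
                json.dumps(stream), encoding="utf-8"
            )
```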