Save tests metrics and performance artifacts #5191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged (9 commits) on May 13, 2025
2 changes: 1 addition & 1 deletion .buildkite/pipeline_cross.py
@@ -29,7 +29,7 @@
]
instances_aarch64 = ["m7g.metal"]
commands = [
"./tools/devtool -y test --no-build -- -m nonci -n4 integration_tests/functional/test_snapshot_phase1.py",
"./tools/devtool -y test --no-build --no-archive -- -m nonci -n4 integration_tests/functional/test_snapshot_phase1.py",
# punch holes in mem snapshot tiles and tar them so they are preserved in S3
"find test_results/test_snapshot_phase1 -type f -name mem |xargs -P4 -t -n1 fallocate -d",
"mv -v test_results/test_snapshot_phase1 snapshot_artifacts",
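The new `--no-archive` flag presumably tells devtool not to archive the test results itself; the pipeline then digs holes in the sparse guest-memory snapshot files before moving them to `snapshot_artifacts`, so what gets preserved in S3 stays small. A minimal Python sketch of that hole-punching step, mirroring the `fallocate -d` command above (illustrative only; the directory name is taken from the pipeline snippet):

```py
import subprocess
from pathlib import Path


def dig_holes(results_root: str) -> None:
    """Deallocate zero-filled ranges in every `mem` snapshot file."""
    for mem_file in Path(results_root).rglob("mem"):
        # `fallocate --dig-holes` turns zeroed extents into holes, shrinking
        # the on-disk size of sparse guest memory snapshots before archiving.
        subprocess.run(["fallocate", "--dig-holes", str(mem_file)], check=True)


dig_holes("test_results/test_snapshot_phase1")
```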
28 changes: 21 additions & 7 deletions tests/conftest.py
@@ -172,7 +172,7 @@ def pytest_runtest_logreport(report):


@pytest.fixture()
def metrics(request):
def metrics(results_dir, request):
"""Fixture to pass the metrics scope

We use a fixture instead of the @metrics_scope decorator as that conflicts
@@ -188,6 +188,8 @@ def metrics(request):
metrics_logger.set_property(prop_name, prop_val)
yield metrics_logger
metrics_logger.flush()
if results_dir:
metrics_logger.store_data(results_dir)


@pytest.fixture
@@ -387,21 +389,33 @@ def io_engine(request):


@pytest.fixture
def results_dir(request):
def results_dir(request, pytestconfig):
"""
Fixture yielding the path to a directory into which the test can dump its results

Directories are unique per test, and named after the test name. Everything the tests puts
into its directory will to be uploaded to S3. Directory will be placed inside defs.TEST_RESULTS_DIR.
Directories are unique per test, and their names include the test name and test parameters.
Everything the test puts into its directory will be uploaded to S3.
The directory will be placed inside defs.TEST_RESULTS_DIR.

For example
```py
def test_my_file(results_dir):
@pytest.mark.parametrize("p", ["a", "b"])
def test_my_file(p, results_dir):
(results_dir / "output.txt").write_text("Hello World")
```
will result in `defs.TEST_RESULTS_DIR`/test_my_file/output.txt.
will result in:
- `defs.TEST_RESULTS_DIR`/test_my_file/test_my_file[a]/output.txt.
- `defs.TEST_RESULTS_DIR`/test_my_file/test_my_file[b]/output.txt.

When this fixture is requested during doc tests (request.node is a DoctestItem),
it returns None.
"""
results_dir = defs.TEST_RESULTS_DIR / request.node.originalname
try:
report_file = pytestconfig.getoption("--json-report-file")
parent = Path(report_file).parent.absolute()
results_dir = parent / request.node.originalname / request.node.name
except AttributeError:
return None
results_dir.mkdir(parents=True, exist_ok=True)
return results_dir

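Taken together with the `metrics` fixture change above, every test that requests `results_dir` now gets a per-parametrization directory under the pytest JSON report's parent, and the metrics logger dumps its collected datapoints there as `metrics.json` on teardown. A hypothetical test illustrating the layout (the test name, parameter values, and report path are invented for the example and not part of the PR):

```py
import pytest


@pytest.mark.parametrize("payload", ["64K", "1024K"])
def test_example_throughput(metrics, results_dir, payload):
    # Dimensions/metrics are forwarded to CloudWatch only if AWS_EMF_NAMESPACE
    # is set; the datapoints are always recorded for the metrics.json dump.
    metrics.set_dimensions({"payload": payload})
    metrics.put_metric("throughput", 123.4, "Megabits/Second")
    # Anything written into results_dir is preserved alongside metrics.json.
    (results_dir / "raw_output.json").write_text("{}", encoding="utf-8")


# Assuming --json-report-file=test_results/report.json, artifacts land in:
#   test_results/test_example_throughput/test_example_throughput[64K]/metrics.json
#   test_results/test_example_throughput/test_example_throughput[64K]/raw_output.json
#   test_results/test_example_throughput/test_example_throughput[1024K]/...
```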
2 changes: 1 addition & 1 deletion tests/framework/ab_test.py
@@ -160,7 +160,7 @@ def git_ab_test_host_command(


def set_did_not_grow_comparator(
set_generator: Callable[[CommandReturn], set]
set_generator: Callable[[CommandReturn], set],
) -> Callable[[CommandReturn, CommandReturn], bool]:
"""Factory function for comparators to use with git_ab_test_command that converts the command output to sets
(using the given callable) and then checks that the "B" set is a subset of the "A" set
3 changes: 0 additions & 3 deletions tests/framework/defs.py
@@ -23,9 +23,6 @@
# Default test session artifacts path
LOCAL_BUILD_PATH = FC_WORKSPACE_DIR / "build/"

# Absolute path to the test results folder
TEST_RESULTS_DIR = FC_WORKSPACE_DIR / "test_results"

DEFAULT_BINARY_DIR = (
LOCAL_BUILD_PATH
/ "cargo_target"
4 changes: 2 additions & 2 deletions tests/host_tools/fcmetrics.py
@@ -3,8 +3,8 @@


"""Provides:
- Mechanism to collect and export Firecracker metrics every 60seconds to CloudWatch
- Utility functions to validate Firecracker metrics format and to validate Firecracker device metrics.
- Mechanism to collect and export Firecracker metrics every 60 seconds to CloudWatch
- Utility functions to validate Firecracker metrics format and to validate Firecracker device metrics.
"""

import datetime
53 changes: 30 additions & 23 deletions tests/host_tools/metrics.py
@@ -46,52 +46,59 @@
import json
import os
import socket
from pathlib import Path
from urllib.parse import urlparse

from aws_embedded_metrics.constants import DEFAULT_NAMESPACE
from aws_embedded_metrics.logger.metrics_logger_factory import create_metrics_logger


class MetricsWrapperDummy:
"""Send metrics to /dev/null"""
class MetricsWrapper:
"""A convenient metrics logger"""

def __init__(self, logger):
self.data = {}
self.logger = logger

def set_dimensions(self, *args, **kwargs):
"""Set dimensions"""
if self.logger:
self.logger.set_dimensions(*args, **kwargs)

def put_metric(self, *args, **kwargs):
def put_metric(self, name, data, unit):
"""Put a datapoint with given dimensions"""
if name not in self.data:
self.data[name] = {"unit": unit, "values": []}
self.data[name]["values"].append(data)

if self.logger:
self.logger.put_metric(name, data, unit)

def set_property(self, *args, **kwargs):
"""Set a property"""
if self.logger:
self.logger.set_property(*args, **kwargs)

def flush(self):
"""Flush any remaining metrics"""
if self.logger:
asyncio.run(self.logger.flush())


class MetricsWrapper:
"""A convenient metrics logger"""

def __init__(self, logger):
self.logger = logger

def __getattr__(self, attr):
"""Dispatch methods to logger instance"""
if attr not in self.__dict__:
return getattr(self.logger, attr)
return getattr(self, attr)

def flush(self):
"""Flush any remaining metrics"""
asyncio.run(self.logger.flush())
def store_data(self, dir_path):
"""Store data into a file"""
metrics_path = Path(dir_path / "metrics.json")
with open(metrics_path, "w", encoding="utf-8") as f:
json.dump(self.data, f)


def get_metrics_logger():
"""Get a new metrics logger object"""
# if no metrics namespace, don't output metrics
if "AWS_EMF_NAMESPACE" not in os.environ:
return MetricsWrapperDummy()
logger = create_metrics_logger()
logger.reset_dimensions(False)
if "AWS_EMF_NAMESPACE" in os.environ:
logger = create_metrics_logger()
logger.reset_dimensions(False)
else:
logger = None
return MetricsWrapper(logger)


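With the dummy/real split gone, `MetricsWrapper` always records every `put_metric` call locally and only forwards it to CloudWatch when `AWS_EMF_NAMESPACE` is set; `store_data` then serializes the accumulated `{name: {"unit": ..., "values": [...]}}` map to `metrics.json`. A small sketch of consuming such a file offline (the results path is illustrative):

```py
import json
from pathlib import Path


def load_metrics(results_dir: str) -> dict:
    """Return the {metric_name: {"unit": str, "values": [...]}} map."""
    return json.loads((Path(results_dir) / "metrics.json").read_text("utf-8"))


data = load_metrics("test_results/test_example/test_example[64K]")
for name, series in data.items():
    values = series["values"]
    print(f"{name}: avg={sum(values) / len(values):.2f} {series['unit']}")
```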
@@ -1,7 +1,8 @@
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""A test that fails if it can definitely prove a seccomp rule redundant
(although it passing does not guarantee the converse, that all rules are definitely needed)."""
(although it passing does not guarantee the converse, that all rules are definitely needed).
"""
import platform
from pathlib import Path

46 changes: 24 additions & 22 deletions tests/integration_tests/performance/test_block_ab.py
@@ -5,7 +5,6 @@
import concurrent
import glob
import os
import shutil
from pathlib import Path

import pytest
@@ -45,7 +44,7 @@ def prepare_microvm_for_test(microvm):
check_output("echo 3 > /proc/sys/vm/drop_caches")


def run_fio(microvm, mode, block_size, fio_engine="libaio"):
def run_fio(microvm, mode, block_size, test_output_dir, fio_engine="libaio"):
"""Run a fio test in the specified mode with block size bs."""
cmd = (
CmdBuilder("fio")
@@ -71,16 +70,11 @@ def run_fio(microvm, mode, block_size, fio_engine="libaio"):
.with_arg(f"--write_bw_log={mode}")
.with_arg(f"--write_lat_log={mode}")
.with_arg("--log_avg_msec=1000")
.with_arg("--output-format=json+")
.with_arg("--output=/tmp/fio.json")
.build()
)

logs_path = Path(microvm.jailer.chroot_base_with_id()) / "fio_output"

if logs_path.is_dir():
shutil.rmtree(logs_path)

logs_path.mkdir()

prepare_microvm_for_test(microvm)

# Start the CPU load monitor.
@@ -97,17 +91,23 @@ def run_fio(microvm, mode, block_size, fio_engine="libaio"):
assert rc == 0, stderr
assert stderr == ""

microvm.ssh.scp_get("/tmp/*.log", logs_path)
microvm.ssh.check_output("rm /tmp/*.log")
microvm.ssh.scp_get("/tmp/fio.json", test_output_dir)
microvm.ssh.scp_get("/tmp/*.log", test_output_dir)

return logs_path, cpu_load_future.result()
return cpu_load_future.result()


def process_fio_log_files(logs_glob):
"""Parses all fio log files matching the given glob and yields tuples of same-timestamp read and write metrics"""
def process_fio_log_files(root_dir, logs_glob):
"""
Parses all fio log files in the root_dir matching the given glob and
yields tuples of same-timestamp read and write metrics
"""
# We specify `root_dir` for `glob.glob` because otherwise it will
# struggle with directory names like:
# test_block_performance[vmlinux-5.10.233-Sync-bs4096-randread-1vcpu]
data = [
Path(pathname).read_text("UTF-8").splitlines()
for pathname in glob.glob(logs_glob)
Path(root_dir / pathname).read_text("UTF-8").splitlines()
for pathname in glob.glob(logs_glob, root_dir=root_dir)
]

assert data, "no log files found!"
@@ -134,13 +134,13 @@ def process_fio_log_files(logs_glob):

def emit_fio_metrics(logs_dir, metrics):
"""Parses the fio logs in `{logs_dir}/*_[clat|bw].*.log and emits their contents as CloudWatch metrics"""
for bw_read, bw_write in process_fio_log_files(f"{logs_dir}/*_bw.*.log"):
for bw_read, bw_write in process_fio_log_files(logs_dir, "*_bw.*.log"):
if bw_read:
metrics.put_metric("bw_read", sum(bw_read), "Kilobytes/Second")
if bw_write:
metrics.put_metric("bw_write", sum(bw_write), "Kilobytes/Second")

for lat_read, lat_write in process_fio_log_files(f"{logs_dir}/*_clat.*.log"):
for lat_read, lat_write in process_fio_log_files(logs_dir, "*_clat.*.log"):
# latency values in fio logs are in nanoseconds, but cloudwatch only supports
# microseconds as the more granular unit, so need to divide by 1000.
for value in lat_read:
@@ -164,6 +164,7 @@ def test_block_performance(
fio_engine,
io_engine,
metrics,
results_dir,
):
"""
Execute block device emulation benchmarking scenarios.
@@ -192,9 +193,9 @@

vm.pin_threads(0)

logs_dir, cpu_util = run_fio(vm, fio_mode, fio_block_size, fio_engine)
cpu_util = run_fio(vm, fio_mode, fio_block_size, results_dir, fio_engine)

emit_fio_metrics(logs_dir, metrics)
emit_fio_metrics(results_dir, metrics)

for thread_name, values in cpu_util.items():
for value in values:
@@ -213,6 +214,7 @@ def test_block_vhost_user_performance(
fio_mode,
fio_block_size,
metrics,
results_dir,
):
"""
Execute block device emulation benchmarking scenarios.
@@ -242,9 +244,9 @@
next_cpu = vm.pin_threads(0)
vm.disks_vhost_user["scratch"].pin(next_cpu)

logs_dir, cpu_util = run_fio(vm, fio_mode, fio_block_size)
cpu_util = run_fio(vm, fio_mode, fio_block_size, results_dir)

emit_fio_metrics(logs_dir, metrics)
emit_fio_metrics(results_dir, metrics)

for thread_name, values in cpu_util.items():
for value in values:
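The `root_dir` argument matters because parametrized test directories contain square brackets (e.g. `test_block_performance[vmlinux-5.10.233-Sync-bs4096-randread-1vcpu]`), which `glob` would otherwise parse as a character class. A short sketch of the difference (the path prefix is illustrative; `root_dir` requires Python 3.10+):

```py
import glob

results_dir = (
    "test_results/test_block_performance"
    "/test_block_performance[vmlinux-5.10.233-Sync-bs4096-randread-1vcpu]"
)

# Brackets in the embedded path are treated as a character class, so this
# pattern silently matches nothing even though the log files exist.
broken = glob.glob(f"{results_dir}/*_bw.*.log")

# Keeping the bracketed directory out of the pattern avoids the issue;
# matches are returned relative to root_dir.
working = glob.glob("*_bw.*.log", root_dir=results_dir)
```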
12 changes: 12 additions & 0 deletions tests/integration_tests/performance/test_network_ab.py
@@ -2,7 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
"""Tests the network latency of a Firecracker guest."""

import json
import re
from pathlib import Path

import pytest

@@ -95,6 +97,7 @@ def test_network_tcp_throughput(
payload_length,
mode,
metrics,
results_dir,
):
"""
Iperf between guest and host in both directions for TCP workload.
@@ -133,4 +136,13 @@
)
data = test.run_test(network_microvm.vcpus_count + 2)

for i, g2h in enumerate(data["g2h"]):
Path(results_dir / f"g2h_{i}.json").write_text(
json.dumps(g2h), encoding="utf-8"
)
for i, h2g in enumerate(data["h2g"]):
Path(results_dir / f"h2g_{i}.json").write_text(
json.dumps(h2g), encoding="utf-8"
)

emit_iperf3_metrics(metrics, data, warmup_sec)
20 changes: 19 additions & 1 deletion tests/integration_tests/performance/test_vsock_ab.py
@@ -2,7 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
"""Tests the VSOCK throughput of Firecracker uVMs."""

import json
import os
from pathlib import Path

import pytest

@@ -73,7 +75,14 @@ def guest_command(self, port_offset):
@pytest.mark.parametrize("payload_length", ["64K", "1024K"], ids=["p64K", "p1024K"])
@pytest.mark.parametrize("mode", ["g2h", "h2g", "bd"])
def test_vsock_throughput(
microvm_factory, guest_kernel_acpi, rootfs, vcpus, payload_length, mode, metrics
microvm_factory,
guest_kernel_acpi,
rootfs,
vcpus,
payload_length,
mode,
metrics,
results_dir,
):
"""
Test vsock throughput for multiple vm configurations.
@@ -107,4 +116,13 @@
test = VsockIPerf3Test(vm, mode, payload_length)
data = test.run_test(vm.vcpus_count + 2)

for i, g2h in enumerate(data["g2h"]):
Path(results_dir / f"g2h_{i}.json").write_text(
json.dumps(g2h), encoding="utf-8"
)
for i, h2g in enumerate(data["h2g"]):
Path(results_dir / f"h2g_{i}.json").write_text(
json.dumps(h2g), encoding="utf-8"
)

emit_iperf3_metrics(metrics, data, VsockIPerf3Test.WARMUP_SEC)
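Both throughput tests now persist each iperf3 stream's raw JSON next to the emitted metrics. The duplicated loops could be captured by a small helper along these lines (a sketch, not part of the PR; it assumes the `data` dict shape used above):

```py
import json
from pathlib import Path


def dump_iperf3_streams(data: dict, results_dir: Path) -> None:
    """Write each g2h/h2g iperf3 stream result to its own JSON file."""
    for direction in ("g2h", "h2g"):
        for i, stream in enumerate(data.get(direction, [])):
            (results_dir / f"{direction}_{i}.json").write_text(
                json.dumps(stream), encoding="utf-8"
            )
```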