diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 86b93ac06..55e79c0ae 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -5,6 +5,7 @@ on:
     types: [published]
   workflow_dispatch: {}
 
+
 jobs:
   build_wheels:
     if: github.repository_owner == 'mlcommons'
diff --git a/automation/script/experiment.py b/automation/script/experiment.py
index 5195ce056..63a29f005 100644
--- a/automation/script/experiment.py
+++ b/automation/script/experiment.py
@@ -1,5 +1,7 @@
 from collections import defaultdict
 import os
+from mlc.main import ExperimentAction
+import mlc.utils as utils
 from mlc import utils
 from utils import *
 import logging
@@ -26,6 +28,8 @@ def experiment_run(self_module, i):
     show_time = i.get('show_time', False)
     logger = self_module.logger
     env = i.get('env', {})
+    experiment_action = ExperimentAction(self_module.action_object.parent)
+
     prune_result = prune_input(
         {'input': i, 'extra_keys_starts_with': ['exp.']})
     if prune_result['return'] > 0:
@@ -80,6 +84,18 @@ def experiment_run(self_module, i):
     if r['return'] > 0:
         return r
 
+    experiment_meta = {}
+    exp_tags = tags
+    ii = {'action': 'update',
+          'target': 'experiment',
+          'script_alias': meta['alias'],
+          'tags': ','.join(exp_tags),
+          'meta': experiment_meta,
+          'force': True}
+    r = experiment_action.access(ii)
+    if r['return'] > 0:
+        return r
+
     return {'return': 0}
 
 
diff --git a/script/save-machine-state/capture.py b/script/save-machine-state/capture.py
new file mode 100644
index 000000000..02963eb81
--- /dev/null
+++ b/script/save-machine-state/capture.py
@@ -0,0 +1,138 @@
+"""Capture a snapshot of the host machine state and save it as JSON."""
+import os
+import json
+import psutil
+import platform
+import subprocess
+from datetime import datetime
+
+
+def read_file_safe(path):
+    """Return the stripped text of *path*, or None if it cannot be read."""
+    try:
+        with open(path, 'r') as f:
+            return f.read().strip()
+    except (OSError, UnicodeError):
+        # Best effort: a missing, unreadable or undecodable file simply
+        # means that no value is available.
+        return None
+
+
+def run_command_safe(command, require_sudo=False):
+    """Run *command* through the shell and return its stripped stdout.
+
+    Returns "Skipped (requires sudo)" when require_sudo is set and the
+    process is not running as root, and "Error running command" when the
+    command cannot be started or exits with a non-zero status.
+    """
+    # os.geteuid() exists only on Unix; without it the process is treated
+    # as non-root so the command is skipped instead of raising
+    # AttributeError.
+    geteuid = getattr(os, 'geteuid', None)
+    if require_sudo and (geteuid is None or geteuid() != 0):
+        return "Skipped (requires sudo)"
+    try:
+        # NOTE: shell=True is only safe because callers pass constant
+        # command strings; never forward untrusted input to this helper.
+        output = subprocess.check_output(command, shell=True, text=True)
+        return output.strip()
+    except (subprocess.CalledProcessError, OSError):
+        return "Error running command"
+
+
+def detect_container_context():
+    """Collect hints about whether the process runs inside a container.
+
+    Returns a dict with "docker_env" (whether /.dockerenv exists) and
+    "cgroup_indicators" (lines of /proc/1/cgroup that mention docker,
+    kubepods or containerd).
+    """
+    context = {
+        "docker_env": os.path.exists('/.dockerenv'),
+        "cgroup_indicators": []
+    }
+    cgroup = read_file_safe('/proc/1/cgroup')
+    if cgroup:
+        for line in cgroup.splitlines():
+            if any(x in line for x in ['docker', 'kubepods', 'containerd']):
+                context["cgroup_indicators"].append(line)
+    return context
+
+
+def get_mounted_file_systems():
+    """Return the stripped lines of /proc/mounts, or [] if unreadable."""
+    try:
+        with open("/proc/mounts", "r") as f:
+            return [line.strip() for line in f]
+    except OSError:
+        # Catch I/O failures only; BaseException would also swallow
+        # KeyboardInterrupt and SystemExit.
+        return []
+
+
+def capture_machine_state():
+    """Return a JSON-serializable dict describing the current machine state.
+
+    BIOS and file-based values fall back to a placeholder string or None
+    when they cannot be read.
+    """
+    state = {
+        # Timezone-aware so the snapshot time is unambiguous across hosts.
+        "timestamp": datetime.now().astimezone().isoformat(),
+        "platform": {
+            "system": platform.system(),
+            "node": platform.node(),
+            "release": platform.release(),
+            "version": platform.version(),
+            "machine": platform.machine(),
+            "processor": platform.processor()
+        },
+        "cpu": {
+            "logical_cores": psutil.cpu_count(logical=True),
+            "physical_cores": psutil.cpu_count(logical=False),
+            "load_avg": psutil.getloadavg(),
+            "cpu_percent": psutil.cpu_percent(interval=1)
+        },
+        "memory": {
+            "virtual_memory": dict(psutil.virtual_memory()._asdict()),
+            "swap_memory": dict(psutil.swap_memory()._asdict())
+        },
+        "disk": {
+            "disk_usage": dict(psutil.disk_usage('/')._asdict()),
+            "partitions": [dict(p._asdict()) for p in psutil.disk_partitions()]
+        },
+        "bios": {
+            "vendor": run_command_safe("dmidecode -s bios-vendor", require_sudo=True),
+            "version": run_command_safe("dmidecode -s bios-version", require_sudo=True),
+            "release_date": run_command_safe("dmidecode -s bios-release-date", require_sudo=True)
+        },
+        "thp_settings": {
+            "enabled": read_file_safe("/sys/kernel/mm/transparent_hugepage/enabled") or "Skipped (requires sudo or permission)",
+            "defrag": read_file_safe("/sys/kernel/mm/transparent_hugepage/defrag") or "Skipped (requires sudo or permission)"
+        },
+        "kernel": {
+            "cmdline": read_file_safe("/proc/cmdline")
+        },
+        "uptime": read_file_safe("/proc/uptime"),
+        "process_count": len(psutil.pids()),
+        "users_sessions": [dict(u._asdict()) for u in psutil.users()],
+        "container_context": detect_container_context(),
+        "mounted_filesystems": get_mounted_file_systems()
+    }
+    return state
+
+
+def save_state_to_file(state, filename):
+    """Write *state* to *filename* as indented JSON."""
+    with open(filename, "w") as f:
+        json.dump(state, f, indent=4)
+
+
+# Example usage
+if __name__ == "__main__":
+
+    state = capture_machine_state()
+    save_file = os.environ.get(
+        'MLC_SYSTEM_STATE_SAVE_FILENAME',
+        'machine_state.json')
+    save_state_to_file(state, save_file)
diff --git a/script/save-machine-state/customize.py b/script/save-machine-state/customize.py
new file mode 100644
index 000000000..a7dbcc4f2
--- /dev/null
+++ b/script/save-machine-state/customize.py
@@ -0,0 +1,47 @@
+from mlc import utils
+import os
+import shutil
+import subprocess
+
+
+def check_installation(command, os_info):
+    """Return True when *command* is available on PATH.
+
+    os_info is kept for interface compatibility. shutil.which() does the
+    lookup on every platform, so the result is always a bool; separate
+    Windows/Linux branches would return None on any other platform.
+    """
+    return shutil.which(command) is not None
+
+
+def preprocess(i):
+    """Flag optional tools for installation and resolve the details path.
+
+    Reads i['os_info'] and i['env'], updates env in place and returns
+    {'return': 0}.
+    """
+    os_info = i['os_info']
+    env = i['env']
+
+    if not check_installation("numactl", os_info):
+        env['MLC_INSTALL_NUMACTL'] = 'True'
+
+    # NOTE(review): cpupower is requested unconditionally; the
+    # check_installation("cpupower", os_info) guard is disabled - confirm
+    # that this is intended.
+    env['MLC_INSTALL_CPUPOWER'] = 'True'
+
+    if env.get('MLC_PLATFORM_DETAILS_FILE_PATH', '') == '':
+        if env.get('MLC_PLATFORM_DETAILS_DIR_PATH', '') == '':
+            env['MLC_PLATFORM_DETAILS_DIR_PATH'] = os.getcwd()
+        if env.get('MLC_PLATFORM_DETAILS_FILE_NAME', '') == '':
+            env['MLC_PLATFORM_DETAILS_FILE_NAME'] = "system-info.txt"
+        env['MLC_PLATFORM_DETAILS_FILE_PATH'] = os.path.join(
+            env['MLC_PLATFORM_DETAILS_DIR_PATH'], env['MLC_PLATFORM_DETAILS_FILE_NAME'])
+
+    return {'return': 0}
+
+
+def postprocess(i):
+    """Nothing to post-process; report success."""
+    return {'return': 0}
diff --git a/script/save-machine-state/meta.yaml b/script/save-machine-state/meta.yaml
new file mode 100644
index 000000000..7d9a07cd1
--- /dev/null
+++ b/script/save-machine-state/meta.yaml
@@ -0,0 +1,67 @@
+alias: save-machine-state
+automation_alias: script
+automation_uid: 5b4e0237da074764
+cache: false
+category: Platform information
+deps:
+- tags: detect,os
+- skip_if_env:
+    MLC_HOST_OS_TYPE:
+    - windows
+  tags: detect,sudo
+- names:
+  - python
+  - python3
+  tags: get,python
+- skip_if_any_env:
+    MLC_HOST_OS_TYPE:
+    - windows
+  skip_if_env:
+    MLC_SUDO_USER:
+    - 'no'
+  tags: get,sys-util,generic,_psmisc
+- enable_if_env:
+    MLC_HOST_OS_TYPE:
+    - linux
+  skip_if_env:
+    MLC_SUDO_USER:
+    - 'no'
+  tags: get,sys-util,generic,_systemd
+- enable_if_env:
+    MLC_HOST_OS_TYPE:
+    - linux
+  skip_if_env:
+    MLC_SUDO_USER:
+    - 'no'
+  tags: get,sys-util,generic,_dmidecode
+- tags: get,generic-python-lib,_package.psutil
+input_mapping:
+  outfile: MLC_SYSTEM_STATE_SAVE_FILENAME
+prehook_deps:
+- enable_if_env:
+    MLC_HOST_OS_TYPE:
+    - linux
+    MLC_INSTALL_NUMACTL:
+    - 'True'
+  skip_if_env:
+    MLC_SUDO_USER:
+    - 'no'
+  tags: get,sys-util,generic,_numactl
+- enable_if_env:
+    MLC_HOST_OS_TYPE:
+    - linux
+    MLC_INSTALL_CPUPOWER:
+    - 'True'
+  env:
+    MLC_TMP_FAIL_SAFE: 'yes'
+  ignore_missing: true
+  skip_if_env:
+    MLC_SUDO_USER:
+    - 'no'
+  tags: get,sys-util,generic,_linux-tools
+tags:
+- machine-state
+- save
+- machine
+- state
+uid: 2f62820ed7294659
diff --git a/script/save-machine-state/run.sh b/script/save-machine-state/run.sh
new file mode 100644
index 000000000..28e1867f6
--- /dev/null
+++ b/script/save-machine-state/run.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+
+${MLC_PYTHON_BIN_WITH_PATH} "${MLC_TMP_CURRENT_SCRIPT_PATH}/capture.py"