Skip to content

Support mlc experiment entries #412

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
types: [published]
workflow_dispatch: {}


jobs:
build_wheels:
if: github.repository_owner == 'mlcommons'
Expand Down
16 changes: 16 additions & 0 deletions automation/script/experiment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from collections import defaultdict
import os
from mlc.main import ExperimentAction
import mlc.utils as utils
from mlc import utils
from utils import *
import logging
Expand All @@ -26,6 +28,8 @@ def experiment_run(self_module, i):
show_time = i.get('show_time', False)
logger = self_module.logger
env = i.get('env', {})
experiment_action = ExperimentAction(self_module.action_object.parent)

prune_result = prune_input(
{'input': i, 'extra_keys_starts_with': ['exp.']})
if prune_result['return'] > 0:
Expand Down Expand Up @@ -80,6 +84,18 @@ def experiment_run(self_module, i):
if r['return'] > 0:
return r

experiment_meta = {}
exp_tags = tags
ii = {'action': 'update',
'target': 'experiment',
'script_alias': meta['alias'],
'tags': ','.join(exp_tags),
'meta': experiment_meta,
'force': True}
r = experiment_action.access(ii)
if r['return'] > 0:
return r

return {'return': 0}


Expand Down
106 changes: 106 additions & 0 deletions script/save-machine-state/capture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import os
import json
import psutil
import platform
import subprocess
from datetime import datetime


def read_file_safe(path):
    """Return the whitespace-stripped text of ``path``, or ``None`` on failure.

    This is a best-effort reader: any ``Exception`` raised while opening or
    reading the file is swallowed and reported to the caller as ``None``.

    Args:
        path: Location of the text file to read.

    Returns:
        The file content with leading/trailing whitespace removed, or
        ``None`` when the file could not be read.
    """
    content = None
    try:
        with open(path, 'r') as handle:
            content = handle.read().strip()
    except Exception:
        # Reset explicitly so a failure while closing also yields None.
        content = None
    return content


def run_command_safe(command, require_sudo=False):
    """Run a shell command and return its stripped standard output.

    The command string is handed to the shell (``shell=True``), so callers
    must only pass trusted, fixed command strings.

    Args:
        command: Shell command line to execute.
        require_sudo: When true, the command is only run if the current
            process has effective UID 0.

    Returns:
        The command's stdout with surrounding whitespace removed,
        ``"Skipped (requires sudo)"`` when ``require_sudo`` is set and root
        privileges cannot be confirmed, or ``"Error running command"`` when
        the command exits non-zero or cannot be started.
    """
    if require_sudo:
        # os.geteuid is not available on every platform (e.g. Windows);
        # without it root privileges cannot be confirmed, so skip as well
        # instead of raising AttributeError.
        geteuid = getattr(os, "geteuid", None)
        if geteuid is None or geteuid() != 0:
            return "Skipped (requires sudo)"
    try:
        output = subprocess.check_output(command, shell=True, text=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers the case where the shell itself cannot be spawned.
        return "Error running command"
    return output.strip()


def detect_container_context():
    """Gather hints about whether the process runs inside a container.

    Returns:
        A dict with two keys:

        * ``"docker_env"``: ``True`` when ``/.dockerenv`` exists.
        * ``"cgroup_indicators"``: the lines of ``/proc/1/cgroup`` that
          mention ``docker``, ``kubepods`` or ``containerd`` (empty when the
          file is unreadable or has no such lines).
    """
    runtime_markers = ['docker', 'kubepods', 'containerd']

    has_dockerenv = os.path.exists('/.dockerenv')
    cgroup_text = read_file_safe('/proc/1/cgroup')

    matching_lines = []
    if cgroup_text:
        matching_lines = [
            entry
            for entry in cgroup_text.splitlines()
            if any(marker in entry for marker in runtime_markers)
        ]

    return {
        "docker_env": has_dockerenv,
        "cgroup_indicators": matching_lines,
    }


def get_mounted_file_systems():
    """Return the entries of ``/proc/mounts`` as a list of stripped lines.

    Returns:
        One string per line of ``/proc/mounts``, or an empty list when the
        file is missing, unreadable or cannot be decoded.
    """
    try:
        with open("/proc/mounts", "r") as f:
            return [line.strip() for line in f]
    except (OSError, UnicodeError):
        # Only I/O and decoding failures are treated as "no mount data";
        # KeyboardInterrupt and SystemExit must keep propagating.
        return []


def capture_machine_state():
    """Collect a snapshot of the current machine state.

    The snapshot combines platform identifiers, psutil statistics (CPU,
    memory, disk, processes, user sessions), BIOS details queried through
    ``dmidecode``, transparent hugepage settings, the kernel command line,
    uptime, container hints and the mounted file systems.

    Returns:
        A dict keyed by section name (``"timestamp"``, ``"platform"``,
        ``"cpu"``, ``"memory"``, ``"disk"``, ``"bios"``, ``"thp_settings"``,
        ``"kernel"``, ``"uptime"``, ``"process_count"``, ``"users_sessions"``,
        ``"container_context"``, ``"mounted_filesystems"``).
    """
    def as_plain_dict(record):
        # psutil returns namedtuples; convert them to regular dicts.
        return dict(record._asdict())

    snapshot = {"timestamp": datetime.now().isoformat()}

    snapshot["platform"] = {
        "system": platform.system(),
        "node": platform.node(),
        "release": platform.release(),
        "version": platform.version(),
        "machine": platform.machine(),
        "processor": platform.processor(),
    }

    logical = psutil.cpu_count(logical=True)
    physical = psutil.cpu_count(logical=False)
    load = psutil.getloadavg()
    # CPU utilisation is sampled over a 1-second interval.
    utilisation = psutil.cpu_percent(interval=1)
    snapshot["cpu"] = {
        "logical_cores": logical,
        "physical_cores": physical,
        "load_avg": load,
        "cpu_percent": utilisation,
    }

    snapshot["memory"] = {
        "virtual_memory": as_plain_dict(psutil.virtual_memory()),
        "swap_memory": as_plain_dict(psutil.swap_memory()),
    }

    snapshot["disk"] = {
        "disk_usage": as_plain_dict(psutil.disk_usage('/')),
        "partitions": [as_plain_dict(part) for part in psutil.disk_partitions()],
    }

    # BIOS fields are read with dmidecode, which is run with require_sudo=True.
    bios_queries = (
        ("vendor", "dmidecode -s bios-vendor"),
        ("version", "dmidecode -s bios-version"),
        ("release_date", "dmidecode -s bios-release-date"),
    )
    snapshot["bios"] = {
        field: run_command_safe(query, require_sudo=True)
        for field, query in bios_queries
    }

    # Unreadable (or empty) THP files are replaced by a placeholder string.
    thp_placeholder = "Skipped (requires sudo or permission)"
    thp_files = (
        ("enabled", "/sys/kernel/mm/transparent_hugepage/enabled"),
        ("defrag", "/sys/kernel/mm/transparent_hugepage/defrag"),
    )
    snapshot["thp_settings"] = {
        setting: read_file_safe(location) or thp_placeholder
        for setting, location in thp_files
    }

    snapshot["kernel"] = {"cmdline": read_file_safe("/proc/cmdline")}
    snapshot["uptime"] = read_file_safe("/proc/uptime")
    snapshot["process_count"] = len(psutil.pids())
    snapshot["users_sessions"] = [
        as_plain_dict(session) for session in psutil.users()
    ]
    snapshot["container_context"] = detect_container_context()
    snapshot["mounted_filesystems"] = get_mounted_file_systems()

    return snapshot


def save_state_to_file(state, filename):
    """Write ``state`` to ``filename`` as JSON indented by four spaces.

    Args:
        state: JSON-serializable object to store.
        filename: Path of the file to create or overwrite.
    """
    encoder = json.JSONEncoder(indent=4)
    with open(filename, "w") as sink:
        # Stream the encoded chunks straight into the file.
        sink.writelines(encoder.iterencode(state))


# Script entry point: capture the machine state and store it in the file named
# by the MLC_SYSTEM_STATE_SAVE_FILENAME environment variable
# (default: machine_state.json).
if __name__ == "__main__":
    output_path = os.environ.get(
        'MLC_SYSTEM_STATE_SAVE_FILENAME', 'machine_state.json')
    save_state_to_file(capture_machine_state(), output_path)
47 changes: 47 additions & 0 deletions script/save-machine-state/customize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from mlc import utils
import os
import subprocess


def check_installation(command, os_info):
    """Report whether ``command`` resolves to an executable on ``PATH``.

    The lookup uses ``shutil.which``, so it behaves the same on every
    platform, always returns a ``bool``, and does not depend on an external
    ``which`` binary being installed.

    Args:
        command: Name of the executable to look for.
        os_info: Platform description dict. It is no longer consulted and is
            kept only so existing callers keep working.

    Returns:
        ``True`` when the command is found, ``False`` otherwise.
    """
    # Imported locally so this function is self-contained.
    import shutil

    return shutil.which(command) is not None


def preprocess(i):
    """Prepare the environment before the script runs.

    Flags optional system utilities for installation and, unless a details
    file path is already set, derives ``MLC_PLATFORM_DETAILS_FILE_PATH`` from
    a directory and a file name (defaulting to the current working directory
    and ``system-info.txt``).

    Args:
        i: Input dict; ``i['os_info']`` and ``i['env']`` are read, and
            ``i['env']`` is updated in place.

    Returns:
        ``{'return': 0}``.
    """
    os_info = i['os_info']
    env = i['env']

    numactl_found = check_installation("numactl", os_info)
    if not numactl_found:
        env['MLC_INSTALL_NUMACTL'] = 'True'

    # The cpupower availability check is disabled, so installation is
    # always requested.
    env['MLC_INSTALL_CPUPOWER'] = 'True'

    # A details file path that is already set is left untouched.
    if env.get('MLC_PLATFORM_DETAILS_FILE_PATH', '') != '':
        return {'return': 0}

    if env.get('MLC_PLATFORM_DETAILS_DIR_PATH', '') == '':
        env['MLC_PLATFORM_DETAILS_DIR_PATH'] = os.getcwd()
    if env.get('MLC_PLATFORM_DETAILS_FILE_NAME', '') == '':
        env['MLC_PLATFORM_DETAILS_FILE_NAME'] = "system-info.txt"

    details_dir = env['MLC_PLATFORM_DETAILS_DIR_PATH']
    details_name = env['MLC_PLATFORM_DETAILS_FILE_NAME']
    env['MLC_PLATFORM_DETAILS_FILE_PATH'] = os.path.join(
        details_dir, details_name)

    return {'return': 0}


def postprocess(i):
    """Post-processing hook; currently performs no work.

    Args:
        i: Input dict passed to the hook. Nothing is read from it, so the
            hook no longer fails when keys such as ``'state'`` or
            ``'automation'`` are absent.

    Returns:
        ``{'return': 0}``.
    """
    return {'return': 0}
67 changes: 67 additions & 0 deletions script/save-machine-state/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Metadata for the save-machine-state script.
alias: save-machine-state
automation_alias: script
automation_uid: 5b4e0237da074764
cache: false
category: Platform information
# Dependencies; several entries are conditioned on MLC_HOST_OS_TYPE and
# MLC_SUDO_USER.
deps:
- tags: detect,os
- skip_if_env:
    MLC_HOST_OS_TYPE:
    - windows
  tags: detect,sudo
- names:
  - python
  - python3
  tags: get,python
- skip_if_any_env:
    MLC_HOST_OS_TYPE:
    - windows
  skip_if_env:
    MLC_SUDO_USER:
    - 'no'
  tags: get,sys-util,generic,_psmisc
- enable_if_env:
    MLC_HOST_OS_TYPE:
    - linux
  skip_if_env:
    MLC_SUDO_USER:
    - 'no'
  tags: get,sys-util,generic,_systemd
- enable_if_env:
    MLC_HOST_OS_TYPE:
    - linux
  skip_if_env:
    MLC_SUDO_USER:
    - 'no'
  tags: get,sys-util,generic,_dmidecode
# psutil is imported by capture.py.
- tags: get,generic-python-lib,_package.psutil
# NOTE(review): presumably maps the `outfile` input onto the environment
# variable that capture.py reads for its output file name - confirm.
input_mapping:
  outfile: MLC_SYSTEM_STATE_SAVE_FILENAME
# MLC_INSTALL_NUMACTL and MLC_INSTALL_CPUPOWER are set to 'True' by
# preprocess() in customize.py.
prehook_deps:
- enable_if_env:
    MLC_HOST_OS_TYPE:
    - linux
    MLC_INSTALL_NUMACTL:
    - 'True'
  skip_if_env:
    MLC_SUDO_USER:
    - 'no'
  tags: get,sys-util,generic,_numactl
- enable_if_env:
    MLC_HOST_OS_TYPE:
    - linux
    MLC_INSTALL_CPUPOWER:
    - 'True'
  env:
    MLC_TMP_FAIL_SAFE: 'yes'
  ignore_missing: true
  skip_if_env:
    MLC_SUDO_USER:
    - 'no'
  tags: get,sys-util,generic,_linux-tools
tags:
- machine-state
- save
- machine
- state
uid: 2f62820ed7294659
4 changes: 4 additions & 0 deletions script/save-machine-state/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
set -e

# Run capture.py with the interpreter given in MLC_PYTHON_BIN_WITH_PATH.
# The script path is quoted so a directory name containing spaces is not
# split into several arguments. The interpreter variable is left unquoted
# as before (NOTE(review): it may carry extra arguments - confirm before
# quoting it too).
${MLC_PYTHON_BIN_WITH_PATH} "${MLC_TMP_CURRENT_SCRIPT_PATH}/capture.py"
Loading