
Commit 5a1ef0e

ENH: Doc config in sheet (#869)
1 parent 525646a commit 5a1ef0e

3 files changed: +54 -44 lines changed

docs/source/v1.6.md.inc

Lines changed: 2 additions & 1 deletion

@@ -9,8 +9,9 @@
 - Added saving of clean raw data in addition to epochs (#840 by @larsoner)
 - Added saving of detected blink and cardiac events used to calculate SSP projectors (#840 by @larsoner)
 - Added [`noise_cov_method`][mne_bids_pipeline._config.noise_cov_method] to allow for the use of methods other than `"shrunk"` for noise covariance estimation (#854 by @larsoner)
-- Added option to pass `image_kwargs` to [`mne.Report.add_epochs`] to allow adjusting e.g. `"vmin"` and `"vmax"` of the epochs image in the report via [`report_add_epochs_image_kwargs`][mne_bids_pipeline._config.report_add_epochs_image_kwargs] (#848 by @SophieHerbst)
+- Added option to pass `image_kwargs` to [`mne.Report.add_epochs`] to allow adjusting e.g. `"vmin"` and `"vmax"` of the epochs image in the report via [`report_add_epochs_image_kwargs`][mne_bids_pipeline._config.report_add_epochs_image_kwargs]. This feature requires MNE-Python 1.7 or newer. (#848 by @SophieHerbst)
 - Split ICA fitting and artifact detection into separate steps. This means that now, ICA is split into a total of three consecutive steps: fitting, artifact detection, and the actual data cleaning step ("applying ICA"). This makes it easier to experiment with different settings for artifact detection without needing to re-fit ICA. (#865 by @larsoner)
+- The configuration used for the pipeline is now saved in a separate spreadsheet in the `.xlsx` log file (#869 by @larsoner)

 [//]: # (### :warning: Behavior changes)
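In practice, the new sheet can be read back with pandas. A minimal sketch, not part of this commit; the log-file path below is a made-up example:

```python
import pandas as pd

# The commit writes one "config" sheet per .xlsx log file (see _run.py below).
# This path is hypothetical, not the pipeline's actual output location.
log_path = "derivatives/logs/preprocessing.xlsx"

config_df = pd.read_excel(log_path, sheet_name="config")  # one row, one column per option
print(config_df.iloc[0])  # each cell holds a JSON-encoded config value
```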

mne_bids_pipeline/_config_import.py

Lines changed: 22 additions & 7 deletions
@@ -28,7 +28,18 @@ def _import_config(
     """Import the default config and the user's config."""
     # Get the default
     config = _get_default_config()
+    # Public names users generally will have in their config
     valid_names = [d for d in dir(config) if not d.startswith("_")]
+    # Names that we will reduce the SimpleNamespace to before returning
+    # (see _update_with_user_config)
+    keep_names = [d for d in dir(config) if not d.startswith("__")] + [
+        "config_path",
+        "PIPELINE_NAME",
+        "VERSION",
+        "CODE_URL",
+        "_raw_split_size",
+        "_epochs_split_size",
+    ]

     # Update with user config
     user_names = _update_with_user_config(
@@ -48,17 +59,21 @@
         config_path=extra_config,
     )
     extra_exec_params_keys = ("_n_jobs",)
+    keep_names.extend(extra_exec_params_keys)

     # Check it
     if check:
         _check_config(config, config_path)
         _check_misspellings_removals(
-            config,
             valid_names=valid_names,
             user_names=user_names,
             log=log,
+            config_validation=config.config_validation,
        )

+    # Finally, reduce to our actual supported params (all keep_names should be present)
+    config = SimpleNamespace(**{k: getattr(config, k) for k in keep_names})
+
     # Take some standard actions
     mne.set_log_level(verbose=config.mne_log_level.upper())

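The added `keep_names` reduction rebuilds the namespace from a whitelist. A self-contained sketch of the same pattern, with made-up attribute names rather than the pipeline's real options:

```python
from types import SimpleNamespace

# Made-up config attributes, for illustration only.
config = SimpleNamespace(task="rest", ch_types=["meg"], _scratch_dir="/tmp")
keep_names = ["task", "ch_types"]

# Rebuild the namespace keeping only the whitelisted attributes.
config = SimpleNamespace(**{k: getattr(config, k) for k in keep_names})
print(config)  # namespace(task='rest', ch_types=['meg'])
```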
@@ -406,11 +421,11 @@ def _pydantic_validate(


 def _check_misspellings_removals(
-    config: SimpleNamespace,
     *,
     valid_names: list[str],
     user_names: list[str],
     log: bool,
+    config_validation: str,
 ) -> None:
     # for each name in the user names, check if it's in the valid names but
     # the correct one is not defined
@@ -427,7 +442,7 @@
                 "the variable to reduce ambiguity and avoid this message, "
                 "or set config.config_validation to 'warn' or 'ignore'."
             )
-            _handle_config_error(this_msg, log, config)
+            _handle_config_error(this_msg, log, config_validation)
         if user_name in _REMOVED_NAMES:
             new = _REMOVED_NAMES[user_name]["new_name"]
             if new not in user_names:
@@ -438,16 +453,16 @@
                     f"{msg} this variable has been removed as a valid "
                     f"config option, {instead}."
                 )
-                _handle_config_error(this_msg, log, config)
+                _handle_config_error(this_msg, log, config_validation)


 def _handle_config_error(
     msg: str,
     log: bool,
-    config: SimpleNamespace,
+    config_validation: str,
 ) -> None:
-    if config.config_validation == "raise":
+    if config_validation == "raise":
         raise ValueError(msg)
-    elif config.config_validation == "warn":
+    elif config_validation == "warn":
         if log:
             logger.warning(**gen_log_kwargs(message=msg, emoji="🛟"))
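Passing only the `config_validation` string, instead of the whole config namespace, means the handler can be exercised on its own. An illustrative standalone version, with a `print` standing in for the pipeline's logger:

```python
def handle_config_error(msg: str, log: bool, config_validation: str) -> None:
    # Mirrors the three modes above: "raise", "warn", and (implicitly) "ignore".
    if config_validation == "raise":
        raise ValueError(msg)
    elif config_validation == "warn":
        if log:
            print(f"WARNING: {msg}")  # stand-in for logger.warning(...)
    # any other value (e.g. "ignore") falls through silently


handle_config_error("unknown config option", log=True, config_validation="warn")
```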

mne_bids_pipeline/_run.py

Lines changed: 30 additions & 36 deletions
@@ -4,7 +4,6 @@
 import functools
 import hashlib
 import inspect
-import json
 import pathlib
 import pdb
 import sys
@@ -38,14 +37,10 @@ def __mne_bids_pipeline_failsafe_wrapper__(*args, **kwargs):
             get_input_fnames=get_input_fnames,
             get_output_fnames=get_output_fnames,
         )
-        kwargs_copy = copy.deepcopy(kwargs)
         t0 = time.time()
-        kwargs_copy["cfg"] = json_tricks.dumps(
-            kwargs_copy["cfg"], sort_keys=False, indent=4
-        )
         log_info = pd.concat(
             [
-                pd.Series(kwargs_copy, dtype=object),
+                pd.Series(kwargs, dtype=object),
                 pd.Series(index=["time", "success", "error_message"], dtype=object),
             ]
         )
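For reference, concatenating the step's kwargs with empty bookkeeping fields yields one flat Series per run. A small sketch of that shape, with made-up values:

```python
import pandas as pd

# Illustrative values; the real wrapper passes the step's actual kwargs.
log_info = pd.concat(
    [
        pd.Series({"subject": "01"}, dtype=object),
        pd.Series(index=["time", "success", "error_message"], dtype=object),
    ]
)
print(list(log_info.index))  # ['subject', 'time', 'success', 'error_message']
```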
@@ -58,10 +53,10 @@ def __mne_bids_pipeline_failsafe_wrapper__(*args, **kwargs):
             log_info["error_message"] = ""
         except Exception as e:
             # Only keep what gen_log_kwargs() can handle
-            kwargs_copy = {
-                k: v
-                for k, v in kwargs_copy.items()
-                if k in ("subject", "session", "task", "run")
+            kwargs_log = {
+                k: kwargs[k]
+                for k in ("subject", "session", "task", "run")
+                if k in kwargs
             }
             message = (
                 f"A critical error occurred. " f"The error message was: {str(e)}"
@@ -88,13 +83,13 @@ def __mne_bids_pipeline_failsafe_wrapper__(*args, **kwargs):
             if _is_testing():
                 raise
             logger.error(
-                **gen_log_kwargs(message=message, **kwargs_copy, emoji="❌")
+                **gen_log_kwargs(message=message, **kwargs_log, emoji="❌")
             )
             sys.exit(1)
         elif on_error == "debug":
             message += "\n\nStarting post-mortem debugger."
             logger.error(
-                **gen_log_kwargs(message=message, **kwargs_copy, emoji="🐛")
+                **gen_log_kwargs(message=message, **kwargs_log, emoji="🐛")
             )
             extype, value, tb = sys.exc_info()
             print(tb)
@@ -103,7 +98,7 @@ def __mne_bids_pipeline_failsafe_wrapper__(*args, **kwargs):
         else:
             message += "\n\nContinuing pipeline run."
             logger.error(
-                **gen_log_kwargs(message=message, **kwargs_copy, emoji="🔂")
+                **gen_log_kwargs(message=message, **kwargs_log, emoji="🔂")
             )
         log_info["time"] = round(time.time() - t0, ndigits=1)
         return log_info
@@ -285,29 +280,8 @@ def save_logs(*, config: SimpleNamespace, logs: list[pd.Series]) -> None:
     sheet_name = _short_step_path(_get_step_path()).replace("/", "-")
     sheet_name = sheet_name[-30:]  # shorten due to limit of excel format

-    # We need to make the logs more compact to be able to write Excel format
-    # (32767 char limit per cell), in particular the "cfg" column has very large
-    # cells, so replace the "cfg" column with separated cfg.* columns (still truncated
-    # to the 32767 char limit)
-    compact_logs = list()
-    for log in logs:
-        log = log.copy()
-        # 1. Remove indentation (e.g., 220814 chars to 54416)
-        cfg = json.loads(log["cfg"])
-        del log["cfg"]
-        assert cfg["__instance_type__"] == ["types", "SimpleNamespace"], cfg[
-            "__instance_type__"
-        ]
-        for key, val in cfg["attributes"].items():
-            if isinstance(val, dict) and list(val.keys()) == ["__pathlib__"]:
-                val = val["__pathlib__"]
-            val = json.dumps(val, separators=(",", ":"))
-            if len(val) > 32767:
-                val = val[:32765] + " …"
-            log[f"cfg.{key}"] = val
-        compact_logs.append(log)
-    df = pd.DataFrame(compact_logs)
-    del logs, compact_logs
+    df = pd.DataFrame(logs)
+    del logs

     with FileLock(fname.with_suffix(fname.suffix + ".lock")):
         append = fname.exists()
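With the per-run `cfg` column gone, the list of per-run Series stacks directly into a DataFrame. An illustrative sketch:

```python
import pandas as pd

# Two made-up per-run log Series, one row each in the resulting frame.
logs = [
    pd.Series({"subject": "01", "time": 1.2, "success": True, "error_message": ""}),
    pd.Series({"subject": "02", "time": 1.5, "success": True, "error_message": ""}),
]
df = pd.DataFrame(logs)
print(df.shape)  # (2, 4)
```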
@@ -317,7 +291,27 @@ def save_logs(*, config: SimpleNamespace, logs: list[pd.Series]) -> None:
         mode="a" if append else "w",
         if_sheet_exists="replace" if append else None,
     )
+    assert isinstance(config, SimpleNamespace), type(config)
+    cf_df = dict()
+    for key, val in config.__dict__.items():
+        # We need to be careful about functions, json_tricks does not work with them
+        if inspect.isfunction(val):
+            new_val = ""
+            if func_file := inspect.getfile(val):
+                new_val += f"{func_file}:"
+            if getattr(val, "__qualname__", None):
+                new_val += val.__qualname__
+            val = "custom callable" if not new_val else new_val
+        val = json_tricks.dumps(val, indent=4, sort_keys=False)
+        # 32767 char limit per cell (could split over lines but if something is
+        # this long, you'll probably get the gist from the first 32k chars)
+        if len(val) > 32767:
+            val = val[:32765] + " …"
+        cf_df[key] = val
+    cf_df = pd.DataFrame([cf_df], dtype=object)
     with writer:
+        # Config first then the data
+        cf_df.to_excel(writer, sheet_name="config", index=False)
         df.to_excel(writer, sheet_name=sheet_name, index=False)
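The truncation above exists because the .xlsx format caps each cell at 32,767 characters. A standalone sketch of the serialize-then-truncate step, using stdlib `json` instead of `json_tricks` to stay dependency-free:

```python
import json

EXCEL_CELL_LIMIT = 32767  # hard per-cell limit of the .xlsx format


def to_cell(value) -> str:
    """Serialize one config value for a spreadsheet cell, truncating if needed."""
    text = json.dumps(value, indent=4, sort_keys=False)
    if len(text) > EXCEL_CELL_LIMIT:
        text = text[:EXCEL_CELL_LIMIT - 2] + " …"
    return text


print(len(to_cell(list(range(20000)))))  # 32767, exactly at the limit
```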
