Skip to content

Function to write metadata tsv files in Genfi-to-BIDS could be split and tested #1505

@AliceJoubert

Description

@AliceJoubert

In Genfi-to-BIDS, writing the different metadata types of files is done in only one function :

def write_bids(
to: Path,
participants: DataFrame,
sessions: DataFrame,
scans: DataFrame,
) -> None:
"""This function writes the BIDS
Parameters
----------
to: Path
The path where the BIDS should be written
participants: DataFrame
DataFrame containing the data for the participants.tsv
sessions: DataFrame
DataFrame containing the data for the sessions.tsv
scans: DataFrame
DataFrame containing the data for the scans.tsv
"""
import os
from fsspec.implementations.local import LocalFileSystem
from clinica.converters._utils import run_dcm2niix, write_to_tsv
from clinica.dataset import BIDSDatasetDescription
from clinica.utils.stream import cprint
cprint("Starting to write the BIDS.", lvl="info")
fs = LocalFileSystem(auto_mkdir=True)
# Ensure BIDS hierarchy is written first.
participants = (
participants.reset_index()
.drop(["session_id", "modality", "run_num", "bids_filename", "source"], axis=1)
.drop_duplicates()
.set_index("participant_id")
)
with fs.transaction:
with fs.open(to / "dataset_description.json", "w") as dataset_description_file:
BIDSDatasetDescription(name="GENFI").write(to=dataset_description_file)
with fs.open(to / "participants.tsv", "w") as participant_file:
write_to_tsv(participants, participant_file)
for participant_id, data_frame in sessions.groupby("participant_id"):
sessions = data_frame.droplevel(
["participant_id", "modality", "bids_filename", "run_num"]
).drop_duplicates()
sessions_filepath = to / str(participant_id) / f"{participant_id}_sessions.tsv"
with fs.open(sessions_filepath, "w") as sessions_file:
write_to_tsv(sessions, sessions_file)
scans = scans.reset_index().set_index(["bids_full_path"], verify_integrity=True)
for bids_full_path, metadata in scans.iterrows():
bids_full_path = Path(bids_full_path)
try:
os.makedirs(to / bids_full_path.parent)
except OSError:
pass
dcm2niix_success = run_dcm2niix(
Path(metadata["source_path"]).parent,
to / Path(bids_full_path).parent,
metadata["bids_filename"],
True,
)
if dcm2niix_success:
scans_filepath = (
to
/ str(metadata.participant_id)
/ str(metadata.session_id)
/ f"{metadata.participant_id}_{metadata.session_id}_scan.tsv"
)
row_to_write = _serialize_row(
metadata.drop(["participant_id", "session_id"]),
write_column_names=not scans_filepath.exists(),
)
with open(scans_filepath, "a") as scans_file:
scans_file.write(f"{row_to_write}\n")
if (
"dwi" in metadata["bids_filename"]
and "Philips" in metadata.manufacturer
):
_merge_philips_diffusion(
to / bids_full_path.with_suffix(".json"),
metadata.number_of_parts,
metadata.run_num,
)
_correct_fieldmaps_name(to)
_delete_real_and_imaginary_files(to)

Rather than having one big function which is not tested, it would be better to use this one as an interface to three separate functions specific to one type of file to test them more easily.

Metadata

Metadata

Labels

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions