# ---------------------------------------------------------------------------
# File: nibabies/cli/hbcd.py (new file)
# ---------------------------------------------------------------------------
"""
This script restructures workflow outputs to be ingested by the HBCD database.

The following changes are made to the outputs:

- FreeSurfer output is changed to follow the BIDS hierarchy:

    freesurfer/
        sub-
            ses-/
                mri/
                surf/
                ...

- MCRIBS output is changed to follow the BIDS hierarchy:

    mcribs/
        sub-
            ses-/
                SurfReconDeformable/
                TissueSegDrawEM/
                ...

- Symbolic links are replaced with the files they point to.

WARNING: This alters the directories in place into a structure that the
underlying software used to create them will not recognize. Use with caution.
"""

import argparse
import shutil
import sys
import typing as ty
from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser
from pathlib import Path


def _parser():
    """Build the ``nibabies-hbcd`` argument parser.

    Both ``--fs`` and ``--mcribs`` are optional. A value that is given is
    validated by ``_path_exists``, which reports a missing path through
    ``parser.error`` and otherwise returns an absolute ``Path``.
    """
    from functools import partial

    from .parser import _path_exists

    parser = argparse.ArgumentParser(
        prog='nibabies-hbcd',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    PathExists = partial(_path_exists, parser=parser)

    parser.add_argument(
        '--fs',
        type=PathExists,
        help='Path to the FreeSurfer output directory',
    )
    parser.add_argument(
        '--mcribs',
        type=PathExists,
        help='Path to the MCRIBS output directory.',
    )
    return parser


def copy_symlinks(directory: Path):
    """Replace every symbolic link below ``directory`` with a copy of its target.

    Links to files become regular files (copied with ``shutil.copy2``) and
    links to directories become real directory trees (``shutil.copytree``).
    Links that cannot be materialized - dangling links, or directory links
    pointing at one of their own ancestors - are reported on stderr and left
    in place.

    Parameters
    ----------
    directory
        Root of the tree to process; it is modified in place.
    """
    # Snapshot the links first: the tree is modified while it is processed.
    links = [fl for fl in directory.rglob('*') if fl.is_symlink()]
    for fl in links:
        target = fl.resolve()
        # Validate the target *before* removing the link, so a failure cannot
        # leave the tree with neither the link nor its content.
        if not target.exists():
            print(f'Skipping dangling symlink {fl} pointing to {target}', file=sys.stderr)
            continue

        if target.is_dir():
            container = fl.parent.resolve()
            if target in (container, *container.parents):
                # Copying an ancestor into itself would recurse without end.
                print(
                    f'Skipping symlink {fl} pointing to its own ancestor {target}',
                    file=sys.stderr,
                )
                continue

        print(f'Found symlink {fl} pointing to {target}')
        fl.unlink()
        if target.is_dir():
            shutil.copytree(target, fl)
        else:
            shutil.copy2(target, fl)


def restructure(directory: Path):
    """Change the structure of a directory in place to resemble BIDS hierarchy.

    Every directory named ``sub-<label>_ses-<label>`` directly under
    ``directory`` is relocated to ``sub-<label>/ses-<label>`` after its
    symbolic links have been replaced by copies (see ``copy_symlinks``).
    Entries that are not directories, or whose name has no underscore
    (for example an already restructured ``sub-<label>``), are left untouched.

    Parameters
    ----------
    directory
        Output directory (e.g. FreeSurfer or MCRIBS) to restructure.

    Raises
    ------
    AttributeError
        If the part of a directory name following the first underscore does
        not start with ``ses-``.
    """
    # Snapshot the listing (sorted for reproducible output): the ``sub-<label>``
    # directories created below would otherwise appear in a live iterator.
    for sid in sorted(directory.glob('sub-*')):
        if not sid.is_dir():
            # Stray files (logs, notes, ...) cannot be turned into a session tree.
            continue

        subject, sep, session = sid.name.partition('_')
        if not sep:
            continue

        # Defensive: the glob pattern already guarantees the ``sub-`` prefix.
        if not subject.startswith('sub-'):
            raise AttributeError(f'Incorrect subject ID {subject}')
        if not session.startswith('ses-'):
            raise AttributeError(f'Incorrect session ID {session}')

        # First traverse and ensure no symbolic links are present
        copy_symlinks(sid)

        target_directory = directory / subject / session
        if target_directory.exists():
            # A session directory is already there: merge into it.
            print(f'Copying {sid} to {target_directory}')
            shutil.copytree(sid, target_directory, dirs_exist_ok=True)
            shutil.rmtree(sid)
        else:
            # Source and destination share a parent, so this is a cheap rename
            # rather than a full copy of the (potentially large) tree.
            print(f'Making target directory {target_directory.parent}')
            target_directory.parent.mkdir(parents=True, exist_ok=True)
            print(f'Moving {sid} to {target_directory}')
            shutil.move(str(sid), str(target_directory))
    print(f'Completed restructuring {directory}')


def main(argv=None):
    """Entry point `nibabies-hbcd`."""
    pargs = _parser().parse_args(argv)

    for label, path in (('FreeSurfer', pargs.fs), ('MCRIBS', pargs.mcribs)):
        if path is None:
            print(f'{label} directory not provided. Skipping')
        else:
            restructure(path)


# ---------------------------------------------------------------------------
# File: nibabies/cli/parser.py (definitions hoisted from ``_build_parser`` to
# module level). Besides the imports at the top of this section, the patch
# context shows parser.py also importing ``config`` from the parent package
# (``from .. import config``) and ``BIDSLayout`` from ``bids.layout``.
# ---------------------------------------------------------------------------
DEPRECATIONS = {
    # parser attribute name: (replacement flag, version slated to be removed in)
}


class DeprecatedAction(Action):
    """Argparse action that warns about a deprecated option and discards it.

    The warning is written to stderr; the replacement flag and removal version
    are looked up in ``DEPRECATIONS`` by the option's ``dest``. The attribute
    is then deleted from the namespace.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        new_opt, rem_vers = DEPRECATIONS.get(self.dest, (None, None))
        msg = (
            f'{self.option_strings} has been deprecated and will be removed in '
            f'{rem_vers or "a later version"}.'
        )
        if new_opt:
            msg += f' Please use `{new_opt}` instead.'
        print(msg, file=sys.stderr)
        delattr(namespace, self.dest)
+ print(msg, file=sys.stderr) + delattr(namespace, self.dest) - deprecations = { - # parser attribute name: (replacement flag, version slated to be removed in) - } - class DeprecatedAction(Action): - def __call__(self, parser, namespace, values, option_string=None): - new_opt, rem_vers = deprecations.get(self.dest, (None, None)) - msg = ( - f'{self.option_strings} has been deprecated and will be removed in ' - f'{rem_vers or "a later version"}.' - ) - if new_opt: - msg += f' Please use `{new_opt}` instead.' - print(msg, file=sys.stderr) - delattr(namespace, self.dest) - - class DerivToDict(Action): - def __call__(self, parser, namespace, values, option_string=None): - d = {} - for spec in values: - try: - name, loc = spec.split('=') - loc = Path(loc) - except ValueError: - loc = Path(spec) - name = loc.name - - if name in d: - raise ValueError(f'Received duplicate derivative name: {name}') - - d[name] = loc - setattr(namespace, self.dest, d) - - def _path_exists(path, parser): - """Ensure a given path exists.""" - if path is None: - raise parser.error('No value provided!') - path = Path(path).absolute() - if not path.exists(): - raise parser.error(f'Path does not exist: <{path}>.') +class DerivToDict(Action): + def __call__(self, parser, namespace, values, option_string=None): + d = {} + for spec in values: + try: + name, loc = spec.split('=') + loc = Path(loc) + except ValueError: + loc = Path(spec) + name = loc.name + + if name in d: + raise ValueError(f'Received duplicate derivative name: {name}') + + d[name] = loc + setattr(namespace, self.dest, d) + + +def _path_exists(path, parser): + """Ensure a given path exists.""" + if path is None: + raise parser.error('No value provided!') + path = Path(path).absolute() + if not path.exists(): + raise parser.error(f'Path does not exist: <{path}>.') + return path + + +def _dir_not_empty(path, parser): + path = _path_exists(path, parser) + if not path.is_dir(): + raise parser.error(f'Path is not a directory <{path}>.') + 
for _ in path.iterdir(): return path + raise parser.error(f'Directory found with no contents <{path}>.') + + +def _is_file(path, parser): + """Ensure a given path exists and it is a file.""" + path = _path_exists(path, parser) + if not path.is_file(): + raise parser.error(f'Path should point to a file (or symlink of file): <{path}>.') + return path + + +def _min_one(value, parser): + """Ensure an argument is not lower than 1.""" + value = int(value) + if value < 1: + raise parser.error("Argument can't be less than one.") + return value + + +def _to_gb(value): + scale = {'G': 1, 'T': 10**3, 'M': 1e-3, 'K': 1e-6, 'B': 1e-9} + digits = ''.join([c for c in value if c.isdigit()]) + units = value[len(digits) :] or 'M' + return int(digits) * scale[units[0]] + + +def _drop_sub(value): + return value[4:] if value.startswith('sub-') else value - def _dir_not_empty(path, parser): - path = _path_exists(path, parser) - if not path.is_dir(): - raise parser.error(f'Path is not a directory <{path}>.') - for _ in path.iterdir(): - return path - raise parser.error(f'Directory found with no contents <{path}>.') - - def _is_file(path, parser): - """Ensure a given path exists and it is a file.""" - path = _path_exists(path, parser) - if not path.is_file(): - raise parser.error(f'Path should point to a file (or symlink of file): <{path}>.') - return path - def _min_one(value, parser): - """Ensure an argument is not lower than 1.""" - value = int(value) - if value < 1: - raise parser.error("Argument can't be less than one.") +def _drop_ses(value): + return value[4:] if value.startswith('ses-') else value + + +def _process_value(value): + import bids + + if value is None: + return bids.layout.Query.NONE + elif value == '*': + return bids.layout.Query.ANY + else: return value - def _to_gb(value): - scale = {'G': 1, 'T': 10**3, 'M': 1e-3, 'K': 1e-6, 'B': 1e-9} - digits = ''.join([c for c in value if c.isdigit()]) - units = value[len(digits) :] or 'M' - return int(digits) * scale[units[0]] - 
def _drop_sub(value): - return value[4:] if value.startswith('sub-') else value +def _filter_pybids_none_any(dct): + d = {} + for k, v in dct.items(): + if isinstance(v, list): + d[k] = [_process_value(val) for val in v] + else: + d[k] = _process_value(v) + return d - def _drop_ses(value): - return value[4:] if value.startswith('ses-') else value - def _process_value(value): - import bids +def _bids_filter(value, parser): + from json import JSONDecodeError, loads - if value is None: - return bids.layout.Query.NONE - elif value == '*': - return bids.layout.Query.ANY + if value: + if Path(value).exists(): + try: + return loads(Path(value).read_text(), object_hook=_filter_pybids_none_any) + except JSONDecodeError as e: + raise parser.error(f'JSON syntax error in: <{value}>.') from e else: - return value + raise parser.error(f'Path does not exist: <{value}>.') - def _filter_pybids_none_any(dct): - d = {} - for k, v in dct.items(): - if isinstance(v, list): - d[k] = [_process_value(val) for val in v] - else: - d[k] = _process_value(v) - return d - - def _bids_filter(value, parser): - from json import JSONDecodeError, loads - - if value: - if Path(value).exists(): - try: - return loads(Path(value).read_text(), object_hook=_filter_pybids_none_any) - except JSONDecodeError as e: - raise parser.error(f'JSON syntax error in: <{value}>.') from e - else: - raise parser.error(f'Path does not exist: <{value}>.') - - def _slice_time_ref(value, parser): - if value == 'start': - value = 0 - elif value == 'middle': - value = 0.5 - try: - value = float(value) - except ValueError as e: - raise parser.error( - f"Slice time reference must be number, 'start', or 'middle'. Received {value}." - ) from e - if not 0 <= value <= 1: - raise parser.error(f'Slice time reference must be in range 0-1. 
# ---------------------------------------------------------------------------
# File: nibabies/cli/parser.py (continued)
# ---------------------------------------------------------------------------
def _slice_time_ref(value, parser):
    """Convert a slice-time reference to a float within [0, 1].

    ``'start'`` and ``'middle'`` stand for 0 and 0.5. Anything that cannot be
    converted to a float, or falls outside 0-1, is reported via ``parser.error``.
    """
    for alias, fraction in (('start', 0), ('middle', 0.5)):
        if value == alias:
            value = fraction
            break

    try:
        value = float(value)
    except ValueError as e:
        raise parser.error(
            f"Slice time reference must be number, 'start', or 'middle'. Received {value}."
        ) from e

    if value < 0 or value > 1 or value != value:
        raise parser.error(f'Slice time reference must be in range 0-1. Received {value}.')
    return value


def _str_none(val):
    """Turn the string ``'none'`` (any casing) into ``None``; pass anything else through."""
    if isinstance(val, str) and val.lower() == 'none':
        return None
    return val


# NOTE: ``_build_parser()`` follows here in parser.py. Only its opening lines
# (docstring, local imports, ``verstr`` / ``currentv`` setup) are visible as
# patch context, so it is not reproduced in this excerpt.


# ---------------------------------------------------------------------------
# File: nibabies/cli/tests/test_hbcd.py (new file)
# ---------------------------------------------------------------------------
import pytest

from ..hbcd import main


# create a FreeSurfer / MCRIBS directory structure
@pytest.fixture
def freesurfer(tmp_path):
    """FreeSurfer-like tree: ``fsaverage`` plus two sessions of ``sub-01``."""
    root = tmp_path / 'freesurfer'
    contents = (('mri', 'T1.mgz'), ('surf', 'lh.pial'))
    for subject_dir in ('fsaverage', 'sub-01_ses-1', 'sub-01_ses-2'):
        for folder, filename in contents:
            parent = root / subject_dir / folder
            parent.mkdir(parents=True, exist_ok=True)
            (parent / filename).touch()
    return root


@pytest.fixture
def mcribs(tmp_path):
    """MCRIBS-like tree where the manually edited labels are a symlink."""
    root = tmp_path / 'mcribs'
    for subject_dir in ('sub-01_ses-1', 'sub-01_ses-2'):
        tissue_seg = root / subject_dir / 'TissueSeg'
        tissue_seg.mkdir(parents=True, exist_ok=True)
        orig = tissue_seg / f'{subject_dir}_all_labels.nii.gz'
        orig.touch()
        (tissue_seg / f'{subject_dir}_all_labels_manedit.nii.gz').symlink_to(orig)
    return root


def test_hbcd_restructure(freesurfer, mcribs):
    """Outputs are regrouped as ``sub-*/ses-*`` and symlinks are materialized."""
    # running without options is fine
    main([])

    cli_args = ['--fs', str(freesurfer), '--mcribs', str(mcribs)]
    main(cli_args)

    top_level = sorted(x.name for x in freesurfer.iterdir())
    assert top_level == ['fsaverage', 'sub-01']
    sessions = sorted(x.name for x in (freesurfer / 'sub-01').iterdir())
    assert sessions == ['ses-1', 'ses-2']

    assert [x.name for x in mcribs.iterdir()] == ['sub-01']
    linkd = mcribs / 'sub-01' / 'ses-1' / 'TissueSeg' / 'sub-01_ses-1_all_labels_manedit.nii.gz'
    assert linkd.exists()
    assert not linkd.is_symlink()

    # and run again should not fail
    main(cli_args)


# ---------------------------------------------------------------------------
# File: pyproject.toml - entry added under [project.scripts]:
#     nibabies-hbcd = "nibabies.cli.hbcd:main"
# ---------------------------------------------------------------------------