Fix use of session_id in collect_data (#909)

* Draft test for parsing. * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Log the collected data. * Update test_cli_run.py * Update tests.py * Create config.toml * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Try fixing. * Update bids.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py * Update test_cli_run.py
PennLINC · Jan 14, 2025 · 0e047bb · 0e047bb
1 parent 17d3d69
commit 0e047bb
Show file tree

Hide file tree

Showing 4 changed files with 342 additions and 4 deletions.
diff --git a/qsiprep/data/tests/config.toml b/qsiprep/data/tests/config.toml
@@ -0,0 +1,94 @@
+[environment]
+cpu_count = 1
+exec_env = "docker"
+free_mem = 52.2
+overcommit_policy = "heuristic"
+overcommit_limit = "50%"
+nipype_version = "1.9.1"
+templateflow_version = "23.1.0"
+version = "1.0.0rc2.dev29+gdee1425.d20250113"
+
+[execution]
+bids_dir = "/tmp/src/qsiprep/.circleci/data/forrest_gump"
+bids_database_dir = "/tmp/src/qsiprep/.circleci/work/forrest_gump/20250113-154517_1f7046f7-c5b1-4151-84ca-755a96dfbb55/bids_db"
+bids_description_hash = "86891931159357fa937a6724ac27013f79ca56859100547ed9793245866312cd"
+boilerplate_only = false
+sloppy = true
+debug = []
+layout = "BIDS Layout: ...ep/.circleci/data/forrest_gump | Subjects: 1 | Sessions: 1 | Runs: 0"
+log_dir = "/tmp/src/qsiprep/.circleci/out/forrest_gump/logs"
+log_level = 15
+low_mem = false
+notrack = true
+output_dir = "/tmp/src/qsiprep/.circleci/out/forrest_gump"
+reports_only = false
+run_uuid = "20250113-154517_1f7046f7-c5b1-4151-84ca-755a96dfbb55"
+participant_label = [ "01",]
+processing_list = [ "01:forrestgump",]
+skip_anat_based_spatial_normalization = false
+templateflow_home = "/home/qsiprep/.cache/templateflow"
+work_dir = "/tmp/src/qsiprep/.circleci/work/forrest_gump"
+write_graph = true
+
+[workflow]
+anat_modality = "T1w"
+anat_only = false
+anatomical_template = "MNI152NLin2009cAsym"
+b0_threshold = 100
+b0_motion_corr_to = "iterative"
+b0_to_t1w_transform = "Rigid"
+b1_biascorrect_stage = "none"
+denoise_after_combining = false
+denoise_method = "none"
+distortion_group_merge = "none"
+dwi_denoise_window = "auto"
+dwi_no_biascorr = false
+dwi_only = false
+fmap_bspline = false
+force_syn = false
+hmc_model = "eddy"
+hmc_transform = "Affine"
+ignore = []
+infant = false
+intramodal_template_iters = 0
+intramodal_template_transform = "BSplineSyN"
+subject_anatomical_reference = "first-alphabetically"
+longitudinal = false
+no_b0_harmonization = false
+output_resolution = 5.0
+pepolar_method = "TOPUP"
+separate_all_dwis = false
+shoreline_iters = 2
+use_syn_sdc = false
+spaces = "MNI152NLin2009cAsym"
+
+[nipype]
+crashfile_format = "txt"
+get_linked_libs = false
+nprocs = 4
+omp_nthreads = 4
+plugin = "MultiProc"
+remove_unnecessary_outputs = true
+resource_monitor = false
+stop_on_first_crash = true
+
+[seeds]
+master = 20506
+ants = 2720
+numpy = 32806
+
+[execution.derivatives]
+
+[execution.dataset_links]
+raw = "/tmp/src/qsiprep/.circleci/data/forrest_gump"
+templateflow = "/home/qsiprep/.cache/templateflow"
+
+[nipype.plugin_args]
+maxtasksperchild = 1
+raise_insufficient = false
+
+[execution.bids_filters.t1w]
+reconstruction = "autobox"
+
+[execution.bids_filters.t2w]
+reconstruction = "autobox"
diff --git a/qsiprep/tests/test_cli_run.py b/qsiprep/tests/test_cli_run.py
@@ -0,0 +1,237 @@
+"""Tests for the command line interface"""
+
+import pytest
+from niworkflows.utils.testing import generate_bids_skeleton
+
+
+def gen_layout(bids_dir, database_dir=None):
+    """Build a pybids ``BIDSLayout`` for ``bids_dir``.
+
+    The indexer is created with validation switched off and is told to ignore
+    the directory names and regular expressions listed below. When
+    ``database_dir`` is truthy it is forwarded to the layout as
+    ``database_path``; otherwise that argument is not passed at all.
+    """
+    import re
+
+    from bids.layout import BIDSLayout, BIDSLayoutIndexer
+
+    # Literal directory names first, then patterns for dot-prefixed entries
+    # and for the per-subject datatype folders that should not be indexed.
+    skipped = (
+        'code',
+        'stimuli',
+        'sourcedata',
+        'models',
+        'derivatives',
+        re.compile(r'^\.'),
+        re.compile(r'sub-[a-zA-Z0-9]+(/ses-[a-zA-Z0-9]+)?/(beh|eeg|ieeg|meg|micr|perf)'),
+    )
+
+    optional = {'database_path': database_dir} if database_dir else {}
+
+    return BIDSLayout(
+        bids_dir,
+        indexer=BIDSLayoutIndexer(validate=False, ignore=skipped),
+        **optional,
+    )
+
+
+long = {  # skeleton passed to generate_bids_skeleton; both sessions have anat and dwi
+    '01': [  # one list item per session
+        {
+            'session': '01',
+            'anat': [{'suffix': 'T1w', 'metadata': {'EchoTime': 1}}],
+            'dwi': [
+                {
+                    'dir': 'AP',
+                    'run': '01',
+                    'suffix': 'dwi',
+                    'metadata': {
+                        'RepetitionTime': 0.8,
+                        'TotalReadoutTime': 0.5,
+                        'PhaseEncodingDirection': 'j',
+                    },
+                },
+                {
+                    'dir': 'PA',
+                    'run': '01',
+                    'suffix': 'dwi',
+                    'metadata': {
+                        'RepetitionTime': 0.8,
+                        'TotalReadoutTime': 0.5,
+                        'PhaseEncodingDirection': 'j',
+                    },
+                },
+            ],
+        },
+        {
+            'session': '02',  # same contents as session '01'
+            'anat': [{'suffix': 'T1w', 'metadata': {'EchoTime': 1}}],
+            'dwi': [
+                {
+                    'dir': 'AP',
+                    'run': '01',
+                    'suffix': 'dwi',
+                    'metadata': {
+                        'RepetitionTime': 0.8,
+                        'TotalReadoutTime': 0.5,
+                        'PhaseEncodingDirection': 'j',
+                    },
+                },
+                {
+                    'dir': 'PA',
+                    'run': '01',
+                    'suffix': 'dwi',
+                    'metadata': {
+                        'RepetitionTime': 0.8,
+                        'TotalReadoutTime': 0.5,
+                        'PhaseEncodingDirection': 'j',
+                    },
+                },
+            ],
+        },
+    ],
+}
+
+long2 = {  # skeleton passed to generate_bids_skeleton; only session 'full' has anat
+    '01': [  # one list item per session
+        {
+            'session': 'full',  # has both 'anat' and 'dwi'
+            'anat': [{'suffix': 'T1w', 'metadata': {'EchoTime': 1}}],
+            'dwi': [
+                {
+                    'dir': 'AP',
+                    'run': '01',
+                    'suffix': 'dwi',
+                    'metadata': {
+                        'RepetitionTime': 0.8,
+                        'TotalReadoutTime': 0.5,
+                        'PhaseEncodingDirection': 'j',
+                    },
+                },
+                {
+                    'dir': 'PA',
+                    'run': '01',
+                    'suffix': 'dwi',
+                    'metadata': {
+                        'RepetitionTime': 0.8,
+                        'TotalReadoutTime': 0.5,
+                        'PhaseEncodingDirection': 'j',
+                    },
+                },
+            ],
+        },
+        {
+            'session': 'diffonly',  # no 'anat' entry for this session
+            'dwi': [
+                {
+                    'dir': 'AP',
+                    'run': '01',
+                    'suffix': 'dwi',
+                    'metadata': {
+                        'RepetitionTime': 0.8,
+                        'TotalReadoutTime': 0.5,
+                        'PhaseEncodingDirection': 'j',
+                    },
+                },
+                {
+                    'dir': 'PA',
+                    'run': '01',
+                    'suffix': 'dwi',
+                    'metadata': {
+                        'RepetitionTime': 0.8,
+                        'TotalReadoutTime': 0.5,
+                        'PhaseEncodingDirection': 'j',
+                    },
+                },
+            ],
+        },
+    ],
+}
+
+
+@pytest.mark.parametrize(
+    ('name', 'skeleton', 'reference', 'expected'),
+    [
+        ('long', long, 'sessionwise', [['01', ['01']], ['01', ['02']]]),
+        ('long', long, 'unbiased', [['01', ['01', '02']]]),
+        ('long', long, 'first-alphabetically', [['01', ['01', '02']]]),
+        ('long2', long2, 'sessionwise', [['01', ['diffonly']], ['01', ['full']]]),
+        ('long2', long2, 'unbiased', [['01', ['diffonly', 'full']]]),
+        ('long2', long2, 'first-alphabetically', [['01', ['diffonly', 'full']]]),
+    ],
+)
+def _test_processing_list(tmpdir, name, skeleton, reference, expected):
+    """Check the processing list produced by ``qsiprep.cli.parser.parse_args``.
+
+    This test is currently disabled (note the leading underscore in its name).
+    ``parse_args`` does not overwrite the whole Config object on every call, so
+    a stale layout from one parametrization lingers into the next. It should
+    be re-enabled once that is sorted out.
+    """
+    from qsiprep import config
+    from qsiprep.cli.parser import parse_args
+
+    label = f'{name}_{reference}'
+
+    # Write a fresh skeleton dataset for this parametrization.
+    dataset = tmpdir / label
+    generate_bids_skeleton(str(dataset), skeleton)
+
+    config.from_dict({'bids_dir': str(dataset)}, init=True)
+
+    argv = [
+        str(dataset),
+        str(tmpdir / f'out_{label}'),
+        'participant',
+        '--participant-label',
+        '01',
+        '--subject-anatomical-reference',
+        reference,
+        '--output-resolution',
+        '2',
+        '--skip-bids-validation',
+    ]
+    parse_args(argv)
+
+    assert config.execution.processing_list == expected, config
+
+
+@pytest.mark.parametrize(
+    ('name', 'skeleton', 'sessions', 'n_anats'),
+    [
+        ('long', long, ['01', '02'], [1, 1, 2]),
+        ('long2', long2, ['diffonly', 'full'], [0, 1, 1]),
+    ],
+)
+def test_collect_data(tmpdir, name, skeleton, sessions, n_anats):
+    """Exercise ``qsiprep.utils.bids.collect_data`` with several session selections.
+
+    ``n_anats`` gives the expected length of the ``'t1w'`` entry when querying
+    the first session, the second session, and the whole ``sessions`` list,
+    in that order.
+    """
+    from pprint import pformat
+
+    from qsiprep.utils.bids import collect_data
+
+    dataset = str(tmpdir / name)
+    generate_bids_skeleton(dataset, skeleton)
+
+    def _query(session_id):
+        # Only the first element of collect_data's return value is checked.
+        return collect_data(
+            bids_dir=dataset,
+            participant_label='01',
+            session_id=session_id,
+            filters=None,
+            bids_validate=False,
+        )[0]
+
+    found = _query(sessions[0])
+    assert len(found['t1w']) == n_anats[0], pformat(found)
+
+    found = _query(sessions[1])
+    assert len(found['t1w']) == n_anats[1], pformat(found)
+
+    found = _query(sessions)
+    assert len(found['t1w']) == n_anats[2], pformat(found)
diff --git a/qsiprep/tests/tests.py b/qsiprep/tests/tests.py
@@ -31,7 +31,6 @@
 from toml import loads
 
 from qsiprep.data import load as load_data
-from qsiprep.utils import doc
 
 
 @contextmanager
@@ -61,7 +60,6 @@ def mock_config():
     config.loggers.init()
 
     config.execution.work_dir = Path(mkdtemp())
-    config.execution.fmri_dir = Path(doc.download_example_data(out_dir=mkdtemp()))
     config.execution.output_dir = Path(mkdtemp())
     config.execution.bids_database_dir = None
     config.execution._layout = None

diff --git a/qsiprep/utils/bids.py b/qsiprep/utils/bids.py
@@ -190,6 +190,8 @@ def collect_participants(bids_dir, participant_label=None, strict=False, bids_va
 
 def collect_data(bids_dir, participant_label, session_id=None, filters=None, bids_validate=True):
     """Use pybids to retrieve the input data for a given participant."""
+    import yaml
+
     if isinstance(bids_dir, BIDSLayout):
         layout = bids_dir
     else:
@@ -205,8 +207,10 @@ def collect_data(bids_dir, participant_label, session_id=None, filters=None, bid
         'dwi': {'datatype': 'dwi', 'part': ['mag', None], 'suffix': 'dwi'},
     }
     bids_filters = filters or {}
-    for acq, entities in bids_filters.items():
-        if ('session' in queries[acq]) and (session_id is not None):
+    for acq in queries.keys():
+        entities = bids_filters.get(acq, {})
+
+        if ('session' in entities.keys()) and (session_id is not None):
             config.loggers.workflow.warning(
                 'BIDS filter file value for session may conflict with values specified '
                 'on the command line'
@@ -226,6 +230,11 @@ def collect_data(bids_dir, participant_label, session_id=None, filters=None, bid
         for dtype, query in queries.items()
     }
 
+    config.loggers.workflow.log(
+        25,
+        f'Collected data:\n{yaml.dump(subj_data, default_flow_style=False, indent=4)}',
+    )
+
     return subj_data, layout