Skip to content

Commit

Permalink
Merge pull request #1005 from cfe-lab/samplesheetparser
Browse files Browse the repository at this point in the history
Correctly handle underscores in sample sheet parsing
  • Loading branch information
Donaim authored Jul 10, 2024
2 parents 06a1064 + c9c6dee commit 8d0a52c
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
46 changes: 46 additions & 0 deletions micall/tests/test_sample_sheet_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,52 @@ def test_extra_commas(self):
ss = sample_sheet_parser(StringIO(stub_sample_sheet))
self.assertEqual(ss["Experiment Name"], "10-Jul-2014")

def test_underscores_in_sample_name(self):
    """
    Extracts the correct project code and sample name in presence of underscores.

    Parses a two-row sample sheet whose Sample_Name values have the form
    "<sample>_<project>" and checks that each 'DataSplit' row reports the
    expected filename, project, sample and sample number.

    The Description tags of the second row refer to "Sample2_Foo_Proj2",
    i.e. a name carrying an extra underscore -- presumably to confirm that
    such tags no longer break parsing (verify against the parser).
    """

    # A backslash at the end of a line inside this (non-raw) literal is a
    # line continuation, so each sample is a single CSV row for the parser.
    stub_sample_sheet = """
[Header]
IEMFileVersion,3
Investigator Name,RL
Project Name,10-Jul-2014_v1test
Experiment Name,10-Jul-2014_v1test
Date,07/10/2014
Workflow,GenerateFASTQ
Assay,Nextera
Description,Nextera
Chemistry,Amplicon
[Reads]
251
251
[Settings]
[Data]
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,index,index2,Sample_Project,Description,GenomeFolder
CFE_SomeId_10-Jul-2014_N501-N701_Sample1_Proj1,Sample1_Proj1,10-Jul-2014_testing,N/A,ACGTACGT,TGCATGCA,\
10-Jul-2014_testing,Research:Sample1_Proj1:TRUE Comments:Sample1_Proj1:thisiscommentone \
Disablecontamcheck:Sample1_Proj1:FALSE,
CFE_SomeId_10-Jul-2014_N501-N702_Sample2_Proj2,Sample2_Proj2,10-Jul-2014_testing,N/A,AAAAGGGG,CCCCTTTT,\
10-Jul-2014_testing,Research:Sample2_Foo_Proj2:FALSE Comments:Sample2_Foo_Proj2:thisiscommenttwo \
Chemistry:Sample2_Foo_Proj2:BreakingBad Disablecontamcheck:Sample2_Foo_Proj2:TRUE,
"""

    ss = sample_sheet_parser(StringIO(stub_sample_sheet))
    split_rows = ss['DataSplit']

    # Use the TestCase assertion helpers (as the neighbouring tests do) so
    # the checks survive `python -O` and failures show actual vs. expected.
    self.assertEqual(len(split_rows), 2)

    expected_rows = [
        {
            'filename': 'Sample1-Proj1_S1',
            'project': 'Proj1',
            'sample': 'Sample1',
            'sample_number': 'S1',
        },
        {
            'filename': 'Sample2-Proj2_S2',
            'project': 'Proj2',
            'sample': 'Sample2',
            'sample_number': 'S2',
        },
    ]
    for row_index, expected in enumerate(expected_rows):
        for field, expected_value in expected.items():
            self.assertEqual(
                split_rows[row_index][field],
                expected_value,
                f"row {row_index}, field {field!r}",
            )


def test_read_sample_sheet_overrides(tmpdir):
sample_sheet_path = Path(str(tmpdir)) / 'SampleSheet.csv'
Expand Down
7 changes: 5 additions & 2 deletions micall/utils/sample_sheet_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,12 @@ def sample_sheet_parser(handle):
samp, proj, val = None, None, None
if sample_sheet_version == 1:
sj, val = elem.split(':')
samp, proj = sj.split(project_delimiter_v1)
components = sj.split(project_delimiter_v1)
samp, proj = (project_delimiter_v1.join(components[:-1]), components[-1])
elif sample_sheet_version == 2:
samp, proj, val = elem.split(project_delimiter_v2)
components = elem.split(project_delimiter_v2)
samp, proj, val = (project_delimiter_v2.join(components[:-2]),
components[-2], components[-1])

if samp == entry['sample'] and proj == entry['project']:
if name == 'Research':
Expand Down

0 comments on commit 8d0a52c

Please sign in to comment.