From 34805cb348079f98142c41fcc3dc41609f1ba31e Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Wed, 21 Dec 2016 10:46:38 +0000 Subject: [PATCH] Ignore additional columns in MLST profile --- ariba/mlst_profile.py | 5 ++--- ...ile_test.init_multiple_extra_columns.profile.tsv | 3 +++ ariba/tests/mlst_profile_test.py | 13 +++++++++++++ setup.py | 2 +- 4 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 ariba/tests/data/mlst_profile_test.init_multiple_extra_columns.profile.tsv diff --git a/ariba/mlst_profile.py b/ariba/mlst_profile.py index 5a24522b..3db2ac33 100644 --- a/ariba/mlst_profile.py +++ b/ariba/mlst_profile.py @@ -8,6 +8,7 @@ class MlstProfile: def __init__(self, infile, duplicate_warnings=True): self.infile = infile self.duplicate_warnings = duplicate_warnings + self.columns_to_ignore = ['clonal_complex', 'CC', 'Lineage', 'mlst_clade', 'species'] if not os.path.exists(self.infile): raise Error('Error! Input file "' + self.infile + '" not found.') @@ -22,9 +23,7 @@ def _load_input_file(self): if reader.fieldnames[0] != 'ST': raise Error('Error. Expected first column of profile file "' + self.infile + '" to be "ST"') - self.genes_list = reader.fieldnames[1:] - if self.genes_list[-1] == 'clonal_complex': - self.genes_list.pop() + self.genes_list = [column_name for column_name in reader.fieldnames[1:] if column_name not in self.columns_to_ignore] for row in reader: type_tuple = tuple(int(row[x]) for x in self.genes_list) diff --git a/ariba/tests/data/mlst_profile_test.init_multiple_extra_columns.profile.tsv b/ariba/tests/data/mlst_profile_test.init_multiple_extra_columns.profile.tsv new file mode 100644 index 00000000..966d7129 --- /dev/null +++ b/ariba/tests/data/mlst_profile_test.init_multiple_extra_columns.profile.tsv @@ -0,0 +1,3 @@ +ST nusA rpoB eno gltB lepA nuoL nrdA clonal_complex CC Lineage mlst_clade species +1 1 26 2 2 59 8 1 1 CC10 II 123 Bacteria +2 1 26 2 4 59 2 5 2 CC10 II 123 Bacteria diff --git a/ariba/tests/mlst_profile_test.py b/ariba/tests/mlst_profile_test.py index 78241e6c..f165816e 100644 --- a/ariba/tests/mlst_profile_test.py +++ b/ariba/tests/mlst_profile_test.py @@ -19,6 +19,19 @@ def test_init(self): (1, 26, 2, 4, 59, 2, 5): 2 } self.assertEqual(expected_dict, profile.profile_to_type) + + def test_init_multiple_extra_columns(self): + '''test init''' + infile = os.path.join(data_dir, 'mlst_profile_test.init_multiple_extra_columns.profile.tsv') + profile = mlst_profile.MlstProfile(infile) + expected_genes = ['nusA', 'rpoB', 'eno', 'gltB', 'lepA', 'nuoL', 'nrdA'] + self.assertEqual(expected_genes, profile.genes_list) + self.assertEqual(set(expected_genes), profile.genes_set) + expected_dict = { + (1, 26, 2, 2, 59, 8, 1): 1, + (1, 26, 2, 4, 59, 2, 5): 2 + } + self.assertEqual(expected_dict, profile.profile_to_type) def test_has_gene(self): diff --git a/setup.py b/setup.py index b86afc51..d8af3cce 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ setup( ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod], name='ariba', - version='2.6.0', + version='2.6.1', description='ARIBA: Antibiotic Resistance Identification By Assembly', packages = find_packages(), package_data={'ariba': ['test_run_data/*']},