From 565f8028c8aa095a9ecb48dffb68bf8bfe76f433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Tue, 12 Mar 2019 16:46:57 +0000 Subject: [PATCH 01/10] Added extra patterns --- .gitignore | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 98a24c8c..175a4e6f 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ __pycache__/ # Distribution / packaging .Python env/ +venv/ build/ develop-eggs/ dist/ @@ -26,7 +27,7 @@ sdist/ var/ *.egg-info/ .installed.cfg -*.egg +*.egg* # PyInstaller # Usually these files are written by a python script from a template @@ -43,6 +44,7 @@ htmlcov/ .tox/ .coverage .cache +out.card* nosetests.xml coverage.xml @@ -55,3 +57,10 @@ docs/_build/ # PyBuilder target/ + +# PyCharm +.idea + +# Mac files +.DS_Store + From e572be2aed1ab7816eeee405f8063839d0849471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Tue, 12 Mar 2019 16:49:50 +0000 Subject: [PATCH 02/10] Updated version numbers --- Dockerfile | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5cde6de9..d1a24ec5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,7 +29,7 @@ ENV ARIBA_BOWTIE2=$PWD/bowtie2-2.2.9/bowtie2 ARIBA_CDHIT=cdhit-est MPLBACKEND="a RUN git clone https://github.com/sanger-pathogens/ariba.git \ && cd ariba \ - && git checkout v2.12.0 \ + && git checkout v2.13.4 \ && python3 setup.py test \ && python3 setup.py install diff --git a/setup.py b/setup.py index 6d02ce58..17c6fd87 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ setup( ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod], name='ariba', - version='2.13.3', + version='2.13.4', description='ARIBA: Antibiotic Resistance Identification By Assembly', packages = find_packages(), package_data={'ariba': ['test_run_data/*', 'tb_data/*']}, From da79fad74a49aa68b0b55c562d7a5be335eb71dc Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Tue, 12 Mar 2019 17:06:00 +0000 Subject: [PATCH 03/10] Changed awk usage to gawk as awk does not support bitwise operations on OS X, leading to test failures --- README.md | 2 ++ ariba/mapping.py | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e4572d14..17253907 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,8 @@ Download the latest release from this github repository or clone it. Run the tes python3 setup.py test +**Note for OS X:** The tests require gawk which will need to be installed separately, e.g. via Homebrew. + If the tests all pass, install: python3 setup.py install diff --git a/ariba/mapping.py b/ariba/mapping.py index 40564bae..aee16499 100644 --- a/ariba/mapping.py +++ b/ariba/mapping.py @@ -86,8 +86,10 @@ def run_bowtie2( if LooseVersion(bowtie2_version) >= LooseVersion('2.3.1'): map_cmd.append('--score-min G,1,10') + # We use gawk instead of awk here as we need bitwise comparisons + # and these are not available via awk on Mac OSX. 
if remove_both_unmapped: - map_cmd.append(r''' | awk ' !(and($2,4)) || !(and($2,8)) ' ''') + map_cmd.append(r''' | gawk ' !(and($2,4)) || !(and($2,8)) ' ''') tmp_sam_file = out_prefix + '.unsorted.sam' map_cmd.append(' > ' + tmp_sam_file) From 70c82f56fbf8c6658081f9a2a7a9fac4ee97fe2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Tue, 12 Mar 2019 17:11:11 +0000 Subject: [PATCH 04/10] Added ability to set CD-HIT memory limit on the command line - Issue #255 --- ariba/cdhit.py | 26 ++++++++++++++++++++------ ariba/ref_preparer.py | 5 ++++- ariba/reference_data.py | 3 ++- ariba/tasks/prepareref.py | 1 + ariba/tests/cdhit_test.py | 36 ++++++++++++++++++++++++++++++++++++ scripts/ariba | 3 ++- 6 files changed, 65 insertions(+), 9 deletions(-) diff --git a/ariba/cdhit.py b/ariba/cdhit.py index 5b118ec1..52aa298f 100644 --- a/ariba/cdhit.py +++ b/ariba/cdhit.py @@ -13,6 +13,7 @@ def __init__( seq_identity_threshold=0.9, threads=1, length_diff_cutoff=0.0, + memory_limit=None, verbose=False, min_cluster_number=0 ): @@ -20,10 +21,14 @@ def __init__( if not os.path.exists(infile): raise Error('File not found: "' + infile + '". Cannot continue') + if (memory_limit is not None) and (memory_limit < 0): + raise Error('Input parameter cdhit_max_memory is set to an invalid value. 
Cannot continue') + self.infile = os.path.abspath(infile) self.seq_identity_threshold = seq_identity_threshold self.threads = threads self.length_diff_cutoff = length_diff_cutoff + self.memory_limit = memory_limit self.verbose = verbose self.min_cluster_number = min_cluster_number extern_progs = external_progs.ExternalProgs(fail_on_error=True, using_spades=False) @@ -133,15 +138,11 @@ def _get_clusters_from_bak_file(filename, min_cluster_number=0): return clusters - def run(self): - tmpdir = tempfile.mkdtemp(prefix='tmp.run_cd-hit.', dir=os.getcwd()) - cdhit_fasta = os.path.join(tmpdir, 'cdhit') - cluster_info_outfile = cdhit_fasta + '.bak.clstr' - + def get_run_cmd(self, output_file): cmd = ' '.join([ self.cd_hit_est, '-i', self.infile, - '-o', cdhit_fasta, + '-o', output_file, '-c', str(self.seq_identity_threshold), '-T', str(self.threads), '-s', str(self.length_diff_cutoff), @@ -149,8 +150,21 @@ def run(self): '-bak 1', ]) + # Add in cdhit memory allocation if one has been specified + if self.memory_limit is not None: + cmd = ' '.join([cmd, '-M', str(self.memory_limit)]) + + return cmd + + + def run(self): + tmpdir = tempfile.mkdtemp(prefix='tmp.run_cd-hit.', dir=os.getcwd()) + cdhit_fasta = os.path.join(tmpdir, 'cdhit') + cluster_info_outfile = cdhit_fasta + '.bak.clstr' + cmd = self.get_run_cmd(cdhit_fasta) common.syscall(cmd, verbose=self.verbose) clusters = self._get_clusters_from_bak_file(cluster_info_outfile, self.min_cluster_number) common.rmtree(tmpdir) return clusters + diff --git a/ariba/ref_preparer.py b/ariba/ref_preparer.py index c3418141..89ace9d1 100644 --- a/ariba/ref_preparer.py +++ b/ariba/ref_preparer.py @@ -19,6 +19,7 @@ def __init__(self, genetic_code=11, cdhit_min_id=0.9, cdhit_min_length=0.0, + cdhit_max_memory=None, run_cdhit=True, clusters_file=None, threads=1, @@ -40,6 +41,7 @@ def __init__(self, self.genetic_code = genetic_code self.cdhit_min_id = cdhit_min_id self.cdhit_min_length = cdhit_min_length + self.cdhit_max_memory = 
cdhit_max_memory self.run_cdhit = run_cdhit self.clusters_file = clusters_file self.threads = threads @@ -193,6 +195,7 @@ def run(self, outdir): seq_identity_threshold=self.cdhit_min_id, threads=self.threads, length_diff_cutoff=self.cdhit_min_length, + memory_limit=self.cdhit_max_memory, nocluster=not self.run_cdhit, verbose=self.verbose, clusters_file=self.clusters_file, @@ -214,4 +217,4 @@ def run(self, outdir): print(' grep REMOVE', os.path.join(outdir, '01.filter.check_genes.log'), file=sys.stderr) if number_of_bad_variants_logged > 0: - print('WARNING. Problem with at least one variant. Problem variants are rmoved. Please see the file', os.path.join(outdir, '01.filter.check_metadata.log'), 'for details.', file=sys.stderr) + print('WARNING. Problem with at least one variant. Problem variants are removed. Please see the file', os.path.join(outdir, '01.filter.check_metadata.log'), 'for details.', file=sys.stderr) diff --git a/ariba/reference_data.py b/ariba/reference_data.py index 00c2ae73..369914b1 100644 --- a/ariba/reference_data.py +++ b/ariba/reference_data.py @@ -434,7 +434,7 @@ def write_cluster_allocation_file(clusters, outfile): pyfastaq.utils.close(f_out) - def cluster_with_cdhit(self, outprefix, seq_identity_threshold=0.9, threads=1, length_diff_cutoff=0.0, nocluster=False, verbose=False, clusters_file=None): + def cluster_with_cdhit(self, outprefix, seq_identity_threshold=0.9, threads=1, length_diff_cutoff=0.0, memory_limit=None, nocluster=False, verbose=False, clusters_file=None): clusters = {} ReferenceData._write_sequences_to_files(self.sequences, self.metadata, outprefix) ref_types = ('noncoding', 'noncoding.varonly', 'gene', 'gene.varonly') @@ -454,6 +454,7 @@ def cluster_with_cdhit(self, outprefix, seq_identity_threshold=0.9, threads=1, l seq_identity_threshold=seq_identity_threshold, threads=threads, length_diff_cutoff=length_diff_cutoff, + memory_limit=memory_limit, verbose=verbose, min_cluster_number = min_cluster_number, ) diff --git 
a/ariba/tasks/prepareref.py b/ariba/tasks/prepareref.py index 8ddedb48..401892d2 100644 --- a/ariba/tasks/prepareref.py +++ b/ariba/tasks/prepareref.py @@ -21,6 +21,7 @@ def run(options): genetic_code=options.genetic_code, cdhit_min_id=options.cdhit_min_id, cdhit_min_length=options.cdhit_min_length, + cdhit_max_memory=options.cdhit_max_memory, run_cdhit=not options.no_cdhit, clusters_file=options.cdhit_clusters, threads=options.threads, diff --git a/ariba/tests/cdhit_test.py b/ariba/tests/cdhit_test.py index da2d9d26..541622dc 100644 --- a/ariba/tests/cdhit_test.py +++ b/ariba/tests/cdhit_test.py @@ -1,7 +1,9 @@ import unittest import os +import re from ariba import cdhit, external_progs + modules_dir = os.path.dirname(os.path.abspath(cdhit.__file__)) data_dir = os.path.join(modules_dir, 'tests', 'data') extern_progs = external_progs.ExternalProgs() @@ -13,6 +15,13 @@ def test_init_fail_infile_missing(self): cdhit.Runner('oopsnotafile', 'out') + def test_init_fail_invalid_memory(self): + '''test_init_fail_invalid_memory''' + infile = os.path.join(data_dir, 'cdhit_test_run.in.fa') + with self.assertRaises(cdhit.Error): + cdhit.Runner(infile, memory_limit=-10) + + def test_get_clusters_from_bak_file(self): '''test _get_clusters_from_bak_file''' infile = os.path.join(data_dir, 'cdhit_test_get_clusters_from_bak_file.in') @@ -162,3 +171,30 @@ def test_run_get_clusters_from_file_with_renaming(self): '1': {'seq3'}, } self.assertEqual(clusters, expected_clusters) + + + def test_get_run_cmd_with_default_memory(self): + '''test_get_run_cmd_with_default_memory''' + fa_infile = os.path.join(data_dir, 'cdhit_test_run_get_clusters_from_dict_rename.in.fa') + r = cdhit.Runner(fa_infile) + run_cmd = r.get_run_cmd('foo/bar/file.out') + match = re.search('^.+cd-hit-est -i .+ -o foo/bar/file.out -c 0.9 -T 1 -s 0.0 -d 0 -bak 1$', run_cmd) + self.assertTrue(match) + + + def test_get_run_cmd_with_non_default_memory(self): + '''test_get_run_cmd_with_non_default_memory''' + fa_infile = 
os.path.join(data_dir, 'cdhit_test_run_get_clusters_from_dict_rename.in.fa') + r = cdhit.Runner(fa_infile, memory_limit=900) + run_cmd = r.get_run_cmd('foo/bar/file.out') + match = re.search('^.+cd-hit-est -i .+ -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 900$', run_cmd) + self.assertTrue(match) + + + def test_get_run_cmd_with_unlimited_memory(self): + '''test_get_run_cmd_with_unlimited_memory''' + fa_infile = os.path.join(data_dir, 'cdhit_test_run_get_clusters_from_dict_rename.in.fa') + r = cdhit.Runner(fa_infile, memory_limit=0) + run_cmd = r.get_run_cmd('foo/bar/file.out') + match = re.search('^.+cd-hit-est -i .+ -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 0$', run_cmd) + self.assertTrue(match) diff --git a/scripts/ariba b/scripts/ariba index c66d9b2a..404c5d57 100755 --- a/scripts/ariba +++ b/scripts/ariba @@ -135,7 +135,8 @@ cdhit_group = subparser_prepareref.add_argument_group('cd-hit options') cdhit_group.add_argument('--no_cdhit', action='store_true', help='Do not run cd-hit. Each input sequence is put into its own "cluster". Incompatible with --cdhit_clusters.') cdhit_group.add_argument('--cdhit_clusters', help='File specifying how the sequences should be clustered. Will be used instead of running cdhit. Format is one cluster per line. Sequence names separated by whitespace. Incompatible with --no_cdhit', metavar='FILENAME') cdhit_group.add_argument('--cdhit_min_id', type=float, help='Sequence identity threshold (cd-hit option -c) [%(default)s]', default=0.9, metavar='FLOAT') -cdhit_group.add_argument('--cdhit_min_length', type=float, help='length difference cutoff (cd-hit option -s) [%(default)s]', default=0.0, metavar='FLOAT') +cdhit_group.add_argument('--cdhit_min_length', type=float, help='Length difference cutoff (cd-hit option -s) [%(default)s]', default=0.0, metavar='FLOAT') +cdhit_group.add_argument('--cdhit_max_memory', type=int, help='Memory limit in MB (cd-hit option -M) [%(default)s]. 
Use 0 for unlimited.', metavar='INT') other_prep_group = subparser_prepareref.add_argument_group('other options') other_prep_group.add_argument('--min_gene_length', type=int, help='Minimum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=6) From 968659b4341bf63f455d9019ab00eb628f77655e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Wed, 13 Mar 2019 11:00:04 +0000 Subject: [PATCH 05/10] Changed some test asserts --- ariba/tests/cdhit_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ariba/tests/cdhit_test.py b/ariba/tests/cdhit_test.py index 541622dc..e9b53987 100644 --- a/ariba/tests/cdhit_test.py +++ b/ariba/tests/cdhit_test.py @@ -179,7 +179,7 @@ def test_get_run_cmd_with_default_memory(self): r = cdhit.Runner(fa_infile) run_cmd = r.get_run_cmd('foo/bar/file.out') match = re.search('^.+cd-hit-est -i .+ -o foo/bar/file.out -c 0.9 -T 1 -s 0.0 -d 0 -bak 1$', run_cmd) - self.assertTrue(match) + self.assertIsNotNone(match) def test_get_run_cmd_with_non_default_memory(self): @@ -188,7 +188,7 @@ def test_get_run_cmd_with_non_default_memory(self): r = cdhit.Runner(fa_infile, memory_limit=900) run_cmd = r.get_run_cmd('foo/bar/file.out') match = re.search('^.+cd-hit-est -i .+ -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 900$', run_cmd) - self.assertTrue(match) + self.assertIsNotNone(match) def test_get_run_cmd_with_unlimited_memory(self): @@ -197,4 +197,4 @@ def test_get_run_cmd_with_unlimited_memory(self): r = cdhit.Runner(fa_infile, memory_limit=0) run_cmd = r.get_run_cmd('foo/bar/file.out') match = re.search('^.+cd-hit-est -i .+ -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 0$', run_cmd) - self.assertTrue(match) + self.assertIsNotNone(match) From 736955e5ebd1e53e50d727cc83e4b8e157238efe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Wed, 13 Mar 2019 11:18:34 +0000 Subject: [PATCH 06/10] Added some debug to track Docker test errors --- ariba/tests/cdhit_test.py | 6 
+++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ariba/tests/cdhit_test.py b/ariba/tests/cdhit_test.py index e9b53987..e2588514 100644 --- a/ariba/tests/cdhit_test.py +++ b/ariba/tests/cdhit_test.py @@ -179,7 +179,7 @@ def test_get_run_cmd_with_default_memory(self): r = cdhit.Runner(fa_infile) run_cmd = r.get_run_cmd('foo/bar/file.out') match = re.search('^.+cd-hit-est -i .+ -o foo/bar/file.out -c 0.9 -T 1 -s 0.0 -d 0 -bak 1$', run_cmd) - self.assertIsNotNone(match) + self.assertIsNotNone(match, msg="Command output was " + run_cmd) def test_get_run_cmd_with_non_default_memory(self): @@ -188,7 +188,7 @@ def test_get_run_cmd_with_non_default_memory(self): r = cdhit.Runner(fa_infile, memory_limit=900) run_cmd = r.get_run_cmd('foo/bar/file.out') match = re.search('^.+cd-hit-est -i .+ -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 900$', run_cmd) - self.assertIsNotNone(match) + self.assertIsNotNone(match, msg="Command output was " + run_cmd) def test_get_run_cmd_with_unlimited_memory(self): @@ -197,4 +197,4 @@ def test_get_run_cmd_with_unlimited_memory(self): r = cdhit.Runner(fa_infile, memory_limit=0) run_cmd = r.get_run_cmd('foo/bar/file.out') match = re.search('^.+cd-hit-est -i .+ -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 0$', run_cmd) - self.assertIsNotNone(match) + self.assertIsNotNone(match, msg="Command output was " + run_cmd) From 57430c7e9838763baac80e8dfda3006d08e5984f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Wed, 13 Mar 2019 11:57:53 +0000 Subject: [PATCH 07/10] Fixed some tests for the Docker build --- ariba/tests/cdhit_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ariba/tests/cdhit_test.py b/ariba/tests/cdhit_test.py index e2588514..6de95f7f 100644 --- a/ariba/tests/cdhit_test.py +++ b/ariba/tests/cdhit_test.py @@ -178,7 +178,7 @@ def test_get_run_cmd_with_default_memory(self): fa_infile = os.path.join(data_dir, 'cdhit_test_run_get_clusters_from_dict_rename.in.fa') r = cdhit.Runner(fa_infile) 
run_cmd = r.get_run_cmd('foo/bar/file.out') - match = re.search('^.+cd-hit-est -i .+ -o foo/bar/file.out -c 0.9 -T 1 -s 0.0 -d 0 -bak 1$', run_cmd) + match = re.search('^.+ -o foo/bar/file.out -c 0.9 -T 1 -s 0.0 -d 0 -bak 1$', run_cmd) self.assertIsNotNone(match, msg="Command output was " + run_cmd) @@ -187,7 +187,7 @@ def test_get_run_cmd_with_non_default_memory(self): fa_infile = os.path.join(data_dir, 'cdhit_test_run_get_clusters_from_dict_rename.in.fa') r = cdhit.Runner(fa_infile, memory_limit=900) run_cmd = r.get_run_cmd('foo/bar/file.out') - match = re.search('^.+cd-hit-est -i .+ -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 900$', run_cmd) + match = re.search('^.+ -o foo/bar/file.out -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 900$', run_cmd) self.assertIsNotNone(match, msg="Command output was " + run_cmd) @@ -196,5 +196,5 @@ def test_get_run_cmd_with_unlimited_memory(self): fa_infile = os.path.join(data_dir, 'cdhit_test_run_get_clusters_from_dict_rename.in.fa') r = cdhit.Runner(fa_infile, memory_limit=0) run_cmd = r.get_run_cmd('foo/bar/file.out') - match = re.search('^.+cd-hit-est -i .+ -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 0$', run_cmd) + match = re.search('^.+ -o foo/bar/file.out -c 0.9 -T 1 -s 0.0 -d 0 -bak 1 -M 0$', run_cmd) self.assertIsNotNone(match, msg="Command output was " + run_cmd) From bb118b367d77a4804990d35b90419f863f23b2cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Wed, 13 Mar 2019 14:08:24 +0000 Subject: [PATCH 08/10] Changed x's to upper case to see if this fixes the Docker build --- ariba/tests/assembly_variants_test.py | 88 +++++++++++++-------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/ariba/tests/assembly_variants_test.py b/ariba/tests/assembly_variants_test.py index a149fd51..38656774 100644 --- a/ariba/tests/assembly_variants_test.py +++ b/ariba/tests/assembly_variants_test.py @@ -52,23 +52,23 @@ def test_get_mummer_variants_has_variants(self): def test_get_variant_effect(self): '''test 
_get_variant_effect''' ref_seq = pyfastaq.sequences.Fasta('gene', 'GATCGCGAAGCGATGACCCATGAAGCGACCGAACGCTGA') - v1 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tT\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) - v1 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tT\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) - v2 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tA\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tT\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) - v4 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tA\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) - v5 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) - v6 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) - v7 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tT\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tT\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tA\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tT\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v4 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tA\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v5 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v6 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v7 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) v7.qry_base = 'GAT' - v8 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v8 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) v8.qry_base = 'TGA' - v9 = 
pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v9 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) v9.qry_base = 'ATTCCT' - v10 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v10 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) v10.ref_base = 'CGC' v10.ref_end = 5 - v11 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v11 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) v11.ref_base = 'CGCGAA' v11.ref_end = 8 @@ -93,9 +93,9 @@ def test_get_variant_effect(self): def test_filter_mummer_variants(self): '''test filter_mummer_variants''' ref_seq = pyfastaq.sequences.Fasta('gene', 'GATCGCGAAGCGATGACCCATGAAGCGACCGAACGCTGA') - v1 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tT\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) - v2 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tA\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('12\tG\tT\t12\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tT\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tA\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('12\tG\tT\t12\tX\tX\t39\t39\tX\tX\tgene\tcontig')) mummer_variants = {'contig': [[v1, v2], v3]} assembly_variants.AssemblyVariants._filter_mummer_variants(mummer_variants, ref_seq) expected = {'contig': [[v1, v2]]} @@ -110,13 +110,13 @@ def test_one_var_one_ctg_noncdg(self): ref_sequence_name = 'non_coding' refdata_var_dict = refdata.metadata[ref_sequence_name] - v0 = pymummer.variant.Variant(pymummer.snp.Snp('2\tT\tA\t2\tx\tx\t42\t42\tx\tx\tnon_coding\tcontig')) + v0 = 
pymummer.variant.Variant(pymummer.snp.Snp('2\tT\tA\t2\tX\tX\t42\t42\tX\tX\tnon_coding\tcontig')) # ref has A at position 3, which is variant type. This gives contig the wild type C. Shouldn't report - v1 = pymummer.variant.Variant(pymummer.snp.Snp('3\tA\tC\t3\tx\tx\t42\t42\tx\tx\tnon_coding\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('3\tA\tC\t3\tX\tX\t42\t42\tX\tX\tnon_coding\tcontig')) # ref has T at position 5, which is wild type. This gives contig variant type A. Should report - v2 = pymummer.variant.Variant(pymummer.snp.Snp('5\tT\tA\t5\tx\tx\t42\t42\tx\tx\tnon_coding\tcontig')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('5\tT\tA\t5\tX\tX\t42\t42\tX\tX\tnon_coding\tcontig')) meta0 = sequence_metadata.SequenceMetadata('non_coding\t0\t0\tC3A\tid1\tref has variant type A') meta2 = sequence_metadata.SequenceMetadata('non_coding\t0\t0\tT5A\tid1\tref has wild type T') @@ -153,33 +153,33 @@ def test_one_var_one_ctg_cdg(self): ref_sequence = refdata.sequence(ref_sequence_name) refdata_var_dict = refdata.metadata[ref_sequence_name] - v0 = pymummer.variant.Variant(pymummer.snp.Snp('6\tT\tA\t6\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - v1 = pymummer.variant.Variant(pymummer.snp.Snp('9\tA\tT\t9\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - v2 = pymummer.variant.Variant(pymummer.snp.Snp('18\tG\tT\t18\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('21\tC\tT\t21\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - v4 = pymummer.variant.Variant(pymummer.snp.Snp('7\tA\tT\t7\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - v5 = pymummer.variant.Variant(pymummer.snp.Snp('12\tA\tC\t11\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + v0 = pymummer.variant.Variant(pymummer.snp.Snp('6\tT\tA\t6\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('9\tA\tT\t9\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + v2 = 
pymummer.variant.Variant(pymummer.snp.Snp('18\tG\tT\t18\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('21\tC\tT\t21\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + v4 = pymummer.variant.Variant(pymummer.snp.Snp('7\tA\tT\t7\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + v5 = pymummer.variant.Variant(pymummer.snp.Snp('12\tA\tC\t11\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - v6 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - self.assertTrue(v6.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + v6 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + self.assertTrue(v6.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - v7 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - self.assertTrue(v7.update_indel(pymummer.snp.Snp('4\t.\tA\t5\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + v7 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + self.assertTrue(v7.update_indel(pymummer.snp.Snp('4\t.\tA\t5\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - v8 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - self.assertTrue(v8.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - self.assertTrue(v8.update_indel(pymummer.snp.Snp('6\tT\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + v8 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + self.assertTrue(v8.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + 
self.assertTrue(v8.update_indel(pymummer.snp.Snp('6\tT\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - v9 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('6\tT\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('7\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('8\tG\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('9\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + v9 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('6\tT\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('7\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('8\tG\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('9\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - v10 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - self.assertTrue(v10.update_indel(pymummer.snp.Snp('4\t.\tT\t5\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - self.assertTrue(v10.update_indel(pymummer.snp.Snp('4\t.\tT\t6\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + v10 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + 
self.assertTrue(v10.update_indel(pymummer.snp.Snp('4\t.\tT\t5\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + self.assertTrue(v10.update_indel(pymummer.snp.Snp('4\t.\tT\t6\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) mummer_variants = [[v0], [v1], [v2], [v3], [v4], [v5], [v6], [v7], [v8], [v9], [v10]] @@ -306,8 +306,8 @@ def test_get_variants_presence_absence(self): os.unlink(metadata_tsv) nucmer_snp_file = os.path.join(data_dir, 'assembly_variants_test_get_variants_presence_absence.snps') - v2 = pymummer.variant.Variant(pymummer.snp.Snp('14\tC\tA\t14\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig1')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('15\tG\tC\t15\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig1')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('14\tC\tA\t14\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig1')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('15\tG\tC\t15\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig1')) ref_nucmer_coords = { 'contig1': [pyfastaq.intervals.Interval(0, 30)], @@ -354,8 +354,8 @@ def test_get_variants_variants_only(self): os.unlink(metadata_tsv) nucmer_snp_file = os.path.join(data_dir, 'assembly_variants_test_get_variants_variants_only.snps') - v2 = pymummer.variant.Variant(pymummer.snp.Snp('14\tC\tA\t14\tx\tx\t42\t42\tx\tx\tvariants_only\tcontig1')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('15\tG\tC\t15\tx\tx\t42\t42\tx\tx\tvariants_only\tcontig1')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('14\tC\tA\t14\tX\tX\t42\t42\tX\tX\tvariants_only\tcontig1')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('15\tG\tC\t15\tX\tX\t42\t42\tX\tX\tvariants_only\tcontig1')) ctg_nucmer_coords = { 'contig1': [pyfastaq.intervals.Interval(0, 41)], From d0a8d715d9241a4e23b836dc457a964280e72f10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Wed, 13 Mar 2019 15:05:07 +0000 Subject: [PATCH 09/10] Revert "Changed x's to upper case to see if this fixes the Docker build" Didn't fix the issue. 
This reverts commit bb118b367d77a4804990d35b90419f863f23b2cb. --- ariba/tests/assembly_variants_test.py | 88 +++++++++++++-------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/ariba/tests/assembly_variants_test.py b/ariba/tests/assembly_variants_test.py index 38656774..a149fd51 100644 --- a/ariba/tests/assembly_variants_test.py +++ b/ariba/tests/assembly_variants_test.py @@ -52,23 +52,23 @@ def test_get_mummer_variants_has_variants(self): def test_get_variant_effect(self): '''test _get_variant_effect''' ref_seq = pyfastaq.sequences.Fasta('gene', 'GATCGCGAAGCGATGACCCATGAAGCGACCGAACGCTGA') - v1 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tT\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) - v1 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tT\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) - v2 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tA\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tT\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) - v4 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tA\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) - v5 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) - v6 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) - v7 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tT\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tT\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tA\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tT\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v4 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tA\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v5 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v6 = 
pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v7 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) v7.qry_base = 'GAT' - v8 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v8 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) v8.qry_base = 'TGA' - v9 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v9 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tG\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) v9.qry_base = 'ATTCCT' - v10 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v10 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) v10.ref_base = 'CGC' v10.ref_end = 5 - v11 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v11 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\t.\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) v11.ref_base = 'CGCGAA' v11.ref_end = 8 @@ -93,9 +93,9 @@ def test_get_variant_effect(self): def test_filter_mummer_variants(self): '''test filter_mummer_variants''' ref_seq = pyfastaq.sequences.Fasta('gene', 'GATCGCGAAGCGATGACCCATGAAGCGACCGAACGCTGA') - v1 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tT\t4\tX\tX\t39\t39\tX\tX\tgene\tcontig')) - v2 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tA\t6\tX\tX\t39\t39\tX\tX\tgene\tcontig')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('12\tG\tT\t12\tX\tX\t39\t39\tX\tX\tgene\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('4\tC\tT\t4\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('6\tC\tA\t6\tx\tx\t39\t39\tx\tx\tgene\tcontig')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('12\tG\tT\t12\tx\tx\t39\t39\tx\tx\tgene\tcontig')) mummer_variants = {'contig': [[v1, v2], v3]} 
assembly_variants.AssemblyVariants._filter_mummer_variants(mummer_variants, ref_seq) expected = {'contig': [[v1, v2]]} @@ -110,13 +110,13 @@ def test_one_var_one_ctg_noncdg(self): ref_sequence_name = 'non_coding' refdata_var_dict = refdata.metadata[ref_sequence_name] - v0 = pymummer.variant.Variant(pymummer.snp.Snp('2\tT\tA\t2\tX\tX\t42\t42\tX\tX\tnon_coding\tcontig')) + v0 = pymummer.variant.Variant(pymummer.snp.Snp('2\tT\tA\t2\tx\tx\t42\t42\tx\tx\tnon_coding\tcontig')) # ref has A at position 3, which is variant type. This gives contig the wild type C. Shouldn't report - v1 = pymummer.variant.Variant(pymummer.snp.Snp('3\tA\tC\t3\tX\tX\t42\t42\tX\tX\tnon_coding\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('3\tA\tC\t3\tx\tx\t42\t42\tx\tx\tnon_coding\tcontig')) # ref has T at position 5, which is wild type. This gives contig variant type A. Should report - v2 = pymummer.variant.Variant(pymummer.snp.Snp('5\tT\tA\t5\tX\tX\t42\t42\tX\tX\tnon_coding\tcontig')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('5\tT\tA\t5\tx\tx\t42\t42\tx\tx\tnon_coding\tcontig')) meta0 = sequence_metadata.SequenceMetadata('non_coding\t0\t0\tC3A\tid1\tref has variant type A') meta2 = sequence_metadata.SequenceMetadata('non_coding\t0\t0\tT5A\tid1\tref has wild type T') @@ -153,33 +153,33 @@ def test_one_var_one_ctg_cdg(self): ref_sequence = refdata.sequence(ref_sequence_name) refdata_var_dict = refdata.metadata[ref_sequence_name] - v0 = pymummer.variant.Variant(pymummer.snp.Snp('6\tT\tA\t6\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - v1 = pymummer.variant.Variant(pymummer.snp.Snp('9\tA\tT\t9\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - v2 = pymummer.variant.Variant(pymummer.snp.Snp('18\tG\tT\t18\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('21\tC\tT\t21\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - v4 = pymummer.variant.Variant(pymummer.snp.Snp('7\tA\tT\t7\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - 
v5 = pymummer.variant.Variant(pymummer.snp.Snp('12\tA\tC\t11\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) + v0 = pymummer.variant.Variant(pymummer.snp.Snp('6\tT\tA\t6\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + v1 = pymummer.variant.Variant(pymummer.snp.Snp('9\tA\tT\t9\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('18\tG\tT\t18\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('21\tC\tT\t21\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + v4 = pymummer.variant.Variant(pymummer.snp.Snp('7\tA\tT\t7\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + v5 = pymummer.variant.Variant(pymummer.snp.Snp('12\tA\tC\t11\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) - v6 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - self.assertTrue(v6.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + v6 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + self.assertTrue(v6.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - v7 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - self.assertTrue(v7.update_indel(pymummer.snp.Snp('4\t.\tA\t5\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + v7 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + self.assertTrue(v7.update_indel(pymummer.snp.Snp('4\t.\tA\t5\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - v8 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - self.assertTrue(v8.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - 
self.assertTrue(v8.update_indel(pymummer.snp.Snp('6\tT\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + v8 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + self.assertTrue(v8.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + self.assertTrue(v8.update_indel(pymummer.snp.Snp('6\tT\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - v9 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('6\tT\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('7\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('8\tG\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - self.assertTrue(v9.update_indel(pymummer.snp.Snp('9\tA\t.\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + v9 = pymummer.variant.Variant(pymummer.snp.Snp('4\tG\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('5\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('6\tT\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('7\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('8\tG\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + self.assertTrue(v9.update_indel(pymummer.snp.Snp('9\tA\t.\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) - v10 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig')) - 
self.assertTrue(v10.update_indel(pymummer.snp.Snp('4\t.\tT\t5\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) - self.assertTrue(v10.update_indel(pymummer.snp.Snp('4\t.\tT\t6\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig'))) + v10 = pymummer.variant.Variant(pymummer.snp.Snp('4\t.\tA\t4\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig')) + self.assertTrue(v10.update_indel(pymummer.snp.Snp('4\t.\tT\t5\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) + self.assertTrue(v10.update_indel(pymummer.snp.Snp('4\t.\tT\t6\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig'))) mummer_variants = [[v0], [v1], [v2], [v3], [v4], [v5], [v6], [v7], [v8], [v9], [v10]] @@ -306,8 +306,8 @@ def test_get_variants_presence_absence(self): os.unlink(metadata_tsv) nucmer_snp_file = os.path.join(data_dir, 'assembly_variants_test_get_variants_presence_absence.snps') - v2 = pymummer.variant.Variant(pymummer.snp.Snp('14\tC\tA\t14\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig1')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('15\tG\tC\t15\tX\tX\t42\t42\tX\tX\tpresence_absence\tcontig1')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('14\tC\tA\t14\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig1')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('15\tG\tC\t15\tx\tx\t42\t42\tx\tx\tpresence_absence\tcontig1')) ref_nucmer_coords = { 'contig1': [pyfastaq.intervals.Interval(0, 30)], @@ -354,8 +354,8 @@ def test_get_variants_variants_only(self): os.unlink(metadata_tsv) nucmer_snp_file = os.path.join(data_dir, 'assembly_variants_test_get_variants_variants_only.snps') - v2 = pymummer.variant.Variant(pymummer.snp.Snp('14\tC\tA\t14\tX\tX\t42\t42\tX\tX\tvariants_only\tcontig1')) - v3 = pymummer.variant.Variant(pymummer.snp.Snp('15\tG\tC\t15\tX\tX\t42\t42\tX\tX\tvariants_only\tcontig1')) + v2 = pymummer.variant.Variant(pymummer.snp.Snp('14\tC\tA\t14\tx\tx\t42\t42\tx\tx\tvariants_only\tcontig1')) + v3 = pymummer.variant.Variant(pymummer.snp.Snp('15\tG\tC\t15\tx\tx\t42\t42\tx\tx\tvariants_only\tcontig1')) 
ctg_nucmer_coords = { 'contig1': [pyfastaq.intervals.Interval(0, 41)], From f8b8b2a0e7d00fd7bee6ffc84bb44c560030861c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Ckpepper=E2=80=9D?= Date: Thu, 14 Mar 2019 16:27:41 +0000 Subject: [PATCH 10/10] Changes to get Docker build working. Temporarily capped pymummer at version 0.10.3 (pymummer<=0.10.3) as later releases seem to break the ariba unit tests. --- Dockerfile | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d1a24ec5..b9614596 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,8 @@ RUN wget -q http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.2.9/bo # _tkinter.TclError: no display name and no $DISPLAY environment variable ENV ARIBA_BOWTIE2=$PWD/bowtie2-2.2.9/bowtie2 ARIBA_CDHIT=cdhit-est MPLBACKEND="agg" +RUN cd /usr/local/bin && ln -s /usr/bin/python3 python && cd + RUN git clone https://github.com/sanger-pathogens/ariba.git \ && cd ariba \ && git checkout v2.13.4 \ diff --git a/setup.py b/setup.py index 17c6fd87..05e946fb 100644 --- a/setup.py +++ b/setup.py @@ -72,7 +72,7 @@ 'matplotlib', 'pyfastaq >= 3.12.0', 'pysam >= 0.9.1', - 'pymummer>=0.10.2', + 'pymummer<=0.10.3', ], license='GPLv3', classifiers=[