Fix for issue #278 version 2

sanger-pathogens · Aug 23, 2019 · 868c995 · 868c995
1 parent af3b047
commit 868c995
Show file tree

Hide file tree

Showing 32 changed files with 232 additions and 19 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Change Log
 
+[v2.14.3](https://github.com/sanger-pathogens/ariba/tree/v2.14.3) (2019-08-23)
+[Full Changelog](https://github.com/sanger-pathogens/ariba/compare/v2.14.2...v2.14.3)
+
+**Fixed bugs:**
+
+- Version 3.0.3 of CARD breaks prepareref [\#278](https://github.com/sanger-pathogens/ariba/issues/278)
+- RT 667288: Change docker file Ariba git clone to a copy
+
 [v2.14.2](https://github.com/sanger-pathogens/ariba/tree/v2.14.2) (2019-06-18)
 [Full Changelog](https://github.com/sanger-pathogens/ariba/compare/v2.14.1...v2.14.2)
 

diff --git a/Dockerfile b/Dockerfile
@@ -7,13 +7,13 @@ MAINTAINER [email protected]
 # Software version numbers
 ARG BOWTIE2_VERSION=2.2.9
 ARG SPADES_VERSION=3.13.1
-ARG CDHIT_VERSION=4.8.1
 ARG ARIBA_TAG=master
 ARG ARIBA_BUILD_DIR=/ariba
 
 RUN apt-get -qq update && \
     apt-get install --no-install-recommends -y \
   build-essential \
+  cd-hit \
   curl \
   git \
   libbz2-dev \
@@ -28,16 +28,6 @@ RUN apt-get -qq update && \
   wget \
   zlib1g-dev
 
-# Install cd-hit
-# We build cd-hit "manually" rather than using apt-get - see Ariba GitHub issue 278
-RUN git clone https://github.com/weizhongli/cdhit.git \
-  && cd cdhit \
-  && git checkout V${CDHIT_VERSION} \
-  && make MAX_SEQ=10000000 \
-  && ln -s -f $PWD/cd-hit-est /usr/bin/cd-hit-est \
-  && ln -s -f /usr/bin/cd-hit-est /usr/bin/cdhit-est \
-  && cd ..
-
 # Install bowtie
 RUN wget -q http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/${BOWTIE2_VERSION}/bowtie2-${BOWTIE2_VERSION}-linux-x86_64.zip \
   && unzip bowtie2-${BOWTIE2_VERSION}-linux-x86_64.zip \

diff --git a/ariba/ref_preparer.py b/ariba/ref_preparer.py
@@ -16,6 +16,8 @@ def __init__(self,
         version_report_lines=None,
         min_gene_length=6,
         max_gene_length=10000,
+        min_noncoding_length=6,
+        max_noncoding_length=20000,
         genetic_code=11,
         cdhit_min_id=0.9,
         cdhit_min_length=0.0,
@@ -38,6 +40,8 @@ def __init__(self,
         self.all_coding = all_coding
         self.min_gene_length = min_gene_length
         self.max_gene_length = max_gene_length
+        self.min_noncoding_length = min_noncoding_length
+        self.max_noncoding_length = max_noncoding_length
         self.genetic_code = genetic_code
         self.cdhit_min_id = cdhit_min_id
         self.cdhit_min_length = cdhit_min_length
@@ -177,6 +181,8 @@ def run(self, outdir):
             self.metadata_tsv_files,
             min_gene_length=self.min_gene_length,
             max_gene_length=self.max_gene_length,
+            min_noncoding_length = self.min_noncoding_length,
+            max_noncoding_length = self.max_noncoding_length,
             genetic_code=self.genetic_code,
         )
 
@@ -213,8 +219,9 @@ def run(self, outdir):
             pickle.dump(clusters, f)
 
         if number_of_removed_seqs > 0:
-            print('WARNING.', number_of_removed_seqs, 'sequence(s) excluded. Please see the log file 01.filter.check_genes.log for details. This will show them:', file=sys.stderr)
+            print('WARNING.', number_of_removed_seqs, 'sequence(s) excluded. Please see the 01.filter.check_genes.log and 01.filter.check_noncoding.log for details. This will show them:', file=sys.stderr)
             print('    grep REMOVE', os.path.join(outdir, '01.filter.check_genes.log'), file=sys.stderr)
+            print('    cat', os.path.join(outdir, '01.filter.check_noncoding.log'), file=sys.stderr)
 
         if number_of_bad_variants_logged > 0:
             print('WARNING. Problem with at least one variant. Problem variants are removed. Please see the file', os.path.join(outdir, '01.filter.check_metadata.log'), 'for details.', file=sys.stderr)
diff --git a/ariba/reference_data.py b/ariba/reference_data.py
@@ -19,13 +19,17 @@ def __init__(self,
         rename_file=None,
         min_gene_length=6,
         max_gene_length=10000,
+        min_noncoding_length=6,
+        max_noncoding_length=20000,
         genetic_code=11,
         parameters_file=None,
     ):
         self.seq_filenames = {}
         self.seq_dicts = {}
         self.min_gene_length = min_gene_length
         self.max_gene_length = max_gene_length
+        self.min_noncoding_length = min_noncoding_length
+        self.max_noncoding_length = max_noncoding_length
 
         self.sequences, self.metadata = ReferenceData._load_input_files_and_check_seq_names(fasta_files, metadata_tsv_files)
         if len(self.sequences) == 0:
@@ -208,7 +212,7 @@ def _filter_bad_variant_data(cls, sequences, all_metadata, out_prefix, removed_s
 
         for sequence_name, metadata_dict in sorted(all_metadata.items()):
             if sequence_name in removed_sequences:
-                print(sequence_name, 'was removed because does not look like a gene, so removing its metadata', file=log_fh)
+                print(sequence_name, 'was removed because it failed filtering checks, so removing its metadata', file=log_fh)
                 log_lines += 1
                 del all_metadata[sequence_name]
                 continue
@@ -278,6 +282,16 @@ def _try_to_get_gene_seq(cls, seq, min_length, max_length):
                 return got[0], 'KEEP\tMade into gene. strand=' + got[1] + ', frame=' + str(got[2])
 
 
+    @classmethod
+    def _check_noncoding_seq(cls, seq, min_length, max_length):
+        if len(seq) < min_length:
+            return False, 'REMOVE\tToo short. Length: ' + str(len(seq))
+        elif len(seq) > max_length:
+            return False, 'REMOVE\tToo long. Length: ' + str(len(seq))
+        else:
+            return True, None
+
+
     @classmethod
     def _remove_bad_genes(cls, sequences, metadata, log_file, min_gene_length, max_gene_length):
         to_remove = set()
@@ -308,11 +322,46 @@ def _remove_bad_genes(cls, sequences, metadata, log_file, min_gene_length, max_g
         return to_remove
 
 
+    @classmethod
+    def _remove_bad_noncoding_seqs(cls, sequences, metadata, log_file, min_noncoding_length, max_noncoding_length):
+        to_remove = set()
+
+        if len(sequences) == 0:
+            return to_remove
+
+        log_fh = pyfastaq.utils.open_file_write(log_file)
+
+        for name in sorted(sequences):
+            if metadata[name]['seq_type'] != 'n':
+                continue
+
+            valid, message = ReferenceData._check_noncoding_seq(sequences[name], min_noncoding_length, max_noncoding_length)
+            if not valid:
+                to_remove.add(name)
+
+            if message is not None:
+                print(name, message, sep='\t', file=log_fh)
+
+        pyfastaq.utils.close(log_fh)
+
+        for name in to_remove:
+            sequences.pop(name)
+
+        return to_remove
+
     def sanity_check(self, outprefix):
-        removed_seqs = self._remove_bad_genes(self.sequences, self.metadata, outprefix + '.check_genes.log', self.min_gene_length, self.max_gene_length)
-        log_lines = ReferenceData._filter_bad_variant_data(self.sequences, self.metadata, outprefix, removed_seqs)
-        return len(removed_seqs), log_lines
 
+        removed_gene_seqs = self._remove_bad_genes(self.sequences,
+                                              self.metadata, outprefix + '.check_genes.log',
+                                              self.min_gene_length, self.max_gene_length)
+
+        removed_noncoding_seqs = self._remove_bad_noncoding_seqs(self.sequences, self.metadata,
+                                                       outprefix + '.check_noncoding.log', self.min_noncoding_length,
+                                                       self.max_noncoding_length)
+
+        all_removed_seqs = removed_gene_seqs.union(removed_noncoding_seqs)
+        log_lines = ReferenceData._filter_bad_variant_data(self.sequences, self.metadata, outprefix, all_removed_seqs)
+        return len(all_removed_seqs), log_lines
 
     @classmethod
     def _new_seq_name(cls, name):

diff --git a/ariba/tasks/prepareref.py b/ariba/tasks/prepareref.py
@@ -18,6 +18,8 @@ def run(options):
         version_report_lines=version_report_lines,
         min_gene_length=options.min_gene_length,
         max_gene_length=options.max_gene_length,
+        min_noncoding_length=options.min_noncoding_length,
+        max_noncoding_length=options.max_noncoding_length,
         genetic_code=options.genetic_code,
         cdhit_min_id=options.cdhit_min_id,
         cdhit_min_length=options.cdhit_min_length,

diff --git a/ariba/tests/data/ref_preparer_test_run.in.4.fa b/ariba/tests/data/ref_preparer_test_run.in.4.fa
@@ -0,0 +1,17 @@
+>noncoding1-toolong
+CTACTGATCATCTACTATCTGCATCGATGCCTGATCTA
+>noncoding2
+CTACTGAT
+>cannot_make_into_a_gene
+AAAAAAAAAAAAAAAA
+>noncoding3-tooshort
+C
+>gene1
+ATGGATCGTGAAGCGATGACCCATGAAGCGACCGAACGCTAA
+>noncoding4-toolong
+CTACTGATCATCTACTATCTG
+>noncoding5-tooshort
+CTCTC
+>gene2
+ATGGATCGCGAAGCGATGACCCATGAAGCGACCGAACGCTAA
+
diff --git a/ariba/tests/data/ref_preparer_test_run.in.4.tsv b/ariba/tests/data/ref_preparer_test_run.in.4.tsv
@@ -0,0 +1,8 @@
+cannot_make_into_a_gene	1	0	.	.	.
+noncoding1-toolong	0	0	.	.	.
+noncoding2	0	0	C4T	.	.
+noncoding3-tooshort	0	0	C4T	.	.
+noncoding4-toolong	0	0	C4T	.	.
+noncoding5-tooshort	0	0	C4T	.	.
+gene1	1	0	.	.	.
+gene2	1	0	.	.	.
diff --git a/ariba/tests/data/ref_preparer_test_run.out/01.filter.check_metadata.log b/ariba/tests/data/ref_preparer_test_run.out/01.filter.check_metadata.log
@@ -1 +1 @@
-cannot_make_into_a_gene was removed because does not look like a gene, so removing its metadata
+cannot_make_into_a_gene was removed because it failed filtering checks, so removing its metadata
diff --git a/ariba/tests/data/ref_preparer_test_run.out/01.filter.check_noncoding.log b/ariba/tests/data/ref_preparer_test_run.out/01.filter.check_noncoding.log
diff --git a/ariba/tests/data/ref_preparer_test_run_all_noncoding.out/01.filter.check_noncoding.log b/ariba/tests/data/ref_preparer_test_run_all_noncoding.out/01.filter.check_noncoding.log
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/00.info.txt b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/00.info.txt
@@ -0,0 +1,3 @@
+input fasta file:	/Users/kp11/workspace/applications/Ariba/ariba/ariba/tests/data/ref_preparer_test_run.in.4.fa
+input tsv file:	/Users/kp11/workspace/applications/Ariba/ariba/ariba/tests/data/ref_preparer_test_run.in.4.tsv
+genetic_code	1
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/00.rename_info b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/00.rename_info
@@ -0,0 +1,4 @@
+noncoding1-toolong	noncoding1_toolong
+noncoding3-tooshort	noncoding3_tooshort
+noncoding4-toolong	noncoding4_toolong
+noncoding5-tooshort	noncoding5_tooshort
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/00.version_info.txt b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/00.version_info.txt
@@ -0,0 +1,5 @@
+ARIBA run with this command:
+setup.py prepareref test
+from this directory: /Users/kp11/workspace/applications/Ariba/ariba
+
+
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/01.filter.check_genes.log b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/01.filter.check_genes.log
@@ -0,0 +1,3 @@
+cannot_make_into_a_gene	REMOVE	Does not look like a gene (tried both strands and all reading frames) AAAAAAAAAAAAAAAA
+gene1	KEEP	Made into gene. strand=+, frame=0
+gene2	KEEP	Made into gene. strand=+, frame=0
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/01.filter.check_metadata.log b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/01.filter.check_metadata.log
@@ -0,0 +1,5 @@
+cannot_make_into_a_gene was removed because it failed filtering checks, so removing its metadata
+noncoding1_toolong was removed because it failed filtering checks, so removing its metadata
+noncoding3_tooshort was removed because it failed filtering checks, so removing its metadata
+noncoding4_toolong was removed because it failed filtering checks, so removing its metadata
+noncoding5_tooshort was removed because it failed filtering checks, so removing its metadata
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/01.filter.check_metadata.tsv b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/01.filter.check_metadata.tsv
@@ -0,0 +1,3 @@
+gene1	1	0	.	.	.
+gene2	1	0	.	.	.
+noncoding2	0	0	C4T	.	.
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/01.filter.check_noncoding.log b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/01.filter.check_noncoding.log
@@ -0,0 +1,4 @@
+noncoding1_toolong	REMOVE	Too long. Length: 38
+noncoding3_tooshort	REMOVE	Too short. Length: 1
+noncoding4_toolong	REMOVE	Too long. Length: 21
+noncoding5_tooshort	REMOVE	Too short. Length: 5
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.all.fa b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.all.fa
@@ -0,0 +1,6 @@
+>gene1
+ATGGATCGTGAAGCGATGACCCATGAAGCGACCGAACGCTAA
+>gene2
+ATGGATCGCGAAGCGATGACCCATGAAGCGACCGAACGCTAA
+>noncoding2
+CTACTGAT
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.clusters.pickle b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.clusters.pickle
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.clusters.tsv b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.clusters.tsv
@@ -0,0 +1 @@
+cluster	gene1	gene2
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.gene.fa b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.gene.fa
@@ -0,0 +1,4 @@
+>gene1
+ATGGATCGTGAAGCGATGACCCATGAAGCGACCGAACGCTAA
+>gene2
+ATGGATCGCGAAGCGATGACCCATGAAGCGACCGAACGCTAA
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.gene.varonly.fa b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.gene.varonly.fa
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.noncoding.fa b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.noncoding.fa
@@ -0,0 +1,2 @@
+>noncoding2
+CTACTGAT
diff --git a/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.noncoding.varonly.fa b/ariba/tests/data/ref_preparer_test_run_noncoding_checks.out/02.cdhit.noncoding.varonly.fa
diff --git a/ariba/tests/data/refdata_query_prepareref/01.filter.check_noncoding.log b/ariba/tests/data/refdata_query_prepareref/01.filter.check_noncoding.log
diff --git a/ariba/tests/data/reference_data_remove_bad_noncoding.in.fa b/ariba/tests/data/reference_data_remove_bad_noncoding.in.fa
@@ -0,0 +1,10 @@
+>noncoding1
+AAAA
+>noncoding2
+GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
+>noncoding3
+CCCCCC
+>noncoding4
+TTTTTTTTTTTTTTT
+>noncoding5
+AAAAAAAAAAAA
diff --git a/ariba/tests/data/reference_data_remove_bad_noncoding.in.tsv b/ariba/tests/data/reference_data_remove_bad_noncoding.in.tsv
@@ -0,0 +1,5 @@
+noncoding1	0	0	.	.	.
+noncoding2	0	0	.	.	.
+noncoding3	0	0	.	.	.
+noncoding4	0	0	.	.	.
+noncoding5	0	0	.	.	.
diff --git a/ariba/tests/data/reference_data_test_remove_bad_noncoding.log b/ariba/tests/data/reference_data_test_remove_bad_noncoding.log
@@ -0,0 +1,2 @@
+noncoding1	REMOVE	Too short. Length: 4
+noncoding2	REMOVE	Too long. Length: 133
diff --git a/ariba/tests/ref_preparer_test.py b/ariba/tests/ref_preparer_test.py
@@ -117,6 +117,7 @@ def test_run(self):
         test_files = [
             '01.filter.check_metadata.tsv',
             '01.filter.check_genes.log',
+            '01.filter.check_noncoding.log',
             '01.filter.check_metadata.log',
             '02.cdhit.all.fa',
             '02.cdhit.clusters.tsv',
@@ -152,6 +153,7 @@ def test_run_all_noncoding(self):
             '00.auto_metadata.tsv',
             '01.filter.check_metadata.tsv',
             '01.filter.check_genes.log',
+            '01.filter.check_noncoding.log',
             '01.filter.check_metadata.log',
             '02.cdhit.all.fa',
             '02.cdhit.clusters.tsv',
@@ -168,6 +170,41 @@ def test_run_all_noncoding(self):
 
         common.rmtree(tmp_out)
 
+    def test_run_noncoding_checks(self):
+        '''test run with noncoding sequences that are outside of the allowed size range'''
+        fasta_in = [
+            os.path.join(data_dir, 'ref_preparer_test_run.in.4.fa')
+        ]
+        tsv_in = [
+            os.path.join(data_dir, 'ref_preparer_test_run.in.4.tsv')
+        ]
+
+        extern_progs = external_progs.ExternalProgs()
+        refprep = ref_preparer.RefPreparer(
+            fasta_in, extern_progs, min_noncoding_length=6, max_noncoding_length=20,
+            metadata_tsv_files=tsv_in, genetic_code=1)
+        tmp_out = 'tmp.ref_preparer_test_run_noncoding_checks'
+        refprep.run(tmp_out)
+        expected_outdir = os.path.join(data_dir, 'ref_preparer_test_run_noncoding_checks.out')
 
+        test_files = [
+            '01.filter.check_metadata.tsv',
+            '01.filter.check_genes.log',
+            '01.filter.check_noncoding.log',
+            '01.filter.check_metadata.log',
+            '02.cdhit.all.fa',
+            '02.cdhit.clusters.tsv',
+            '02.cdhit.gene.fa',
+            '02.cdhit.gene.varonly.fa',
+            '02.cdhit.noncoding.fa',
+            '02.cdhit.noncoding.varonly.fa',
+        ]
+
+        for filename in test_files:
+            expected = os.path.join(expected_outdir, filename)
+            got = os.path.join(tmp_out, filename)
+            self.assertTrue(filecmp.cmp(expected, got, shallow=False))
+
+        common.rmtree(tmp_out)
 
 
diff --git a/ariba/tests/reference_data_test.py b/ariba/tests/reference_data_test.py
@@ -265,6 +265,19 @@ def test_try_to_get_gene_seq(self):
         for seq, got_seq, message in tests:
             self.assertEqual((got_seq, message), reference_data.ReferenceData._try_to_get_gene_seq(seq, 6, 99))
 
+    def test_check_noncoding_seq(self):
+        '''Test _check_noncoding_seq'''
+        tests = [
+            (pyfastaq.sequences.Fasta('x', 'A' * 3), False, 'REMOVE\tToo short. Length: 3'),
+            (pyfastaq.sequences.Fasta('x', 'A' * 21), False, 'REMOVE\tToo long. Length: 21'),
+            (pyfastaq.sequences.Fasta('x', 'A' * 5), True, None),
+            (pyfastaq.sequences.Fasta('x', 'A' * 4), True, None),
+            (pyfastaq.sequences.Fasta('x', 'A' * 20), True, None)
+        ]
+
+        for seq, valid, message in tests:
+            self.assertEqual((valid, message), reference_data.ReferenceData._check_noncoding_seq(seq, 4, 20))
+
 
     def test_remove_bad_genes(self):
         '''Test _remove_bad_genes'''
@@ -287,6 +300,29 @@ def test_remove_bad_genes(self):
         os.unlink(tmp_log)
 
 
+    def test_remove_bad_noncoding_seqs(self):
+        '''Test _remove_bad_noncoding_seqs'''
+        test_seq_dict = {}
+        fasta_file = os.path.join(data_dir, 'reference_data_remove_bad_noncoding.in.fa')
+        metadata_file = os.path.join(data_dir, 'reference_data_remove_bad_noncoding.in.tsv')
+        metadata = reference_data.ReferenceData._load_all_metadata_tsvs([metadata_file])
+        pyfastaq.tasks.file_to_dict(fasta_file, test_seq_dict)
+        tmp_log = 'tmp.test_remove_bad_noncoding.log'
+        expected_removed = {'noncoding1','noncoding2'}
+        got_removed = reference_data.ReferenceData._remove_bad_noncoding_seqs(test_seq_dict, metadata, tmp_log,
+                                                                     min_noncoding_length=6, max_noncoding_length=15)
+        self.assertEqual(expected_removed, got_removed)
+        expected_dict = {
+            'noncoding3': pyfastaq.sequences.Fasta('noncoding3', 'CCCCCC'),
+            'noncoding4': pyfastaq.sequences.Fasta('noncoding4', 'TTTTTTTTTTTTTTT'),
+            'noncoding5': pyfastaq.sequences.Fasta('noncoding5', 'AAAAAAAAAAAA')
+        }
+        self.assertEqual(expected_dict, test_seq_dict)
+        expected_log = os.path.join(data_dir, 'reference_data_test_remove_bad_noncoding.log')
+        self.assertTrue(filecmp.cmp(expected_log, tmp_log, shallow=False))
+        os.unlink(tmp_log)
+
+
     def test_new_seq_name(self):
         '''Test _new_seq_name'''
         tests = [
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		cannot_make_into_a_gene was removed because does not look like a gene, so removing its metadata
		cannot_make_into_a_gene was removed because it failed filtering checks, so removing its metadata
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		noncoding1 REMOVE Too short. Length: 4
		noncoding2 REMOVE Too long. Length: 133