diff --git a/ariba/cluster.py b/ariba/cluster.py index dd10077f..07a87b7d 100644 --- a/ariba/cluster.py +++ b/ariba/cluster.py @@ -129,6 +129,7 @@ def __init__(self, self.final_assembly_bam = os.path.join(self.root_dir, 'assembly.reads_mapped.bam') self.final_assembly_read_depths = os.path.join(self.root_dir, 'assembly.reads_mapped.bam.read_depths.gz') self.final_assembly_vcf = os.path.join(self.root_dir, 'assembly.reads_mapped.bam.vcf') + self.final_assembled_genes_fa = os.path.join(self.root_dir, 'assembly.genes.fa') self.final_assembly = {} self.mummer_variants = {} self.variant_depths = {} @@ -525,6 +526,38 @@ def _nucmer_hits_to_gene_cov_per_contig(self): return cov + @staticmethod + def _nucmer_hits_to_assembled_gene_sequences(nucmer_hits, ref_gene, assembly, outfile): + f = pyfastaq.utils.open_file_write(outfile) + + for contig in sorted(nucmer_hits): + for hit in nucmer_hits[contig]: + qry_coords = hit.qry_coords() + fa = assembly[hit.qry_name].subseq(qry_coords.start, qry_coords.end + 1) + if hit.on_same_strand(): + strand = '+' + else: + fa.revcomp() + strand = '-' + ref_coords = hit.ref_coords() + fa.id = '.'.join([ + ref_gene.id, + str(ref_coords.start + 1), + str(ref_coords.end + 1), + contig, + str(qry_coords.start + 1), + str(qry_coords.end + 1), + strand + ]) + + if hit.hit_length_ref == hit.ref_length: + fa.id += '.complete' + + print(fa, file=f) + + pyfastaq.utils.close(f) + + def _whole_gene_covered_by_nucmer_hits(self): covered = self._nucmer_hits_to_ref_coords() pyfastaq.intervals.merge_overlapping_in_list(covered) @@ -1046,6 +1079,7 @@ def run(self): self._update_flag_from_nucmer_file() self._make_assembly_vcf() self._get_vcf_variant_counts() + self._nucmer_hits_to_assembled_gene_sequences(self.nucmer_hits, self.gene, self.final_assembly, self.final_assembled_genes_fa) self._make_report_lines() self._clean() diff --git a/ariba/clusters.py b/ariba/clusters.py index ba06009d..6f328cff 100644 --- a/ariba/clusters.py +++ b/ariba/clusters.py @@ -44,7 +44,6 @@ def __init__(self, run_cd_hit=True, clean=1, ): - self.db_fasta = os.path.abspath(db_fasta) self.reads_1 = os.path.abspath(reads_1) self.reads_2 = os.path.abspath(reads_2) self.outdir = os.path.abspath(outdir) @@ -57,12 +56,13 @@ def __init__(self, self.assembly_kmer = assembly_kmer self.spades_other = spades_other - self.db_fasta_clustered = os.path.join(self.outdir, 'genes.clustered.fa') + self.db_fasta_clustered = os.path.join(self.outdir, 'input_genes.clustered.fa') self.cluster_ids = {} self.bam_prefix = os.path.join(self.outdir, 'map_all_reads') self.bam = self.bam_prefix + '.bam' self.report_file_tsv = os.path.join(self.outdir, 'report.tsv') self.report_file_xls = os.path.join(self.outdir, 'report.xls') + self.catted_assembled_genes_fasta = os.path.join(self.outdir, 'assembled_genes.fa') self.threads = threads self.verbose = verbose @@ -120,6 +120,10 @@ def __init__(self, except: raise Error('Error mkdir ' + d) + self.db_fasta = os.path.join(self.outdir, 'input_genes.not_clustered.fa') + pyfastaq.tasks.to_fasta(db_fasta, self.db_fasta, check_unique=True) + + def _run_cdhit(self): r = cdhit.Runner( self.db_fasta, @@ -357,12 +361,27 @@ def _write_reports(self): workbook.save(self.report_file_xls) + def _write_catted_assembled_genes_fasta(self): + f = pyfastaq.utils.open_file_write(self.catted_assembled_genes_fasta) + + for gene in sorted(self.clusters): + cluster_fasta = self.clusters[gene].final_assembled_genes_fa + if os.path.exists(cluster_fasta): + file_reader = pyfastaq.sequences.file_reader(cluster_fasta) + for seq in file_reader: + print(seq, file=f) + + pyfastaq.utils.close(f) + + def _clean(self): to_clean = [ [ ], [ - self.bam + self.bam, + self.db_fasta, + self.db_fasta + '.fai', ], [ self.db_fasta_clustered, @@ -400,14 +419,22 @@ def run(self): print('Finished mapping\n') print('{:_^79}'.format(' Generating clusters '), flush=True) self._bam_to_clusters_reads() - self._set_insert_size_data() - if self.verbose: - print('{:_^79}'.format(' Assembling each cluster '), flush=True) - self._init_and_run_clusters() + if len(self.cluster_to_dir) > 0: + self._set_insert_size_data() + if self.verbose: + print('{:_^79}'.format(' Assembling each cluster '), flush=True) + self._init_and_run_clusters() + if self.verbose: + print('Finished assembling clusters\n') + else: + if self.verbose: + print('No reads mapped. Skipping all assemblies', flush=True) + print('WARNING: no reads mapped to reference genes. Therefore no local assemblies will be run', file=sys.stderr) + if self.verbose: - print('Finished assembling clusters\n') print('{:_^79}'.format(' Writing report files '), flush=True) self._write_reports() + self._write_catted_assembled_genes_fasta() if self.verbose: print('Finished writing report files. Cleaning files', flush=True) self._clean() diff --git a/ariba/common.py b/ariba/common.py index 4f0f5b84..c4798115 100644 --- a/ariba/common.py +++ b/ariba/common.py @@ -1,7 +1,7 @@ import sys import subprocess -version = '0.4.1' +version = '0.5.0' def syscall(cmd, allow_fail=False, verbose=False): if verbose: diff --git a/ariba/refcheck.py b/ariba/refcheck.py index 566bf09d..551b648f 100644 --- a/ariba/refcheck.py +++ b/ariba/refcheck.py @@ -5,78 +5,86 @@ class Error (Exception): pass class Checker: - def __init__(self, infile, min_length=1, max_length=10000): + def __init__(self, infile, min_length=1, max_length=10000, outprefix=None): self.infile = os.path.abspath(infile) if not os.path.exists(self.infile): raise Error('File not found: "' + self.infile + '". Cannot continue') self.min_length = min_length self.max_length = max_length + self.outprefix = outprefix - def check(self, error_code_on_exit=None): + def run(self): file_reader = pyfastaq.sequences.file_reader(self.infile) - - for seq in file_reader: - if not seq.looks_like_gene(): - return False, 'Not a gene', seq - elif len(seq) < self.min_length: - return False, 'Too short', seq - elif len(seq) > self.max_length: - return False, 'Too long', seq - - return True, None, None - - - def fix(self, outprefix): - file_reader = pyfastaq.sequences.file_reader(self.infile) - old2new_out = outprefix + '.rename' - fasta_out = outprefix + '.fa' - bad_seqs_out = outprefix + '.removed.fa' - log_out = outprefix + '.log' names = {} - old2new_out_fh = pyfastaq.utils.open_file_write(old2new_out) - fasta_out_fh = pyfastaq.utils.open_file_write(fasta_out) - bad_seqs_out_fh = pyfastaq.utils.open_file_write(bad_seqs_out) - log_out_fh = pyfastaq.utils.open_file_write(log_out) + + if self.outprefix is not None: + old2new_out = self.outprefix + '.rename' + fasta_out = self.outprefix + '.fa' + bad_seqs_out = self.outprefix + '.removed.fa' + log_out = self.outprefix + '.log' + old2new_out_fh = pyfastaq.utils.open_file_write(old2new_out) + fasta_out_fh = pyfastaq.utils.open_file_write(fasta_out) + bad_seqs_out_fh = pyfastaq.utils.open_file_write(bad_seqs_out) + log_out_fh = pyfastaq.utils.open_file_write(log_out) for seq in file_reader: seq.seq = seq.seq.upper() if len(seq) < self.min_length: - print(seq.id, 'Too short. Skipping', sep='\t', file=log_out_fh) - print(seq, file=bad_seqs_out_fh) - continue + if self.outprefix is None: + return False, 'Too short', seq + else: + print(seq.id, 'Too short. Skipping', sep='\t', file=log_out_fh) + print(seq, file=bad_seqs_out_fh) + continue elif len(seq) > self.max_length: - print(seq.id, 'Too long. Skipping', sep='\t', file=log_out_fh) - print(seq, file=bad_seqs_out_fh) - continue - + if self.outprefix is None: + return False, 'Too long', seq + else: + print(seq.id, 'Too long. Skipping', sep='\t', file=log_out_fh) + print(seq, file=bad_seqs_out_fh) + continue if not seq.looks_like_gene(): - seq.revcomp() - if seq.looks_like_gene(): - print(seq.id, 'Reverse complemented', sep='\t', file=log_out_fh) + if self.outprefix is None: + return False, 'Not a gene', seq else: - print(seq.id, 'Does not look like a gene. Skipping', sep='\t', file=log_out_fh) seq.revcomp() - print(seq, file=bad_seqs_out_fh) - continue - + if seq.looks_like_gene(): + print(seq.id, 'Reverse complemented', sep='\t', file=log_out_fh) + else: + print(seq.id, 'Does not look like a gene. Skipping', sep='\t', file=log_out_fh) + seq.revcomp() + print(seq, file=bad_seqs_out_fh) + continue + original_id = seq.id # replace unwanted characters with underscores to_replace = ' ' seq.id = seq.id.translate(str.maketrans(to_replace, '_' * len(to_replace))) + if self.outprefix is None and original_id != seq.id: + seq.id = original_id + return False, 'Name has spaces', seq + if seq.id in names: - names[seq.id] += 1 - seq.id += '.' + str(names[seq.id]) + if self.outprefix is None: + return False, 'Duplicate name', seq + else: + names[seq.id] += 1 + seq.id += '.' + str(names[seq.id]) else: names[seq.id] = 1 - print(original_id, seq.id, sep='\t', file=old2new_out_fh) - print(seq, file=fasta_out_fh) + if self.outprefix is not None: + print(original_id, seq.id, sep='\t', file=old2new_out_fh) + print(seq, file=fasta_out_fh) - pyfastaq.utils.close(fasta_out_fh) - pyfastaq.utils.close(bad_seqs_out_fh) - pyfastaq.utils.close(log_out_fh) - pyfastaq.utils.close(old2new_out_fh) + if self.outprefix is not None: + pyfastaq.utils.close(fasta_out_fh) + pyfastaq.utils.close(bad_seqs_out_fh) + pyfastaq.utils.close(log_out_fh) + pyfastaq.utils.close(old2new_out_fh) + + return True, None, None diff --git a/ariba/tasks/refcheck.py b/ariba/tasks/refcheck.py index 7f8f2f2e..4184494d 100644 --- a/ariba/tasks/refcheck.py +++ b/ariba/tasks/refcheck.py @@ -18,14 +18,14 @@ def run(): checker = ariba.refcheck.Checker( options.infile, min_length=options.min_length, - max_length=options.max_length + max_length=options.max_length, + outprefix=options.outprefix ) - - if options.outprefix: - checker.fix(options.outprefix) - else: - ok, reason, seq = checker.check() - if not ok: - print('The following sequence not OK, for the reason:', reason) - print(seq) - sys.exit(1) + + ok, reason, seq = checker.run() + + if options.outprefix is None and not ok: + print('The following sequence not OK, for the reason:', reason) + print(seq) + sys.exit(1) + diff --git a/ariba/tests/cluster_test.py b/ariba/tests/cluster_test.py index 5a8f35a8..3106b339 100644 --- a/ariba/tests/cluster_test.py +++ b/ariba/tests/cluster_test.py @@ -34,7 +34,7 @@ def clean_cluster_dir(d, exclude=None): def file2lines(filename): f = pyfastaq.utils.open_file_read(filename) lines = f.readlines() - pyfastaq.utils.close(f) + pyfastaq.utils.close(f) return lines @@ -303,8 +303,8 @@ def test_nucmer_hits_to_percent_identity(self): ] } expected = {'scaff1': round((90*10 + 100*34) / (10+34), 2), 'scaff2': 42.42} - c._nucmer_hits_to_percent_identity() - self.assertEqual(expected, c.percent_identities) + c._nucmer_hits_to_percent_identity() + self.assertEqual(expected, c.percent_identities) def test_nucmer_hits_to_scaff_coords(self): @@ -396,12 +396,41 @@ def test_nucmer_hits_to_gene_cov_per_contig(self): pymummer.alignment.Alignment('\t'.join(hits[2])), ] } - + expected = {'contig1': 85, 'contig2': 11} self.assertEqual(expected, c._nucmer_hits_to_gene_cov_per_contig()) clean_cluster_dir(cluster_dir) + def test_nucmer_hits_to_assembled_gene_sequences(self): + '''test _nucmer_hits_to_assembled_gene_sequences''' + ref_gene = pyfastaq.sequences.Fasta('ref_gene', 'ATGGTACAAGACGGCCCTTTGCAGTCCTGTGTACTTGCGGGTCGCTCCTTTGCATTGAATTATCGAACATCGTCGCGTTCAAGATCCCGCGAAAAAAATTATAGATCGCAGGATATCACTGCCAGTGGCATCTGTGTAAGCGCTTAG') + assembly = { + 'contig1': pyfastaq.sequences.Fasta('contig1', 'CATCTATGCTGCATCGATCACTGACGTATCATCATCAGCGTACTGACGTATTAGTTTGTAATGGTACAAGACGGCCCTTTGCAGTCCTGTGTACTTGCGGGTCGCTCCTTTGCATTGAATTATCGAACATCGTCGCGTTCAAGATCCCGCGAAAAAAATTATAGATCGCAGGATATCACTGCCAGTGGCATCTGTGTAAGCGCTTAGACGTCGTACTACTGTATATGCATCGATCTGAA'), + 'contig2': pyfastaq.sequences.Fasta('contig2', 'AGTGATATCCTGCGATCTATAATTTTTTTCGCGGGATCTTGAACGCGACGATGTTCGATAATTCAATGCAAAGGAGCGACCCGCAAGTACACAGGACTGCAAA') + } + + hits = [ + ['1', '147', '61', '207', '147', '147', '100.00', '147', '239', '1', '1', 'ref_gene', 'contig1'], + ['18', '120', '103', '1', '103', '103', '100.00', '147', '103', '1', '-1', 'ref_gene', 'contig2'] + ] + nucmer_hits = { + 'contig1': [ + pymummer.alignment.Alignment('\t'.join(hits[0])), + ], + 'contig2': [ + pymummer.alignment.Alignment('\t'.join(hits[1])), + ] + } + + assembly_fasta = os.path.join(data_dir, 'cluster_test_nucmer_hits_to_assembled_gene_sequences.assembly.fa') + tmp_outfile = 'tmp.test_nucmer_hits_to_assembled_gene_sequences.out.fa' + expected_outfile = os.path.join(data_dir, 'cluster_test_nucmer_hits_to_assembled_gene_sequences.expected.out.fa') + cluster.Cluster._nucmer_hits_to_assembled_gene_sequences(nucmer_hits, ref_gene, assembly, tmp_outfile) + self.assertTrue(filecmp.cmp(tmp_outfile, expected_outfile, shallow=False)) + os.unlink(tmp_outfile) + + def test_whole_gene_covered_by_nucmer_hits(self): '''test _whole_gene_covered_by_nucmer_hits''' cluster_dir = os.path.join(data_dir, 'cluster_test_generic') @@ -637,7 +666,7 @@ def test_get_assembly_read_depths(self): ( ('ref1', 3), ('C', 'A,G', 42, '21,11,10') ), ( ('ref1', 4), ('C', 'AC', 41, '0,42') ) ] - + for t in tests: self.assertEqual(c._get_assembly_read_depths(t[0][0], t[0][1]), t[1]) @@ -652,7 +681,7 @@ def test_get_samtools_variant_positions(self): ('16__cat_2_M35190.scaffold.1', 179), ('16__cat_2_M35190.scaffold.1', 263), ('16__cat_2_M35190.scaffold.6', 93) - ] + ] self.assertEqual(expected, c._get_samtools_variant_positions()) @@ -764,4 +793,4 @@ def test_make_report_lines_assembly_fail(self): ] self.assertEqual(expected, c.report_lines) clean_cluster_dir(cluster_dir) - + diff --git a/ariba/tests/clusters_test.py b/ariba/tests/clusters_test.py index 4cf47fca..3d9a6d5a 100644 --- a/ariba/tests/clusters_test.py +++ b/ariba/tests/clusters_test.py @@ -65,8 +65,8 @@ def test_sam_pair_to_insert(self): def test_bam_to_clusters_reads(self): '''test _bam_to_clusters_reads''' clusters_dir = 'tmp.Cluster.test_bam_to_clusters_reads' - reads1 = os.path.join(data_dir, 'clusters_test_bam_to_clusters_reads.reads_1.fq') - reads2 = os.path.join(data_dir, 'clusters_test_bam_to_clusters_reads.reads_2.fq') + reads1 = os.path.join(data_dir, 'clusters_test_bam_to_clusters_reads.reads_1.fq') + reads2 = os.path.join(data_dir, 'clusters_test_bam_to_clusters_reads.reads_2.fq') ref = os.path.join(data_dir, 'clusters_test_bam_to_clusters_reads.db.fa') c = clusters.Clusters(ref, reads1, reads2, clusters_dir) shutil.copyfile(os.path.join(data_dir, 'clusters_test_bam_to_clusters_reads.bam'), c.bam) @@ -110,7 +110,7 @@ def test_set_insert_size_data(self): 10: 1, } self.clusters.insert_hist.bin_width=1 - + self.clusters._set_insert_size_data() self.assertEqual(self.clusters.insert_size, 5.5) self.assertEqual(self.clusters.insert_sspace_sd, 0.91) @@ -131,3 +131,18 @@ def __init__(self, lines): self.assertTrue(filecmp.cmp(expected, self.clusters.report_file_tsv, shallow=False)) self.assertTrue(os.path.exists(self.clusters.report_file_xls)) + + def test_write_catted_assembled_genes_fasta(self): + '''test _write_catted_assembled_genes_fasta''' + class FakeCluster: + def __init__(self, filename): + self.final_assembled_genes_fa = filename + + self.clusters.clusters = { + 'gene1': FakeCluster(os.path.join(data_dir, 'clusters_test_write_catted_assembled_genes_fasta.in.gene1.fa')), + 'gene2': FakeCluster(os.path.join(data_dir, 'clusters_test_write_catted_assembled_genes_fasta.in.gene2.fa')), + } + + self.clusters._write_catted_assembled_genes_fasta() + expected = os.path.join(data_dir, 'clusters_test_write_catted_assembled_genes_fasta.expected.out.fa') + self.assertTrue(filecmp.cmp(expected, self.clusters.catted_assembled_genes_fasta, shallow=False)) diff --git a/ariba/tests/data/cluster_test_nucmer_hits_to_assembled_gene_sequences.expected.out.fa b/ariba/tests/data/cluster_test_nucmer_hits_to_assembled_gene_sequences.expected.out.fa new file mode 100644 index 00000000..87aa2147 --- /dev/null +++ b/ariba/tests/data/cluster_test_nucmer_hits_to_assembled_gene_sequences.expected.out.fa @@ -0,0 +1,7 @@ +>ref_gene.1.147.contig1.61.207.+.complete +ATGGTACAAGACGGCCCTTTGCAGTCCTGTGTACTTGCGGGTCGCTCCTTTGCATTGAAT +TATCGAACATCGTCGCGTTCAAGATCCCGCGAAAAAAATTATAGATCGCAGGATATCACT +GCCAGTGGCATCTGTGTAAGCGCTTAG +>ref_gene.18.120.contig2.1.103.- +TTTGCAGTCCTGTGTACTTGCGGGTCGCTCCTTTGCATTGAATTATCGAACATCGTCGCG +TTCAAGATCCCGCGAAAAAAATTATAGATCGCAGGATATCACT diff --git a/ariba/tests/data/clusters_test_write_catted_assembled_genes_fasta.expected.out.fa b/ariba/tests/data/clusters_test_write_catted_assembled_genes_fasta.expected.out.fa new file mode 100644 index 00000000..d340bc20 --- /dev/null +++ b/ariba/tests/data/clusters_test_write_catted_assembled_genes_fasta.expected.out.fa @@ -0,0 +1,6 @@ +>gene1.1 +ACGT +>gene1.2 +CAT +>gene2 +GTGT diff --git a/ariba/tests/data/clusters_test_write_catted_assembled_genes_fasta.in.gene1.fa b/ariba/tests/data/clusters_test_write_catted_assembled_genes_fasta.in.gene1.fa new file mode 100644 index 00000000..27aef244 --- /dev/null +++ b/ariba/tests/data/clusters_test_write_catted_assembled_genes_fasta.in.gene1.fa @@ -0,0 +1,4 @@ +>gene1.1 +ACGT +>gene1.2 +CAT diff --git a/ariba/tests/data/clusters_test_write_catted_assembled_genes_fasta.in.gene2.fa b/ariba/tests/data/clusters_test_write_catted_assembled_genes_fasta.in.gene2.fa new file mode 100644 index 00000000..2697b9f2 --- /dev/null +++ b/ariba/tests/data/clusters_test_write_catted_assembled_genes_fasta.in.gene2.fa @@ -0,0 +1,2 @@ +>gene2 +GTGT diff --git a/ariba/tests/data/refcheck_test_check_duplicate_name.fa b/ariba/tests/data/refcheck_test_check_duplicate_name.fa new file mode 100644 index 00000000..0672ca7f --- /dev/null +++ b/ariba/tests/data/refcheck_test_check_duplicate_name.fa @@ -0,0 +1,4 @@ +>gene1 +TTGTGGTGA +>gene1 +TTGTGGTGA diff --git a/ariba/tests/data/refcheck_test_check_spaces_in_name.fa b/ariba/tests/data/refcheck_test_check_spaces_in_name.fa new file mode 100644 index 00000000..8870a191 --- /dev/null +++ b/ariba/tests/data/refcheck_test_check_spaces_in_name.fa @@ -0,0 +1,2 @@ +>gene foo +TTGTGGTGA diff --git a/ariba/tests/refcheck_test.py b/ariba/tests/refcheck_test.py index 6171eb1d..bc13e015 100644 --- a/ariba/tests/refcheck_test.py +++ b/ariba/tests/refcheck_test.py @@ -13,7 +13,7 @@ def test_check_pass(self): '''test check file OK''' infile = os.path.join(data_dir, 'refcheck_test_check_ok.fa') c = refcheck.Checker(infile) - self.assertEqual(c.check(), (True, None, None)) + self.assertEqual(c.run(), (True, None, None)) def test_check_file_fail_not_gene(self): @@ -21,7 +21,7 @@ def test_check_file_fail_not_gene(self): infile = os.path.join(data_dir, 'refcheck_test_check_not_gene.fa') c = refcheck.Checker(infile) seq = pyfastaq.sequences.Fasta('gene1', 'TTGTGATGA') - self.assertEqual(c.check(), (False, 'Not a gene', seq)) + self.assertEqual(c.run(), (False, 'Not a gene', seq)) def test_check_file_fail_too_short(self): @@ -29,7 +29,7 @@ def test_check_file_fail_too_short(self): infile = os.path.join(data_dir, 'refcheck_test_check_too_short.fa') c = refcheck.Checker(infile, min_length=10) seq = pyfastaq.sequences.Fasta('gene1', 'TTGTGGTGA') - self.assertEqual(c.check(), (False, 'Too short', seq)) + self.assertEqual(c.run(), (False, 'Too short', seq)) def test_check_file_fail_too_long(self): @@ -37,15 +37,31 @@ def test_check_file_fail_too_long(self): infile = os.path.join(data_dir, 'refcheck_test_check_too_long.fa') c = refcheck.Checker(infile, max_length=6) seq = pyfastaq.sequences.Fasta('gene1', 'TTGTGGTGA') - self.assertEqual(c.check(), (False, 'Too long', seq)) + self.assertEqual(c.run(), (False, 'Too long', seq)) - def test_check_fix(self): - '''test fix''' + def test_check_file_fail_spades_in_name(self): + '''test check file with sequence that has spaces in its name''' + infile = os.path.join(data_dir, 'refcheck_test_check_spaces_in_name.fa') + c = refcheck.Checker(infile, min_length=3) + seq = pyfastaq.sequences.Fasta('gene foo', 'TTGTGGTGA') + self.assertEqual(c.run(), (False, 'Name has spaces', seq)) + + + def test_check_file_fail_duplicate_name(self): + '''test check file with sequence that has two genes with the same name''' + infile = os.path.join(data_dir, 'refcheck_test_check_duplicate_name.fa') + c = refcheck.Checker(infile, min_length=3) + seq = pyfastaq.sequences.Fasta('gene1', 'TTGTGGTGA') + self.assertEqual(c.run(), (False, 'Duplicate name', seq)) + + + def test_check_run_with_outfiles(self): + '''test run when making output files''' infile = os.path.join(data_dir, 'refcheck_test_fix_in.fa') tmp_prefix = 'tmp.refcheck_test_fix.out' - c = refcheck.Checker(infile, min_length=10, max_length=25) - c.fix(tmp_prefix) + c = refcheck.Checker(infile, min_length=10, max_length=25, outprefix=tmp_prefix) + c.run() for x in ['fa', 'log', 'rename', 'removed.fa']: expected = os.path.join(data_dir, 'refcheck_test_fix_out.' + x) got = tmp_prefix + '.' + x diff --git a/setup.py b/setup.py index 47617b41..7d4c109c 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='ariba', - version='0.4.1', + version='0.5.0', description='ARIBA: Antibiotic Resistance Identification By Assembly', packages = find_packages(), author='Martin Hunt',