diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 588bd3cf..2c7f9919 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,9 @@ jobs: auto-activate-base: false environment-file: environment.yml activate-environment: bakta + - name: Install DeepSig + if: ${{ matrix.os == 'ubuntu-latest' }} + run: conda install deepsig - name: Install PyTest run: conda install pytest - name: Conda info diff --git a/Dockerfile b/Dockerfile index 0e1ac480..3a38308b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,6 +17,7 @@ RUN apk update && apk add wget tar bash \ && cp /root/.bashrc /opt/conda/bashrc COPY environment.yml /tmp/ +RUN echo -e '\n - deepsig>=1.2.5' >> /tmp/environment.yml SHELL ["bash", "-l" ,"-c"] diff --git a/README.md b/README.md index 984250ee..4f53dbcb 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ Installation instructions, get-started and guides: Singularity [docs](https://sy $ python3 -m pip install --user bakta ``` -Bacta requires the following 3rd party executables which must be installed & executable: +Bacta requires the following 3rd party software tools which must be installed and executable to use the full set of features: - tRNAscan-SE (2.0.6) - Aragorn (1.2.38) @@ -114,6 +114,7 @@ Bacta requires the following 3rd party executables which must be installed & exe - Diamond (2.0.11) - Blast+ (2.7.1) - AMRFinderPlus (3.10.16) +- DeepSig (1.2.5) ### Database download @@ -349,7 +350,7 @@ Annotation: Path to existing Prodigal training file to use for CDS prediction --translation-table {11,4} Translation table: 11/4 (default = 11) - --gram {+,-,?} Gram type: +/-/? (default = '?') + --gram {+,-,?} Gram type for signal peptide predictions: +/-/? (default = '?') --locus LOCUS Locus prefix (default = 'contig') --locus-tag LOCUS_TAG Locus tag prefix (default = autogenerated) @@ -427,12 +428,13 @@ Conceptual terms: 2. discard spurious CDS via AntiFam 3. Detection of UPSs via MD5 digests and lookup of related IPS and PCS 4. Sequence alignments of remainder via Diamond vs. PSC (query/subject coverage=0.8, identity=0.5) -5. Assign protein sequences to UniRef90 or UniRef50 clusters if alignment hits meet an identity larger than 0.9 or 0.5, respectively +5. Assignment to UniRef90 or UniRef50 clusters if alignment hits achieve identities larger than 0.9 or 0.5, respectively 6. Execution of expert systems: - AMR: AMRFinderPlus - Expert proteins: NCBI BlastRules, VFDB - - User proteins -7. Combination of available IPS, PSC, PSCC and expert system information favouring more specific annotations and avoiding redundancy + - User proteins (optionally via `--proteins `) +7. Prediction of signal peptides (optionally via `--gram <+/->`) +8. Combination of IPS, PSC, PSCC and expert system information favouring more specific annotations and avoiding redundancy CDS without IPS or PSC hits as well as those without gene symbols or product descriptions different from `hypothetical` will be marked as `hypothetical`. @@ -449,6 +451,7 @@ Such hypothetical CDS are further analyzed: 4. Detection of UPS via MD5 hashes and lookup of related IPS 5. Sequence alignments of remainder via Diamond vs. an sORF subset of PSCs (coverage=0.9, identity=0.9) 6. Exclude sORF without sufficient annotation information +7. Prediction of signal peptides (optionally via `--gram <+/->`) sORF not identified via IPS or PSC will be discarded. Additionally, all sORF without gene symbols or product descriptions different from `hypothetical` will be discarded. Due due to uncertain nature of sORF prediction, only those identified via IPS / PSC hits exhibiting proper gene symbols or product descriptions different from `hypothetical` will be included in the final annotation. @@ -587,6 +590,7 @@ Bakta is *standing on the shoulder of giants* taking advantage of many great sof - BLAST+ <10.1186/1471-2105-10-421> - HMMER <10.1371/journal.pcbi.1002195> - AMRFinderPlus <10.1038/s41598-021-91456-0> +- DeepSig ### Databases @@ -607,6 +611,9 @@ Bakta is *standing on the shoulder of giants* taking advantage of many great sof - __AMRFinder fails__ If AMRFinder constantly crashes even on fresh setups and Bakta's database was downloaded manually, then AMRFinder needs to setup its own internal database. This is required only once: `amrfinder_update --force_update --database /amrfinderplus-db`. You could also try Bakta's internal database download logic automatically taking care of this: `bakta_db download --output ` +- __DeepSig not found in Conda environment__ +For the prediction of signal predictions, Bakta uses DeepSig that is currently not available for MacOS. Therefore, we decided to exclude DeepSig from Bakta's default Conda dependencies because otherwise it would not be installable on MacOS systems. On Linux systems it can be installed via `conda install -c conda-forge -c bioconda python=3.8 deepsig`. + - __Nice, but I'm mising XYZ...__ Bakta is quite new and we're keen to constantly improve it and further expand its feature set. In case there's anything missing, please do not hesitate to open an issue and ask for it! diff --git a/bakta/constants.py b/bakta/constants.py index 68db3cc1..a5043353 100644 --- a/bakta/constants.py +++ b/bakta/constants.py @@ -69,6 +69,7 @@ FEATURE_ORF = 'orf' FEATURE_SORF = 'sorf' FEATURE_CDS = 'cds' +FEATURE_SIGNAL_PEPTIDE = 'signal-peptide' FEATURE_GAP = 'gap' FEATURE_ORIC = 'oriC' FEATURE_ORIV = 'oriV' @@ -94,6 +95,7 @@ INSDC_FEATURE_REPEAT_UNIT_RANGE = 'rpt_unit_range' # /rpt_unit_range= INSDC_FEATURE_REPEAT_UNIT_SEQ = 'rpt_unit_seq' # /rpt_unit_seq="text" INSDC_FEATURE_CDS = 'CDS' +INSDC_FEATURE_SIGNAL_PEPTIDE = 'sig_peptide' INSDC_FEATURE_GAP = 'gap' INSDC_FEATURE_ASSEMBLY_GAP = 'assembly_gap' INSDC_FEATURE_MISC_FEATURE = 'misc_feature' @@ -130,6 +132,13 @@ STRAND_UNKNOWN = '?' STRAND_NA = '.' +############################################################################ +# Gram types +############################################################################ +GRAM_POSITIVE = '+' +GRAM_NEGATIVE = '-' +GRAM_UNKNOWN = '?' + ############################################################################ # Replicon types, length thresholds & topology ############################################################################ diff --git a/bakta/features/signal_peptides.py b/bakta/features/signal_peptides.py new file mode 100644 index 00000000..d58504df --- /dev/null +++ b/bakta/features/signal_peptides.py @@ -0,0 +1,86 @@ +import logging +import sys +import subprocess as sp +from collections import OrderedDict + +import bakta +import bakta.constants as bc +import bakta.config as cfg + +log = logging.getLogger('SIGNAL_PEPTIDES') + +def search(orfs, orf_aa_path): + """Search for signal peptides with DeepSig.""" + + deepsig_output_path = cfg.tmp_path.joinpath('deepsig.gff3') + gram = 'gramn' if cfg.gram == '-' else 'gramp' + cmd = [ + 'deepsig', + '--fasta', str(orf_aa_path), + '--organism', gram, + '--outf', str(deepsig_output_path), + '--outfmt', 'gff3', + '--threads', str(cfg.threads) + ] + log.debug('cmd=%s', cmd) + proc = sp.run( + cmd, + cwd=str(cfg.tmp_path), + env=cfg.env, + stdout=sp.PIPE, + stderr=sp.PIPE, + universal_newlines=True + ) + if(proc.returncode != 0): + log.debug('stdout=\'%s\', stderr=\'%s\'', proc.stdout, proc.stderr) + log.warning('signal peptides failed! deep-sig-error-code=%d', proc.returncode) + raise Exception(f'deepsig error! error code: {proc.returncode}') + + sequences_by_hexdigest = {f"{orf['aa_hexdigest']}": orf for orf in orfs} + + sig_peps = [] + with deepsig_output_path.open() as fh: + for line in fh: + (identifier, tool, feature_type, start_aa, stop_aa, score, placeholder, placeholder2, description) = line.split('\t') + if feature_type == 'Signal peptide': + start_aa = int(start_aa) + stop_aa = int(stop_aa) + score = float(score) + aa_identifier = identifier.split('-')[0] + if (aa_identifier in sequences_by_hexdigest): + orf = sequences_by_hexdigest[aa_identifier] + start_nt, stop_nt = orf_nt_start_stop(orf, start_aa, stop_aa) + + sig_pep = { + 'type': bc.FEATURE_SIGNAL_PEPTIDE, + 'start': start_nt, + 'stop': stop_nt, + 'score': score + } + if (bc.FEATURE_SIGNAL_PEPTIDE not in orf): + orf[bc.FEATURE_SIGNAL_PEPTIDE] = {} + orf[bc.FEATURE_SIGNAL_PEPTIDE] = sig_pep + log.debug( + 'hit: contig=%s, nt-start=%i, nt-stop=%i, aa-start=%i, aa-stop=%i, score=%0.2f', + orf['contig'], start_nt, stop_nt, start_aa, stop_aa, score + ) + sig_peps.append(sig_pep) + else: + log.error('signal peptide: unknown aa_identifier=%s', aa_identifier) + sys.exit('ERROR: signal peptide found for unknown aa_identifier=%s!', aa_identifier) + return sig_peps + + +def orf_nt_start_stop(orf, start_aa, stop_aa): + """Determin signal peptide nucleotide position. + orf: dictonary of ORF, either CDS or sORF + start_aa: sig pep start position within aa seq + stop_aa: sig pep stop position within aa seq + """ + if(orf['strand'] == bc.STRAND_FORWARD): + start_nt = orf['start'] + ((start_aa-1) * 3) + stop_nt = orf['start'] + ((stop_aa-1) * 3) + 2 + else: + start_nt = orf['stop'] - ((stop_aa-1) * 3 + 2) + stop_nt = orf['stop'] - ((start_aa-1) * 3) + return start_nt, stop_nt diff --git a/bakta/io/gff.py b/bakta/io/gff.py index e4ee3be7..8753fceb 100644 --- a/bakta/io/gff.py +++ b/bakta/io/gff.py @@ -212,6 +212,8 @@ def write_gff3(genome, features_by_contig, gff3_path): fh.write(f"{feat['contig']}\tProdigal\tgene\t{start}\t{stop}\t.\t{feat['strand']}\t.\t{gene_annotations}\n") annotations = encode_annotations(annotations) fh.write(f"{feat['contig']}\tProdigal\t{so.SO_CDS.name}\t{start}\t{stop}\t.\t{feat['strand']}\t0\t{annotations}\n") + if(bc.FEATURE_SIGNAL_PEPTIDE in feat): + write_signal_peptide(fh, feat) elif(feat['type'] is bc.FEATURE_SORF): annotations = { 'ID': feat['locus'], @@ -241,6 +243,8 @@ def write_gff3(genome, features_by_contig, gff3_path): fh.write(f"{feat['contig']}\tBakta\tgene\t{start}\t{stop}\t.\t{feat['strand']}\t.\t{gene_annotations}\n") annotations = encode_annotations(annotations) fh.write(f"{feat['contig']}\tBakta\t{so.SO_CDS.name}\t{start}\t{stop}\t.\t{feat['strand']}\t0\t{annotations}\n") + if(bc.FEATURE_SIGNAL_PEPTIDE in feat): + write_signal_peptide(fh, feat) elif(feat['type'] is bc.FEATURE_GAP): annotations = { 'ID': feat['id'], @@ -308,3 +312,16 @@ def encode_annotations(annotations): else: annotation_strings.append(f'{key}={encode_attribute(val)}') return ';'.join(annotation_strings) + + +def write_signal_peptide(fh, feat): + sig_peptide = feat[bc.FEATURE_SIGNAL_PEPTIDE] + annotations = { + 'ID': f"{feat['locus']}_sigpep", + 'Name': 'signal peptide', + 'product': 'signal peptide', + 'score': sig_peptide['score'], + 'Parent': feat['locus'] + } + annotations = encode_annotations(annotations) + fh.write(f"{feat['contig']}\tDeepSig\t{so.SO_SIGNAL_PEPTIDE.name}\t{sig_peptide['start']}\t{sig_peptide['stop']}\t{sig_peptide['score']:.2f}\t{feat['strand']}\t.\t{annotations}\n") \ No newline at end of file diff --git a/bakta/io/insdc.py b/bakta/io/insdc.py index 16c29fc6..2a8a96b5 100644 --- a/bakta/io/insdc.py +++ b/bakta/io/insdc.py @@ -102,6 +102,7 @@ def write_insdc(genome, features, genbank_output_path, embl_output_path): if('locus' in feature): qualifiers['locus_tag'] = feature['locus'] + accompanying_features=[] if(feature['type'] == bc.FEATURE_GAP): insdc_feature_type = bc.INSDC_FEATURE_GAP qualifiers['estimated_length'] = feature['length'] @@ -148,6 +149,15 @@ def write_insdc(genome, features, genbank_output_path, embl_output_path): if(bc.DB_XREF_EC in note): qualifiers['EC_number'] = note.replace('EC:', '') qualifiers['note'] = [note for note in qualifiers['note'] if bc.DB_XREF_EC not in note] + if(bc.FEATURE_SIGNAL_PEPTIDE in feature): + sigpep_qualifiers = {} + sigpep_qualifiers['locus_tag'] = feature['locus'] + sigpep_qualifiers['inference'] = 'ab initio prediction:DeepSig:1.2' + sigpep = feature[bc.FEATURE_SIGNAL_PEPTIDE] + sigpep_strand = 1 if feature['strand'] == bc.STRAND_FORWARD else -1 if feature['strand'] == bc.STRAND_REVERSE else 0 + sigpep_location = FeatureLocation(sigpep['start'] - 1, sigpep['stop'], strand = sigpep_strand) + sigpep_feature = SeqFeature(sigpep_location, type=bc.INSDC_FEATURE_SIGNAL_PEPTIDE, qualifiers=sigpep_qualifiers) + accompanying_features.append(sigpep_feature) elif(feature['type'] == bc.FEATURE_T_RNA): if('amino_acid' in feature and 'anti_codon' in feature): if('anti_codon_pos' in feature): @@ -238,6 +248,8 @@ def write_insdc(genome, features, genbank_output_path, embl_output_path): seq_feature_list.append(gen_seqfeat) feat_seqfeat = SeqFeature(feature_location, type=insdc_feature_type, qualifiers=qualifiers) seq_feature_list.append(feat_seqfeat) + for acc_feature in accompanying_features: # add accompanying features, e.g. signal peptides + seq_feature_list.append(acc_feature) contig_rec.features = seq_feature_list contig_list.append(contig_rec) diff --git a/bakta/main.py b/bakta/main.py index f5375215..38f65849 100755 --- a/bakta/main.py +++ b/bakta/main.py @@ -29,6 +29,7 @@ import bakta.features.crispr as crispr import bakta.features.orf as orf import bakta.features.cds as feat_cds +import bakta.features.signal_peptides as sig_peptides import bakta.features.s_orf as s_orf import bakta.features.gaps as gaps import bakta.features.ori as ori @@ -289,6 +290,10 @@ def main(): user_aa_found = exp_aa_seq.search(cdss, cds_aa_path, 'user_proteins', user_aa_path) print(f'\t\tuser protein sequences: {len(user_aa_found)}') + if(cfg.gram != bc.GRAM_UNKNOWN): + sig_peptides_found = sig_peptides.search(cdss, cds_aa_path) + print(f"\tsignal peptides: {len(sig_peptides_found)}") + print('\tcombine annotations and mark hypotheticals...') log.debug('combine CDS annotations') for cds in cdss: @@ -361,6 +366,14 @@ def main(): anno.combine_annotation(feat) # combine IPS and PSC annotations genome['features'][bc.FEATURE_SORF] = sorfs_filtered print(f'\tfiltered sORFs: {len(sorfs_filtered)}') + + if(cfg.gram != bc.GRAM_UNKNOWN and len(sorfs_filtered) > 0): + sorf_aa_path = cfg.tmp_path.joinpath('sorfs.faa') + with sorf_aa_path.open(mode='wt') as fh: + for sorf in sorfs_filtered: + fh.write(f">{sorf['aa_hexdigest']}-{sorf['contig']}-{sorf['start']}\n{sorf['aa']}\n") + sig_peptides_found = sig_peptides.search(sorfs_filtered, sorf_aa_path) + print(f"\tsignal peptides: {len(sig_peptides_found)}") ############################################################################ # gap annotation @@ -474,6 +487,7 @@ def main(): cdss = [f for f in features if f['type'] == bc.FEATURE_CDS] print(f"\tCDSs: {len(cdss)}") print(f"\t hypotheticals: {len([cds for cds in cdss if 'hypothetical' in cds])}") + print(f"\t signal peptides: {len([cds for cds in cdss if bc.FEATURE_SIGNAL_PEPTIDE in cds])}") print(f"\tsORFs: {len([f for f in features if f['type'] == bc.FEATURE_SORF])}") print(f"\tgaps: {len([f for f in features if f['type'] == bc.FEATURE_GAP])}") print(f"\toriCs/oriVs: {len([f for f in features if (f['type'] == bc.FEATURE_ORIC or f['type'] == bc.FEATURE_ORIV)])}") @@ -552,6 +566,7 @@ def main(): fh_out.write(f"CRISPR arrays: {len([f for f in features if f['type'] == bc.FEATURE_CRISPR])}\n") fh_out.write(f"CDSs: {len(cdss)}\n") fh_out.write(f"hypotheticals: {len([cds for cds in cdss if 'hypothetical' in cds])}\n") + fh_out.write(f"signal peptides: {len([cds for cds in cdss if bc.FEATURE_SIGNAL_PEPTIDE in cds])}\n") fh_out.write(f"sORFs: {len([f for f in features if f['type'] == bc.FEATURE_SORF])}\n") fh_out.write(f"gaps: {len([f for f in features if f['type'] == bc.FEATURE_GAP])}\n") fh_out.write(f"oriCs: {len([f for f in features if f['type'] == bc.FEATURE_ORIC])}\n") diff --git a/bakta/utils.py b/bakta/utils.py index 47af9715..8e91e1a9 100644 --- a/bakta/utils.py +++ b/bakta/utils.py @@ -39,6 +39,7 @@ def print_version(self): DEPENDENCY_PRODIGAL = (Version(2, 6, 3), Version(VERSION_MAX_DIGIT, VERSION_MAX_DIGIT, VERSION_MAX_DIGIT), VERSION_REGEX, ('prodigal', '-v'), ['--skip-cds']) DEPENDENCY_HMMSEARCH = (Version(3, 3, 1), Version(VERSION_MAX_DIGIT, VERSION_MAX_DIGIT, VERSION_MAX_DIGIT), VERSION_REGEX, ('hmmsearch', '-h'), ['--skip-cds', '--skip-sorf']) DEPENDENCY_DIAMOND = (Version(2, 0, 11), Version(VERSION_MAX_DIGIT, VERSION_MAX_DIGIT, VERSION_MAX_DIGIT), VERSION_REGEX, ('diamond', 'help'), ['--skip-cds', '--skip-sorf']) +DEPENDENCY_DEEPSIG = (Version(1, 2, 5), Version(VERSION_MAX_DIGIT, VERSION_MAX_DIGIT, VERSION_MAX_DIGIT), VERSION_REGEX, ('deepsig', '--version'), ['--gram ?']) DEPENDENCY_BLASTN = (Version(2, 7, 1), Version(VERSION_MAX_DIGIT, VERSION_MAX_DIGIT, VERSION_MAX_DIGIT), VERSION_REGEX, ('blastn', '-version'), ['--skip-ori']) DEPENDENCY_AMRFINDERPLUS = (Version(3, 10, 16), Version(VERSION_MAX_DIGIT, VERSION_MAX_DIGIT, VERSION_MAX_DIGIT), VERSION_REGEX, ('amrfinder', '--version'), ['--skip-cds']) @@ -74,7 +75,7 @@ def parse_arguments(): arg_group_annotation.add_argument('--complete', action='store_true', help='All sequences are complete replicons (chromosome/plasmid[s])') arg_group_annotation.add_argument('--prodigal-tf', action='store', default=None, dest='prodigal_tf', help='Path to existing Prodigal training file to use for CDS prediction') arg_group_annotation.add_argument('--translation-table', action='store', type=int, default=11, choices=[11, 4], dest='translation_table', help='Translation table: 11/4 (default = 11)') - arg_group_annotation.add_argument('--gram', action='store', default='?', choices=['+', '-', '?'], help="Gram type: +/-/? (default = '?')") + arg_group_annotation.add_argument('--gram', action='store', default=bc.GRAM_UNKNOWN, choices=[bc.GRAM_POSITIVE, bc.GRAM_NEGATIVE, bc.GRAM_UNKNOWN], help=f'Gram type for signal peptide predictions: {bc.GRAM_POSITIVE}/{bc.GRAM_NEGATIVE}/{bc.GRAM_UNKNOWN} (default = {bc.GRAM_UNKNOWN})') arg_group_annotation.add_argument('--locus', action='store', default=None, help="Locus prefix (default = 'contig')") arg_group_annotation.add_argument('--locus-tag', action='store', default=None, dest='locus_tag', help='Locus tag prefix (default = autogenerated)') arg_group_annotation.add_argument('--keep-contig-headers', action='store_true', dest='keep_contig_headers', help='Keep original contig headers') @@ -208,6 +209,8 @@ def test_dependencies(): if(not cfg.skip_cds or not cfg.skip_sorf): test_dependency(DEPENDENCY_HMMSEARCH) test_dependency(DEPENDENCY_DIAMOND) + if(cfg.gram != '?'): + test_dependency(DEPENDENCY_DEEPSIG) if(not cfg.skip_ori): test_dependency(DEPENDENCY_BLASTN) diff --git a/test/test_bakta.py b/test/test_bakta.py index 48b15b2c..0cd25eec 100644 --- a/test/test_bakta.py +++ b/test/test_bakta.py @@ -1,3 +1,5 @@ +import sys + from pathlib import Path from subprocess import run @@ -41,10 +43,11 @@ def test_bakta_mock_skipped_features(parameters, tmpdir): (['test/data/NC_002127.1-win.fna.gz']) # Windows format (\r\n) ] ) +@pytest.mark.skipif(sys.platform=='darwin', reason=f'Skip on {sys.platform}') def test_bakta_plasmid(parameters, tmpdir): # full test on plasmid proc = run( - ['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--tmp-dir', tmpdir, '--prefix', 'test', '--min-contig-length', '200', '--complete', '--proteins', 'test/data/user-proteins.faa'] + + ['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--tmp-dir', tmpdir, '--prefix', 'test', '--min-contig-length', '200', '--complete', '--gram', '-', '--proteins', 'test/data/user-proteins.faa'] + ['--genus', 'Foo gen. nov.', '--species', 'bar sp. nov.', '--strain', 'test 1'] + parameters ) diff --git a/test/test_sig_peps.py b/test/test_sig_peps.py new file mode 100644 index 00000000..8eaf5471 --- /dev/null +++ b/test/test_sig_peps.py @@ -0,0 +1,28 @@ +import pytest + +from bakta.features import signal_peptides as bsp + + +one_twentyone_fwd = {'start': 1, 'stop': 21, 'strand': '+'} +one_twentyone_rev = {'start': 1, 'stop': 21, 'strand': '-'} +four_fifteen_fwd = {'start': 4, 'stop': 15, 'strand': '+'} +four_fifteen_rev = {'start': 4, 'stop': 15, 'strand': '-'} + + +@pytest.mark.parametrize( + "orf, start_aa, stop_aa, expected", + [ + # Forward strand + (one_twentyone_fwd, 1, 3, (1,9)), # ORF spans whole sequence length, signal peptide starts at AA 1 + (one_twentyone_fwd, 2, 7, (4,21)), # ORF spans whole sequence length, signal peptide stops at last AA + (four_fifteen_fwd, 1, 3, (4,12)), # ORF does not span whole sequence length, signal peptide starts at AA 1 + (four_fifteen_fwd, 2, 4, (7,15)), # ORF does not span whole sequence length, signal peptide stops at last AA + # Reverse strand + (one_twentyone_rev, 1, 3, (13,21)), # ORF spans whole sequence length, signal peptide starts at AA 1 + (one_twentyone_rev, 3, 7, (1,15)), # ORF spans whole sequence length, signal peptide stops at last AA + (four_fifteen_rev, 1, 2, (10,15)), # ORF does not span whole sequence length, signal peptide starts at AA 1 + (four_fifteen_rev, 2, 3, (7,12)) # ORF does not span whole sequence length, signal peptide in the middle of ORF + ] +) +def test_start_stop(orf, start_aa, stop_aa, expected): + assert bsp.orf_nt_start_stop(orf, start_aa, stop_aa) == expected \ No newline at end of file