diff --git a/test/data/user-proteins.faa b/test/data/user-proteins.faa
new file mode 100644
index 00000000..8212502f
--- /dev/null
+++ b/test/data/user-proteins.faa
@@ -0,0 +1,4 @@
+>VFDB_test 90~~~90~~~90~~~yaxA~~~cytotoxin YaxA~~~VFDB:VFG045347,VFDB:VF0511
+MTQTQLAIDNVLASAENTIQLNELPKVVLDFITGEQTSVARSGGIFTKEDLINLKLYVRKGLSLPTRQDEVEAYLGYKKIDVAGLEPKDIKLLFDEIHNHALNWNDVEQAVLQQSLDLDIAAKNIISTGNEIINLINQMPITLRVKTLLRDITDKQLENITYESADHEVASALKDILDDMKGDINRHQTTTENVRKKVSDYRITLTGGELSSGDKVNGLEPQVKTKYDLMEKSNMRKSIKELDEKIKEKKQRIEQLKKDYDKFVGLSFTGAIGGIIAMAITSGIFGAKAENARKEKNALISEVAELESKVSSQRALQTALEALSLSFSDIGIRMVDAESALNHLDFMWLSVLNQITESQIQFAMINNALRLTSFVNKFQQVITPWQSVGDSARQLVDIFDEAIKEYKKVYG
+>hypo-mock-test 99~~~99~~~99~~~mock1~~~mock hypothetical user protein 1~~~USERDB:MOCK1
+MAQNPFKALNINIDKIESALTQNGVTNYSSNVKNERETHISGTYKGIDFLIKLMPSGGNTTIGRASGQNNTYFDEIALIIKENCLYSDTKNFEYTIPKFSDDDRANLFEFLSEEGITITEDNNNDPNCKHQYIMTTSNGDRVRAKIYKRGSIQFQGKYLQIASLINDFMCSILNMKEIVEQKNKEFNVDIKKETIESELHSKLPKSIDKIHEDIKKQLSCSLIMKKIDVEMEDYSTYCFSALRAIEGFIYQILNDVCNPSSSKNLGEYFTENKPKYIIREIHQETINGEIAEVLCECYTYWHENRHGLFHMKPGIADTKTINKLESIAIIDTVCQLIDGGVARLKL
\ No newline at end of file
diff --git a/test/test_args.py b/test/test_args.py
index 39e7240a..049e97dc 100644
--- a/test/test_args.py
+++ b/test/test_args.py
@@ -139,6 +139,34 @@ def test_replicons_ok(tmpdir):
         assert Path.exists(tmpdir_path.joinpath(file))
 
 
+@pytest.mark.parametrize(
+    'parameters',
+    [
+        (['--proteins']),  # not provided
+        (['--proteins', '']),  # empty
+        (['--proteins', 'foo'])  # not existing
+    ]
+)
+def test_proteins_failing(parameters, tmpdir):
+    # invalid --proteins arguments must abort the run
+
+    # missing, empty or non-existing path
+    proc = run(['bin/bakta', '--db', 'test/db', '--output', tmpdir] + parameters + ['test/data/NC_002127.1.fna'])
+    assert proc.returncode != 0
+
+
+@pytest.mark.slow
+def test_proteins_ok(tmpdir):
+    # a valid --proteins file must yield all expected output files
+
+    proc = run(['bin/bakta', '--db', 'test/db', '--output', tmpdir, '--prefix', 'test', '--proteins', 'test/data/user-proteins.faa'] + SKIP_PARAMETERS + ['test/data/NC_002127.1.fna'])
+    assert proc.returncode == 0
+
+    tmpdir_path = Path(tmpdir)
+    for file in FILES:
+        assert Path.exists(tmpdir_path.joinpath(file))
+
+
 def test_output_failing():
     # test database arguments
     cmd_line = ['bin/bakta', '--output', '/', 'test/data/draft-w-plasmids.fna']
diff --git a/test/test_bakta.py b/test/test_bakta.py
index 39f6a993..6a5d7fba 100644
--- a/test/test_bakta.py
+++ b/test/test_bakta.py
@@ -9,7 +9,7 @@
 @pytest.mark.slow
 def test_bakta_mock_skipped_features(tmpdir):
     # fast test skipping all feature detections
-    proc = run(['bin/bakta', '--db', 'test/db', '--output', tmpdir, '--prefix', 'test'] + SKIP_PARAMETERS + ['test/data/NC_002127.1.fna'])
+    proc = run(['bin/bakta', '--db', 'test/db', '--output', tmpdir, '--prefix', 'test', '--proteins', 'test/data/user-proteins.faa'] + SKIP_PARAMETERS + ['test/data/NC_002127.1.fna'])
     assert proc.returncode == 0
 
     tmpdir_path = Path(tmpdir)
@@ -20,7 +20,7 @@ def test_bakta_mock_skipped_features(tmpdir):
 @pytest.mark.slow
 def test_bakta_plasmid(tmpdir):
     # full test on plasmid
-    proc = run(['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--prefix', 'test', '--complete', 'test/data/NC_002127.1.fna'])
+    proc = run(['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--prefix', 'test', '--complete', '--proteins', 'test/data/user-proteins.faa', 'test/data/NC_002127.1.fna'])
     assert proc.returncode == 0
 
     tmpdir_path = Path(tmpdir)
@@ -52,7 +52,7 @@ def test_bakta_plasmid(tmpdir):
 @pytest.mark.slow
 def test_bakta_genome(tmpdir):
     # full test on complete genome in compliant mode
-    proc = run(['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--prefix', 'test', '--complete', '--compliant', 'test/data/GCF_000008865.2.fna.gz'])
+    proc = run(['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--prefix', 'test', '--complete', '--compliant', '--proteins', 'test/data/user-proteins.faa', 'test/data/GCF_000008865.2.fna.gz'])
     assert proc.returncode == 0
 
     tmpdir_path = Path(tmpdir)
diff --git a/test/test_user_proteins.py b/test/test_user_proteins.py
new file mode 100644
index 00000000..387f1efd
--- /dev/null
+++ b/test/test_user_proteins.py
@@ -0,0 +1,163 @@
+import json
+
+from pathlib import Path
+from subprocess import run
+
+import bakta.config as cfg
+import bakta.expert.protein_sequences as exp_aa_seq
+
+import pytest
+
+
+SEQUENCE = 'MRADEEPGDLSAVAQDYLKVIWTAQEWSQDKVSTKMLAERIGVSASTASESIRKLAEQGLVDHEKYGAVTLTDSGRRAALAMVRRHRLLETFLVNELGYRWDEVHDEA'
+
+
+aa_min = {
+    'id': 'min',
+    'description': '~~~product~~~',
+    'sequence': SEQUENCE
+}
+aa_min_gene = {
+    'id': 'min',
+    'description': 'gene~~~product~~~',
+    'sequence': SEQUENCE
+}
+aa_min_dbxref = {
+    'id': 'min',
+    'description': 'gene~~~product~~~db-1:id-1',
+    'sequence': SEQUENCE
+}
+aa_min_dbxrefs = {
+    'id': 'min',
+    'description': 'gene~~~product~~~db-1:id-1,db-2:id-2',
+    'sequence': SEQUENCE
+}
+aa_full = {
+    'id': 'full',
+    'description': '90.0~~~80.0~~~80.0~~~gene~~~product~~~db-1:id-1,db-2:id-2',
+    'sequence': SEQUENCE
+}
+
+
+aa_wrong_1 = {
+    'id': 'low-cols',
+    'description': '~~~product',
+    'sequence': SEQUENCE
+}
+aa_wrong_2 = {
+    'id': 'high-cols',
+    'description': '90~~~80~~~80~~~gene~~~product~~~dbxref:dbxref~~~',
+    'sequence': SEQUENCE
+}
+aa_wrong_3 = {
+    'id': 'no-product',
+    'description': 'gene~~~~~~dbxref:dbxref',
+    'sequence': SEQUENCE
+}
+aa_wrong_4 = {
+    'id': 'no-product-full',
+    'description': '90~~~80~~~80~~~gene~~~~~~dbxref:dbxref',
+    'sequence': SEQUENCE
+}
+aa_wrong_5 = {
+    'id': 'wrong-dbxref',
+    'description': 'gene~~~product~~~dbxrefdbxref',
+    'sequence': SEQUENCE
+}
+aa_wrong_6 = {
+    'id': 'wrong-dbxref-full',
+    'description': '90~~~80~~~80~~~gene~~~product~~~dbxrefdbxref',
+    'sequence': SEQUENCE
+}
+aa_wrong_7 = {
+    'id': 'wrong-min-id',
+    'description': 'ninety~~~80~~~80~~~gene~~~product~~~dbxref:dbxref',
+    'sequence': SEQUENCE
+}
+aa_wrong_8 = {
+    'id': 'wrong-min-query-cov',
+    'description': '90~~~eighty~~~80~~~gene~~~product~~~dbxref:dbxref',
+    'sequence': SEQUENCE
+}
+aa_wrong_9 = {
+    'id': 'wrong-min-model-cov',
+    'description': '90~~~80~~~eighty~~~gene~~~product~~~dbxref:dbxref',
+    'sequence': SEQUENCE
+}
+
+
+@pytest.mark.parametrize(
+    "aa",
+    [
+        (aa_wrong_1),
+        (aa_wrong_2),
+        (aa_wrong_3),
+        (aa_wrong_4),
+        (aa_wrong_5),
+        (aa_wrong_6),
+        (aa_wrong_7),
+        (aa_wrong_8),
+        (aa_wrong_9)
+    ]
+)
+def test_wrong_user_proteins_io(tmpdir, aa):
+    tmpdir = Path(tmpdir)
+    cfg.user_proteins = tmpdir.joinpath('user.faa')
+    write_tmp_faa(aa, cfg.user_proteins)
+
+    user_proteins_path = tmpdir.joinpath('user-clean.faa')
+    with pytest.raises(SystemExit) as pytest_wrapped_e:
+        exp_aa_seq.write_user_protein_sequences(user_proteins_path)
+    assert pytest_wrapped_e.type == SystemExit
+
+
+@pytest.mark.parametrize(
+    "aa",
+    [
+        (aa_min),
+        (aa_min_gene),
+        (aa_min_dbxref),
+        (aa_min_dbxrefs),
+        (aa_full)
+    ]
+)
+def test_user_proteins_io(tmpdir, aa):
+    tmpdir = Path(tmpdir)
+    cfg.user_proteins = tmpdir.joinpath('user.faa')
+    write_tmp_faa(aa, cfg.user_proteins)
+
+    user_proteins_path = tmpdir.joinpath('user-clean.faa')
+    exp_aa_seq.write_user_protein_sequences(user_proteins_path)
+
+
+def write_tmp_faa(aa, aa_path):
+    with aa_path.open('w') as fh:
+        fh.write(f">{aa['id']} {aa['description']}\n")
+        fh.write(aa['sequence'])
+        fh.write('\n')
+
+
+@pytest.mark.slow
+def test_user_proteins(tmpdir):
+    # fast run skipping the feature detections listed below, then count user protein hits
+    proc = run(
+        [
+            'bin/bakta', '--db', 'test/db', '--output', tmpdir, '--prefix', 'test', '--proteins', 'test/data/user-proteins.faa',
+            '--skip-tmrna', '--skip-trna', '--skip-rrna', '--skip-ncrna', '--skip-ncrna-region', '--skip-crispr', '--skip-sorf', '--skip-ori', '--skip-gap',
+            'test/data/NC_002127.1.fna'
+        ]
+    )
+    assert proc.returncode == 0
+
+    tmpdir_path = Path(tmpdir)
+    results_path = tmpdir_path.joinpath('test.json')
+    assert results_path.exists()
+    results = None
+    with results_path.open() as fh:
+        results = json.load(fh)
+    assert results is not None
+    user_prot_feats = []
+    for feat in results['features']:
+        if 'expert' in feat and 'user_proteins' in feat['expert']:
+            user_prot_feats.append(feat)
+    assert len(user_prot_feats) == 1