Skip to content

Commit

Permalink
add user protein tests #76
Browse files Browse the repository at this point in the history
  • Loading branch information
oschwengers committed Sep 9, 2021
1 parent 992d929 commit 1541f10
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 3 deletions.
4 changes: 4 additions & 0 deletions test/data/user-proteins.faa
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>VFDB_test 90~~~90~~~90~~~yaxA~~~cytotoxin YaxA~~~VFDB:VFG045347,VFDB:VF0511
MTQTQLAIDNVLASAENTIQLNELPKVVLDFITGEQTSVARSGGIFTKEDLINLKLYVRKGLSLPTRQDEVEAYLGYKKIDVAGLEPKDIKLLFDEIHNHALNWNDVEQAVLQQSLDLDIAAKNIISTGNEIINLINQMPITLRVKTLLRDITDKQLENITYESADHEVASALKDILDDMKGDINRHQTTTENVRKKVSDYRITLTGGELSSGDKVNGLEPQVKTKYDLMEKSNMRKSIKELDEKIKEKKQRIEQLKKDYDKFVGLSFTGAIGGIIAMAITSGIFGAKAENARKEKNALISEVAELESKVSSQRALQTALEALSLSFSDIGIRMVDAESALNHLDFMWLSVLNQITESQIQFAMINNALRLTSFVNKFQQVITPWQSVGDSARQLVDIFDEAIKEYKKVYG
>hypo-mock-test 99~~~99~~~99~~~mock1~~~mock hypothetical user protein 1~~~USERDB:MOCK1
MAQNPFKALNINIDKIESALTQNGVTNYSSNVKNERETHISGTYKGIDFLIKLMPSGGNTTIGRASGQNNTYFDEIALIIKENCLYSDTKNFEYTIPKFSDDDRANLFEFLSEEGITITEDNNNDPNCKHQYIMTTSNGDRVRAKIYKRGSIQFQGKYLQIASLINDFMCSILNMKEIVEQKNKEFNVDIKKETIESELHSKLPKSIDKIHEDIKKQLSCSLIMKKIDVEMEDYSTYCFSALRAIEGFIYQILNDVCNPSSSKNLGEYFTENKPKYIIREIHQETINGEIAEVLCECYTYWHENRHGLFHMKPGIADTKTINKLESIAIIDTVCQLIDGGVARLKL
28 changes: 28 additions & 0 deletions test/test_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,34 @@ def test_replicons_ok(tmpdir):
assert Path.exists(tmpdir_path.joinpath(file))


@pytest.mark.parametrize(
    'parameters',
    [
        ['--proteins'],  # option given without a value
        ['--proteins', ''],  # empty value
        ['--proteins', 'foo'],  # path that does not exist
    ]
)
def test_proteins_failiing(parameters, tmpdir):
    """Check that bakta exits with a non-zero code for unusable `--proteins` arguments.

    Each parametrized case is spliced into the command line between the
    common options and the genome file.
    """
    common_options = ['bin/bakta', '--db', 'test/db', '--output', tmpdir]
    genome = ['test/data/NC_002127.1.fna']

    proc = run(common_options + parameters + genome)

    assert proc.returncode != 0


@pytest.mark.slow
def test_proteins_ok(tmpdir):
    """Check that a valid `--proteins` file is accepted and all output files are written."""
    cmd_line = (
        ['bin/bakta', '--db', 'test/db', '--output', tmpdir, '--prefix', 'test', '--proteins', 'test/data/user-proteins.faa']
        + SKIP_PARAMETERS
        + ['test/data/NC_002127.1.fna']
    )
    proc = run(cmd_line)
    assert proc.returncode == 0

    # every expected result file must have been created in the output directory
    output_dir = Path(tmpdir)
    for file_name in FILES:
        assert output_dir.joinpath(file_name).exists()


def test_output_failing():
# test output arguments
cmd_line = ['bin/bakta', '--output', '/', 'test/data/draft-w-plasmids.fna']
Expand Down
6 changes: 3 additions & 3 deletions test/test_bakta.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@pytest.mark.slow
def test_bakta_mock_skipped_features(tmpdir):
# fast test skipping all feature detections
proc = run(['bin/bakta', '--db', 'test/db', '--output', tmpdir, '--prefix', 'test'] + SKIP_PARAMETERS + ['test/data/NC_002127.1.fna'])
proc = run(['bin/bakta', '--db', 'test/db', '--output', tmpdir, '--prefix', 'test', '--proteins', 'test/data/user-proteins.faa'] + SKIP_PARAMETERS + ['test/data/NC_002127.1.fna'])
assert proc.returncode == 0

tmpdir_path = Path(tmpdir)
Expand All @@ -20,7 +20,7 @@ def test_bakta_mock_skipped_features(tmpdir):
@pytest.mark.slow
def test_bakta_plasmid(tmpdir):
# full test on plasmid
proc = run(['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--prefix', 'test', '--complete', 'test/data/NC_002127.1.fna'])
proc = run(['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--prefix', 'test', '--complete', '--proteins', 'test/data/user-proteins.faa', 'test/data/NC_002127.1.fna'])
assert proc.returncode == 0

tmpdir_path = Path(tmpdir)
Expand Down Expand Up @@ -52,7 +52,7 @@ def test_bakta_plasmid(tmpdir):
@pytest.mark.slow
def test_bakta_genome(tmpdir):
# full test on complete genome in compliant mode
proc = run(['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--prefix', 'test', '--complete', '--compliant', 'test/data/GCF_000008865.2.fna.gz'])
proc = run(['bin/bakta', '--db', 'test/db', '--verbose', '--output', tmpdir, '--prefix', 'test', '--complete', '--compliant', '--proteins', 'test/data/user-proteins.faa', 'test/data/GCF_000008865.2.fna.gz'])
assert proc.returncode == 0

tmpdir_path = Path(tmpdir)
Expand Down
163 changes: 163 additions & 0 deletions test/test_user_proteins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import json

from pathlib import Path
from subprocess import run

import bakta.config as cfg
import bakta.expert.protein_sequences as exp_aa_seq

import pytest


# Amino acid sequence shared by every mock user protein below.
SEQUENCE = 'MRADEEPGDLSAVAQDYLKVIWTAQEWSQDKVSTKMLAERIGVSASTASESIRKLAEQGLVDHEKYGAVTLTDSGRRAALAMVRRHRLLETFLVNELGYRWDEVHDEA'


# Records used by test_user_proteins_io, i.e. descriptions that must be accepted.
# The description is a '~~~'-separated list in either a short (3 column) or a
# full (6 column) layout.
# NOTE(review): judging from the fixture ids further down, the three leading
# columns of the full layout are presumably min identity, min query coverage
# and min model coverage - confirm against the parser.
aa_min = dict(
    id='min',
    description='~~~product~~~',  # product only
    sequence=SEQUENCE,
)
aa_min_gene = dict(
    id='min',
    description='gene~~~product~~~',  # gene + product
    sequence=SEQUENCE,
)
aa_min_dbxref = dict(
    id='min',
    description='gene~~~product~~~db-1:id-1',  # single dbxref
    sequence=SEQUENCE,
)
aa_min_dbxrefs = dict(
    id='min',
    description='gene~~~product~~~db-1:id-1,db-2:id-2',  # comma-separated dbxrefs
    sequence=SEQUENCE,
)
aa_full = dict(
    id='full',
    description='90.0~~~80.0~~~80.0~~~gene~~~product~~~db-1:id-1,db-2:id-2',  # all six columns
    sequence=SEQUENCE,
)


# Records used by test_wrong_user_proteins_io, i.e. descriptions that must be rejected.
aa_wrong_1 = dict(
    id='low-cols',
    description='~~~product',  # only two columns
    sequence=SEQUENCE,
)
aa_wrong_2 = dict(
    id='high-cols',
    description='90~~~80~~~80~~~gene~~~product~~~dbxref:dbxref~~~',  # trailing seventh column
    sequence=SEQUENCE,
)
aa_wrong_3 = dict(
    id='no-product',
    description='gene~~~~~~dbxref:dbxref',  # empty product, short layout
    sequence=SEQUENCE,
)
aa_wrong_4 = dict(
    id='no-product-full',
    description='90~~~80~~~80~~~gene~~~~~~dbxref:dbxref',  # empty product, full layout
    sequence=SEQUENCE,
)
aa_wrong_5 = dict(
    id='wrong-dbxref',
    description='gene~~~product~~~dbxrefdbxref',  # dbxref without ':' separator, short layout
    sequence=SEQUENCE,
)
aa_wrong_6 = dict(
    id='wrong-dbxref-full',
    description='90~~~80~~~80~~~gene~~~product~~~dbxrefdbxref',  # dbxref without ':' separator, full layout
    sequence=SEQUENCE,
)
aa_wrong_7 = dict(
    id='wrong-id',
    description='ninety~~~80~~~80~~~gene~~~product~~~dbxref:dbxref',  # non-numeric first column
    sequence=SEQUENCE,
)
aa_wrong_8 = dict(
    id='wrong-min-query-cov',
    description='90~~~eighty~~~80~~~gene~~~product~~~dbxref:dbxref',  # non-numeric second column
    sequence=SEQUENCE,
)
aa_wrong_9 = dict(
    id='wrong-min-model-cov',
    description='90~~~80~~~eighty~~~gene~~~product~~~dbxref:dbxref',  # non-numeric third column
    sequence=SEQUENCE,
)


@pytest.mark.parametrize(
    'aa',
    [
        aa_wrong_1,
        aa_wrong_2,
        aa_wrong_3,
        aa_wrong_4,
        aa_wrong_5,
        aa_wrong_6,
        aa_wrong_7,
        aa_wrong_8,
        aa_wrong_9,
    ]
)
def test_wrong_user_proteins_io(tmpdir, aa):
    """Check that a malformed user protein record makes the import exit.

    The record is written to a temporary FASTA file which is registered as
    `cfg.user_proteins`; `write_user_protein_sequences` is then expected to
    raise `SystemExit`.
    """
    work_dir = Path(tmpdir)
    cfg.user_proteins = work_dir.joinpath('user.faa')
    write_tmp_faa(aa, cfg.user_proteins)

    cleaned_faa_path = work_dir.joinpath('user-clean.faa')
    with pytest.raises(SystemExit) as pytest_wrapped_e:
        exp_aa_seq.write_user_protein_sequences(cleaned_faa_path)
    assert pytest_wrapped_e.type == SystemExit


@pytest.mark.parametrize(
    'aa',
    [
        aa_min,
        aa_min_gene,
        aa_min_dbxref,
        aa_min_dbxrefs,
        aa_full,
    ]
)
def test_user_proteins_io(tmpdir, aa):
    """Check that a well-formed user protein record is imported without error.

    The record is written to a temporary FASTA file which is registered as
    `cfg.user_proteins`; the test passes when `write_user_protein_sequences`
    returns normally.
    """
    work_dir = Path(tmpdir)
    cfg.user_proteins = work_dir.joinpath('user.faa')
    write_tmp_faa(aa, cfg.user_proteins)

    cleaned_faa_path = work_dir.joinpath('user-clean.faa')
    exp_aa_seq.write_user_protein_sequences(cleaned_faa_path)


def write_tmp_faa(aa, aa_path):
    """Write a single protein record as a two-line FASTA file.

    Args:
        aa: mapping providing the 'id', 'description' and 'sequence' entries.
        aa_path: path object exposing `open()`; the file is (over)written.
    """
    header = f">{aa['id']} {aa['description']}\n"
    with aa_path.open('w') as fh:
        fh.writelines((header, aa['sequence'], '\n'))


@pytest.mark.slow
def test_user_proteins(tmpdir):
    """Run bakta with a user protein file and expect exactly one user-protein hit.

    The feature detections named by the `--skip-*` flags below are switched
    off to keep the run short. Afterwards the JSON result file is scanned for
    features whose 'expert' entry contains 'user_proteins'.
    """
    cmd_line = [
        'bin/bakta', '--db', 'test/db', '--output', tmpdir, '--prefix', 'test', '--proteins', 'test/data/user-proteins.faa',
        '--skip-tmrna', '--skip-trna', '--skip-rrna', '--skip-ncrna', '--skip-ncrna-region', '--skip-crispr', '--skip-sorf', '--skip-ori', '--skip-gap',
        'test/data/NC_002127.1.fna'
    ]
    proc = run(cmd_line)
    assert proc.returncode == 0

    results_path = Path(tmpdir).joinpath('test.json')
    assert results_path.exists()
    with results_path.open() as fh:
        results = json.load(fh)
    assert results is not None

    # keep only features annotated via the user protein expert system
    user_prot_feats = [
        feat
        for feat in results['features']
        if 'expert' in feat and 'user_proteins' in feat['expert']
    ]
    assert len(user_prot_feats) == 1

0 comments on commit 1541f10

Please sign in to comment.