diff --git a/bakta/config.py b/bakta/config.py
index 757e4018..3fec9952 100644
--- a/bakta/config.py
+++ b/bakta/config.py
@@ -204,6 +204,11 @@ def setup(args):
     log.info('translation_table=%s', translation_table)
     gram = args.gram
     log.info('gram=%s', gram)
+    compliant = args.compliant
+    log.info('compliant=%s', compliant)
+    if(compliant):
+        min_contig_length = 200
+        log.info('compliant mode! min_contig_length=%s', min_contig_length)
     locus = args.locus
     if(locus is not None):
         if(locus == ''):
@@ -214,7 +219,7 @@ def setup(args):
             sys.exit(f"ERROR: whitespace character ({locus}) in 'locus' parameter!")
         if(bc.RE_INSDC_ID_PREFIX.fullmatch(locus) is None):
             log.error("Invalid 'locus' parameter! locus=%s", locus)
-            sys.exit(f"ERROR: invalid 'locus' parameter ({locus})!\nLocus prefixes must contain between 1 and 20 alphanumeric and '-_' characters.")
+            sys.exit(f"ERROR: invalid 'locus' parameter ({locus})!\nLocus prefixes must contain between 1 and 20 alphanumeric or '-_' characters.")
         log.info('locus=%s', locus)
     locus_tag = args.locus_tag
     if(locus_tag is not None):
@@ -224,9 +229,14 @@ def setup(args):
         if(' ' in locus_tag):
             log.error("Whitespace character in 'locus-tag' parameter! locus-tag=%s", locus_tag)
             sys.exit(f"ERROR: whitespace character ({locus_tag}) in 'locus-tag' parameter!")
-        if(bc.RE_INSDC_LOCUSTAG_PREFIX.fullmatch(locus_tag) is None):
-            log.error("Invalid 'locus-tag' parameter! locus-tag=%s", locus_tag)
-            sys.exit(f"ERROR: invalid 'locus-tag' parameter ({locus_tag})!\nLocus tag prefixes must contain between 3 and 12 alphanumeric uppercase characters and start with a letter.")
+        if(compliant):
+            if(bc.RE_INSDC_LOCUSTAG_PREFIX.fullmatch(locus_tag) is None):
+                log.error("INSDC-incompliant 'locus-tag' parameter! locus-tag=%s", locus_tag)
+                sys.exit(f"ERROR: INSDC-incompliant 'locus-tag' parameter ({locus_tag})!\nINSDC Locus tag prefixes must contain between 3 and 12 alphanumeric uppercase characters and start with a letter.")
+        else:
+            if(bc.RE_LOCUSTAG_PREFIX.fullmatch(locus_tag) is None):
+                log.error("Invalid 'locus-tag' parameter! locus-tag=%s", locus_tag)
+                sys.exit(f"ERROR: invalid 'locus-tag' parameter ({locus_tag})!\nLocus tag prefixes must contain between 1 and 24 alphanumeric characters or '_.-' signs.")
         log.info('locus-tag=%s', locus_tag)
     keep_contig_headers = args.keep_contig_headers
     log.info('keep_contig_headers=%s', keep_contig_headers)
@@ -243,11 +253,6 @@ def setup(args):
             log.error('provided replicon file not valid! path=%s', replicons)
             sys.exit(f'ERROR: replicon table file ({replicons}) not valid!')
         log.info('replicon-table=%s', replicons)
-    compliant = args.compliant
-    log.info('compliant=%s', compliant)
-    if(compliant):
-        min_contig_length = 200
-        log.info('compliant mode! min_contig_length=%s', min_contig_length)
     user_proteins = args.proteins
     if(user_proteins is not None):
         try:
diff --git a/bakta/constants.py b/bakta/constants.py
index a5043353..a7e2bce9 100644
--- a/bakta/constants.py
+++ b/bakta/constants.py
@@ -7,6 +7,7 @@
 RE_MULTIWHITESPACE = re.compile(r'\s+')
 RE_INSDC_ID_PREFIX = re.compile(r'[A-Za-z\d_.:*#-]{1,20}') # https://www.ncbi.nlm.nih.gov/WebSub/html/help/fasta.html
 RE_INSDC_ID = re.compile(r'[A-Za-z\d_.:*#-]{1,25}') # https://www.ncbi.nlm.nih.gov/WebSub/html/help/fasta.html
+RE_LOCUSTAG_PREFIX = re.compile(r'[A-Za-z\d_.-]{1,24}') # https://ena-docs.readthedocs.io/en/latest/faq/locus_tags.html
 RE_INSDC_LOCUSTAG_PREFIX = re.compile(r'[A-Z][A-Z0-9]{2,11}') # https://ena-docs.readthedocs.io/en/latest/faq/locus_tags.html
 
 ############################################################################
diff --git a/test/test_args.py b/test/test_args.py
index fb132946..9f5c5e15 100644
--- a/test/test_args.py
+++ b/test/test_args.py
@@ -288,6 +288,67 @@ def test_locus_ok(parameters, tmpdir):
     assert proc.returncode == 0
 
 
+@pytest.mark.parametrize(
+    'parameters',
+    [
+        (['--locus-tag']), # not provided
+        (['--locus-tag', '']), # empty
+        (['--locus-tag', ' ']), # whitespace only
+        (['--locus-tag', ' ']), # whitespaces only
+        (['--locus-tag', 'ABCDEFGHIJKLMNOPQRSTUVWXZ']), # more than 24 characters
+        (['--locus-tag', 'ABC!']), # wrong characters
+        (['--locus-tag', 'ABC?']), # wrong characters
+        (['--locus-tag', 'ABC*']), # wrong characters
+        (['--locus-tag', 'ABC,']), # wrong characters
+        (['--locus-tag', 'ABC;']), # wrong characters
+        (['--locus-tag', 'ABC:']), # wrong characters
+        (['--locus-tag', 'ABC§']), # wrong characters
+        (['--locus-tag', 'ABC$']), # wrong characters
+        (['--locus-tag', 'ABC%']), # wrong characters
+        (['--locus-tag', 'ABC&']), # wrong characters
+        (['--locus-tag', 'ABC/']), # wrong characters
+        (['--locus-tag', 'ABC=']), # wrong characters
+        (['--locus-tag', 'ABC#']) # wrong characters
+    ]
+)
+def test_locustag_failiing(parameters, tmpdir):
+    # test locus-tag prefix arguments
+    proc = run(
+        ['bin/bakta', '--db', 'test/db', '--output', tmpdir] +
+        parameters +
+        SKIP_PARAMETERS +
+        ['test/data/NC_002127.1.fna']
+    )
+    assert proc.returncode != 0
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize(
+    'parameters',
+    [
+        (['--locus-tag', 'A']),
+        (['--locus-tag', '1']),
+        (['--locus-tag', 'ABCDEFGHIJKLMNOPQRSTUVWX']),
+        (['--locus-tag', 'A12']),
+        (['--locus-tag', 'ABC.']),
+        (['--locus-tag', 'ABC-']),
+        (['--locus-tag', 'ABC_']),
+        (['--locus-tag', 'GCF_014267685.1']),
+        (['--locus-tag', 'ASM25969v1']),
+        (['--locus-tag', 'DAESDI010000001.1'])
+    ]
+)
+def test_locustag_ok(parameters, tmpdir):
+    # test locus-tag prefix arguments
+    proc = run(
+        ['bin/bakta', '--db', 'test/db', '--output', tmpdir] +
+        parameters +
+        SKIP_PARAMETERS +
+        ['test/data/NC_002127.1.fna']
+    )
+    assert proc.returncode == 0
+
+
 @pytest.mark.parametrize(
     'parameters',
     [
@@ -302,18 +363,26 @@ def test_locus_ok(parameters, tmpdir):
         (['--locus-tag', 'ABCDEFGHIJKLM']), # more than 12 characters
         (['--locus-tag', 'ABC_']), # wrong characters
         (['--locus-tag', 'ABC-']), # wrong characters
+        (['--locus-tag', 'ABC.']), # wrong characters
         (['--locus-tag', 'ABC!']), # wrong characters
         (['--locus-tag', 'ABC?']), # wrong characters
         (['--locus-tag', 'ABC*']), # wrong characters
-        (['--locus-tag', 'ABC.']), # wrong characters
         (['--locus-tag', 'ABC,']), # wrong characters
-        (['--locus-tag', 'ABC;']) # wrong characters
+        (['--locus-tag', 'ABC;']), # wrong characters
+        (['--locus-tag', 'ABC:']), # wrong characters
+        (['--locus-tag', 'ABC§']), # wrong characters
+        (['--locus-tag', 'ABC$']), # wrong characters
+        (['--locus-tag', 'ABC%']), # wrong characters
+        (['--locus-tag', 'ABC&']), # wrong characters
+        (['--locus-tag', 'ABC/']), # wrong characters
+        (['--locus-tag', 'ABC=']), # wrong characters
+        (['--locus-tag', 'ABC#']) # wrong characters
     ]
 )
-def test_locustag_failiing(parameters, tmpdir):
+def test_locustag_compliant_failiing(parameters, tmpdir):
     # test locus-tag prefix arguments
     proc = run(
-        ['bin/bakta', '--db', 'test/db', '--output', tmpdir] +
+        ['bin/bakta', '--db', 'test/db', '--output', tmpdir, '--compliant'] +
         parameters +
         SKIP_PARAMETERS +
         ['test/data/NC_002127.1.fna']
@@ -331,10 +400,10 @@ def test_locustag_failiing(parameters, tmpdir):
         (['--locus-tag', 'A23456789012'])
     ]
 )
-def test_locustag_ok(parameters, tmpdir):
+def test_locustag_compliant_ok(parameters, tmpdir):
     # test locus-tag prefix arguments
     proc = run(
-        ['bin/bakta', '--db', 'test/db', '--output', tmpdir] +
+        ['bin/bakta', '--db', 'test/db', '--output', tmpdir, '--compliant'] +
         parameters +
         SKIP_PARAMETERS +
         ['test/data/NC_002127.1.fna']