Skip to content

Commit

Permalink
fix linting/formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
gbouras13 committed Nov 20, 2024
1 parent c16c61d commit 7675f08
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 54 deletions.
30 changes: 16 additions & 14 deletions src/dnaapler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@
from loguru import logger

from dnaapler.utils.all import all_process_MMseqs2_output_and_reorient
from dnaapler.utils.bulk import bulk_process_MMseqs2_output_and_reorient, run_bulk_MMseqs2
from dnaapler.utils.bulk import (
bulk_process_MMseqs2_output_and_reorient,
run_bulk_MMseqs2,
)
from dnaapler.utils.cds_methods import (
run_MMseqs2_based_method,
run_largest,
run_MMseqs2_based_method,
run_mystery,
run_nearest,
)
Expand All @@ -24,7 +27,7 @@
end_dnaapler,
get_version,
print_citation,
run_autocomplete
run_autocomplete,
)
from dnaapler.utils.validation import (
check_evalue,
Expand Down Expand Up @@ -527,15 +530,14 @@ def custom(
custom_database = os.path.join(db_dir, "custom_db")

makeMMseqs2db = ExternalTool(
tool="mmseqs",
input=f"createdb {custom_db_fasta}",
output=f" {custom_database}",
logdir=logdir,
)
tool="mmseqs",
input=f"createdb {custom_db_fasta}",
output=f" {custom_database}",
logdir=logdir,
)

ExternalTool.run_tool(makeMMseqs2db, ctx)


# runs and processes MMseqs2
MMseqs2_success = run_MMseqs2_based_method(
ctx, input, output, prefix, gene, evalue, threads
Expand Down Expand Up @@ -787,11 +789,11 @@ def bulk(
custom_database = os.path.join(db_dir, "custom_db")

makeMMseqs2db = ExternalTool(
tool="mmseqs",
input=f"createdb {custom_db_fasta}",
output=f" {custom_database}",
logdir=logdir,
)
tool="mmseqs",
input=f"createdb {custom_db_fasta}",
output=f" {custom_database}",
logdir=logdir,
)

ExternalTool.run_tool(makeMMseqs2db, ctx)

Expand Down
4 changes: 2 additions & 2 deletions src/dnaapler/utils/all.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ def all_process_MMseqs2_output_and_reorient(
:return:
"""

# define colnames - keep the same colnames as BLAST for ease
# define colnames - keep the same colnames as BLAST for ease
# matches the MMseqs2 ones to make subbing MMseqs2 for BLAST as easy as possible
# MMseqs2_columns = "query,qlen,target,tlen,alnlen,qstart,qend,tstart,tend,fident,nident,gapopen,mismatch,evalue,bits,qaln,taln"

col_list = [
"qseqid",
"qlen",
Expand Down
18 changes: 9 additions & 9 deletions src/dnaapler/utils/bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from dnaapler.utils.processing import reorient_single_record_bulk
from dnaapler.utils.validation import validate_custom_db_fasta


def run_bulk_MMseqs2(
ctx,
input: Path,
Expand Down Expand Up @@ -69,7 +70,7 @@ def run_bulk_MMseqs2(
# matches the MMseqs2 ones to make subbing MMseqs2 for BLAST as easy as possible
MMseqs2_columns = "query,qlen,target,tlen,alnlen,qstart,qend,tstart,tend,fident,nident,gapopen,mismatch,evalue,bits,qaln,taln"
db = os.path.join(DNAAPLER_DB, db_name)

elif gene == "custom":
# validates custom fasta input for database
validate_custom_db_fasta(Path(custom_db))
Expand All @@ -90,19 +91,18 @@ def run_bulk_MMseqs2(

db = custom_database


MMseqs2 = ExternalTool(
tool="mmseqs easy-search",
input=f"{input} {db}",
output=f"{MMseqs2_output_file}",
params=f"{MMseqs2_output_tmpdir} --search-type 2 --threads {threads} -e {evalue} --format-output {MMseqs2_columns}",
logdir=logdir,
)
tool="mmseqs easy-search",
input=f"{input} {db}",
output=f"{MMseqs2_output_file}",
params=f"{MMseqs2_output_tmpdir} --search-type 2 --threads {threads} -e {evalue} --format-output {MMseqs2_columns}",
logdir=logdir,
)

ExternalTool.run_tool(MMseqs2, ctx)
from dnaapler.utils.util import remove_directory
remove_directory(MMseqs2_output_tmpdir)

remove_directory(MMseqs2_output_tmpdir)


def bulk_process_MMseqs2_output_and_reorient(
Expand Down
31 changes: 14 additions & 17 deletions src/dnaapler/utils/cds_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,22 +231,20 @@ def run_MMseqs2_based_method(
# matches the blast ones to make subbing MMseqs2 for BLAST as easy as possible
MMseqs2_columns = "query,qlen,target,tlen,alnlen,qstart,qend,tstart,tend,fident,nident,gapopen,mismatch,evalue,bits,qaln,taln"


db = os.path.join(DNAAPLER_DB, db_name)
if gene == "custom":
db = os.path.join(output, "custom_db", "custom_db")

# run MMseqs2 easy-search
MMseqs2 = ExternalTool(
tool="mmseqs easy-search",
input=f"{input} {db}",
output=f"{MMseqs2_output_file}",
params=f"{MMseqs2_output_tmpdir} --search-type 2 --threads {threads} -e {evalue} --format-output {MMseqs2_columns}",
logdir=logdir,
)
tool="mmseqs easy-search",
input=f"{input} {db}",
output=f"{MMseqs2_output_file}",
params=f"{MMseqs2_output_tmpdir} --search-type 2 --threads {threads} -e {evalue} --format-output {MMseqs2_columns}",
logdir=logdir,
)

ExternalTool.run_tool(MMseqs2, ctx)


# reorient the genome based on the MMseqs2 hit
output_processed_file = os.path.join(output, f"{prefix}_reoriented.fasta")
Expand Down Expand Up @@ -289,26 +287,26 @@ def run_MMseqs2_based_method_bulk(
MMseqs2_output_tmpdir = Path(f"{output}/tmp_MMseqs2_output")
MMseqs2_output_file = Path(f"{output}/{prefix}_MMseqs2_output.txt")
# matches the blast ones to make subbing MMseqs2 for BLAST as easy as possible
# qaln and taln are the translated alignments
# qaln and taln are the translated alignments
MMseqs2_columns = "query,qlen,target,tlen,alnlen,qstart,qend,tstart,tend,fident,nident,gapopen,mismatch,evalue,bits,qaln,taln"

db = os.path.join(DNAAPLER_DB, db_name)
if gene == "custom":
db = os.path.join(output, "custom_db", "custom_db")


# run MMseqs2 easy-search
MMseqs2 = ExternalTool(
tool="mmseqs easy-search ",
input=f"{input} {db}",
output=f"{MMseqs2_output_file}",
params=f"{MMseqs2_output_tmpdir} --search-type 2 --threads {threads} -e {evalue} --format-output {MMseqs2_columns}",
logdir=logdir,
)
tool="mmseqs easy-search ",
input=f"{input} {db}",
output=f"{MMseqs2_output_file}",
params=f"{MMseqs2_output_tmpdir} --search-type 2 --threads {threads} -e {evalue} --format-output {MMseqs2_columns}",
logdir=logdir,
)

ExternalTool.run_tool(MMseqs2, ctx)

from dnaapler.utils.util import remove_directory

remove_directory(MMseqs2_output_tmpdir)

# reorient the genome based on the MMseqs2 hit
Expand All @@ -318,4 +316,3 @@ def run_MMseqs2_based_method_bulk(
)

return MMseqs2_success

2 changes: 1 addition & 1 deletion src/dnaapler/utils/external_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def command_as_str(self) -> str:

@staticmethod
def _build_command(tool: str, input: str, output: str, params: str) -> List[str]:
command = f"{tool} {input} {output} {params}"# this is how mmseqs does it
command = f"{tool} {input} {output} {params}" # this is how mmseqs does it
print(command)
escaped_command = shlex.split(command)
return escaped_command
Expand Down
6 changes: 4 additions & 2 deletions src/dnaapler/utils/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def process_MMseqs2_output_and_reorient(

# define colnames
col_list = [
"qseqid",
"qseqid",
"qlen",
"sseqid",
"slen",
Expand Down Expand Up @@ -88,7 +88,9 @@ def process_MMseqs2_output_and_reorient(
# https://github.com/gbouras13/dnaapler/issues/44

else:
if MMseqs2_df["qseq"][0][0] in ["M", "V", "L"] and (MMseqs2_df["sstart"][0] == 1):
if MMseqs2_df["qseq"][0][0] in ["M", "V", "L"] and (
MMseqs2_df["sstart"][0] == 1
):
reorient_sequence(MMseqs2_df, input, out_file, gene, overlapping_orf=False)
else: # this will reorient the sequence with the orf that overlaps the tophit by the most
# warn if the top hit doesn't begin with a valid start codon
Expand Down
10 changes: 5 additions & 5 deletions src/dnaapler/utils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
"""

import os
import shutil
import subprocess as sp
import sys
import time
from pathlib import Path
import shutil

import click
import pyrodigal
Expand Down Expand Up @@ -119,7 +119,6 @@ def check_mmseqs2_version():
message = "Checking MMseqs2 installation."
logger.info(message)
try:

process = sp.Popen(["mmseqs"], stdout=sp.PIPE, stderr=sp.STDOUT)
mmseqs_out, _ = process.communicate()
mmseqs_out = mmseqs_out.decode()
Expand All @@ -129,7 +128,7 @@ def check_mmseqs2_version():
break
else:
raise ValueError("MMseqs2 version not found")

mmseqs_major_version = int(mmseqs_version.split(".")[0])
mmseqs_minor_version = mmseqs_version.split(".")[1]

Expand All @@ -139,7 +138,7 @@ def check_mmseqs2_version():

if mmseqs_major_version != 15:
logger.error("MMseqs2 is the wrong version. Please install v15.6f452")
if mmseqs_minor_version != '6f452':
if mmseqs_minor_version != "6f452":
logger.error("MMseqs2 is the wrong version. Please install v15.6f452")

logger.info("MMseqs2 version is ok.")
Expand Down Expand Up @@ -226,6 +225,7 @@ def check_duplicate_headers(fasta_file: Path) -> None:
header_set.add(header)
# if it finished it will be fine


def remove_directory(dir_path: Path) -> None:
"""
Remove a directory and all its contents if it exists.
Expand All @@ -237,4 +237,4 @@ def remove_directory(dir_path: Path) -> None:
None
"""
if dir_path.exists():
shutil.rmtree(dir_path)
shutil.rmtree(dir_path)
6 changes: 3 additions & 3 deletions tests/test_dnaapler.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,9 @@ def test_process_MMseqs2_output_and_reorient_invalid_MMseqs2_file(self):

def test_process_MMseqs2_output_and_reorient_already_oriented(self):
# Test scenario where the MMseqs2 output suggests the contig is already oriented correctly
MMseqs2_file = os.path.join(test_data, "SAOMS1_MMseqs2_output_already_oriented.txt")
MMseqs2_file = os.path.join(
test_data, "SAOMS1_MMseqs2_output_already_oriented.txt"
)
input = os.path.join(test_data, "SAOMS1.fasta")
output = os.path.join(test_data, "fake_reoriented.fasta")
gene = "terL"
Expand Down Expand Up @@ -272,5 +274,3 @@ def test_evalue_nearest(self):
ctx = "1"
param = "2"
val = validate_choice_autocomplete(ctx, param, value)


1 change: 0 additions & 1 deletion tests/test_overall.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,6 @@ def test_bulk_phage_with_chrom(self):
cmd = f"dnaapler bulk -m chromosome -i {input_fasta} -o {outdir} -t 1 -f"
exec_command(cmd)


def test_all_autocomplete_mystery_too_small(self):
"""test all where the autocompletion mystery fails as the contig has < 4 CDS"""
with self.assertRaises(RuntimeError):
Expand Down

0 comments on commit 7675f08

Please sign in to comment.