Skip to content

Commit

Permalink
Updated code with latest comment in PR
Browse files Browse the repository at this point in the history
  • Loading branch information
luissian committed Jan 23, 2024
1 parent 55dbd45 commit 573a253
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 34 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
- name: Set up Miniconda
uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: myenv
activate-environment: taranis_env
environment-file: environment.yml

- name: Verify Conda environment
Expand All @@ -25,7 +25,7 @@ jobs:
- name: Run your script
run: |
source $CONDA/etc/profile.d/conda.sh
conda activate myenv
conda activate taranis_env
pip install .
taranis analyze-schema -i test/MLST_listeria -o analyze_schema_test --cpus 1
2 changes: 1 addition & 1 deletion taranis/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ def allele_calling(
try:
os.makedirs(output)
except OSError as e:
log.info("Unable to create folder at %s because %s", output, e)
log.info("Unable to create folder at %s with error %s", output, e)
stderr.print("[red] ERROR. Unable to create folder " + output)
sys.exit(1)
# Filter fasta files from reference folder
Expand Down
9 changes: 4 additions & 5 deletions taranis/analyze_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from Bio import SeqIO

from collections import OrderedDict
from collections import OrderedDict, defaultdict

import taranis.utils

Expand Down Expand Up @@ -140,11 +140,10 @@ def check_allele_quality(self, prokka_annotation: dict) -> OrderedDict:
# get the unique sequences and compare the length with all sequences
unique_seq = list(set(list(allele_seq.values())))
if len(unique_seq) < len(allele_seq):
tmp_dict = {}
value_to_keys = defaultdict(list)
for rec_id, seq_value in allele_seq.items():
if seq_value not in tmp_dict:
tmp_dict[seq_value] = 0
else:
value_to_keys[seq_value].append(rec_id)
if len(value_to_keys[seq_value]) > 1:
a_quality[rec_id]["quality"] = "Bad quality"
a_quality[rec_id]["reason"] = "Duplicate allele"
if self.remove_duplicated:
Expand Down
52 changes: 26 additions & 26 deletions taranis/blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from pathlib import Path
from Bio.Blast.Applications import NcbiblastnCommandline
import pdb

log = logging.getLogger(__name__)
stderr = rich.console.Console(
Expand Down Expand Up @@ -54,36 +53,37 @@ def create_blastdb(self, file_name, blast_dir):

def run_blast(
self,
query,
evalue=0.001,
perc_identity=90,
reward=1,
penalty=-2,
gapopen=1,
gapextend=1,
max_target_seqs=10,
max_hsps=10,
num_threads=1,
):
"""_summary_
blastn -outfmt "6 , qseqid , sseqid , pident , qlen , length , mismatch , gapopen , evalue , bitscore , sstart , send , qstart , qend , sseq , qseq" -query /media/lchapado/Reference_data/proyectos_isciii/taranis/documentos_antiguos/pasteur_schema/lmo0002.fasta -db /media/lchapado/Reference_data/proyectos_isciii/taranis/test/blastdb/RA-L2073_R1/RA-L2073_R1 -evalue 0.001 -penalty -2 -reward 1 -gapopen 1 -gapextend 1 -perc_identity 100 > /media/lchapado/Reference_data/proyectos_isciii/taranis/test/blast_sample_locus002.txt
query: str,
evalue: float = 0.001,
perc_identity: int = 90,
reward: int = 1,
penalty: int = -2,
gapopen: int = 1,
gapextend: int = 1,
max_target_seqs: int = 1000,
max_hsps: int = 10,
num_threads: int = 1,
) -> list:
"""blast command is executed, returning a list of each match found
Args:
query (_type_): _description_
evalue (float, optional): _description_. Defaults to 0.001.
perc_identity (int, optional): _description_. Defaults to 90.
reward (int, optional): _description_. Defaults to 1.
penalty (int, optional): _description_. Defaults to -2.
gapopen (int, optional): _description_. Defaults to 1.
gapextend (int, optional): _description_. Defaults to 1.
max_target_seqs (int, optional): _description_. Defaults to 10.
max_hsps (int, optional): _description_. Defaults to 10.
num_threads (int, optional): _description_. Defaults to 1.
query (str): file path which contains the fasta sequence to query
evalue (float, optional): filtering results on e-value. Defaults to 0.001.
perc_identity (int, optional): percentage of identity. Defaults to 90.
reward (int, optional): reward for a nucleotide match. Defaults to 1.
penalty (int, optional): penalty value. Defaults to -2.
gapopen (int, optional): cost to open a gap. Defaults to 1.
gapextend (int, optional): cost to extend a gap. Defaults to 1.
max_target_seqs (int, optional): max target to output. Defaults to 1000.
max_hsps (int, optional): max hsps. Defaults to 10.
num_threads (int, optional): number of threads. Defaults to 1.
Returns:
list: list of strings containing blast results
"""
blast_parameters = '"6 , qseqid , sseqid , pident , qlen , length , mismatch , gapopen , evalue , bitscore , sstart , send , qstart , qend , sseq , qseq"'
pdb.set_trace()
# db=self.blast_dir, evalue=evalue, perc_identity=perc_identity_ref, reward=reward, penalty=penalty, gapopen=gapopen, gapextend=gapextend, outfmt=blast_parameters, max_target_seqs=max_target_seqs, max_hsps=max_hsps, num_threads=num_threads, query=core_reference_allele_path)
cline = NcbiblastnCommandline(
task="blastn",
db=self.out_blast_dir,
evalue=evalue,
perc_identity=perc_identity,
Expand Down

0 comments on commit 573a253

Please sign in to comment.