From aead582bdb450ea5215a9bee9864dfe605cd35dd Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Tue, 30 Jan 2024 15:10:59 +1030 Subject: [PATCH 1/4] fix bug with reverse strand if pyrodigal overlap called --- pyproject.toml | 2 +- src/dnaapler/utils/processing.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 51ea681..59e675d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dnaapler" -version = "0.5.2" # change VERSION too +version = "0.6.0" # change VERSION too description = "Reorients assembled microbial sequences" authors = ["George Bouras "] license = "MIT" diff --git a/src/dnaapler/utils/processing.py b/src/dnaapler/utils/processing.py index 28a4034..230aa68 100644 --- a/src/dnaapler/utils/processing.py +++ b/src/dnaapler/utils/processing.py @@ -224,9 +224,17 @@ def reorient_sequence( # Find the gene with the max overlap closest_gene_index = max(overlap_dict, key=lambda key: overlap_dict[key]) - start = genes[closest_gene_index].begin + # get strand strand = genes[closest_gene_index].strand + # susie error 30-01-24 - misorienting on the negative strand + # 'begin' just gives the lowest value, not the start, so was putting the terL at the end i.e. reorienting from the end of terL + # therefore need to take end + if strand == 1: + start = genes[closest_gene_index].begin + elif strand == -1: + start = genes[closest_gene_index].end + if strand == 1: strand_eng = "forward" else: From 680a7f7782c8385c22949b7ba2a0e125c85d26b1 Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Tue, 30 Jan 2024 15:27:10 +1030 Subject: [PATCH 2/4] also fix for all --- src/dnaapler/utils/processing.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/dnaapler/utils/processing.py b/src/dnaapler/utils/processing.py index 230aa68..a4aa6ca 100644 --- a/src/dnaapler/utils/processing.py +++ b/src/dnaapler/utils/processing.py @@ -235,6 +235,8 @@ def reorient_sequence( elif strand == -1: start = genes[closest_gene_index].end + start = genes[closest_gene_index].begin + if strand == 1: strand_eng = "forward" else: @@ -413,9 +415,16 @@ def reorient_single_record_bulk( # Find the gene with the max overlap closest_gene_index = max(overlap_dict, key=lambda key: overlap_dict[key]) - start = genes[closest_gene_index].begin strand = genes[closest_gene_index].strand + # susie error 30-01-24 - misorienting on the negative strand + # 'begin' just gives the lowest value, not the start, so was putting the terL at the end i.e. reorienting from the end of terL + # therefore need to take end + if strand == 1: + start = genes[closest_gene_index].begin + elif strand == -1: + start = genes[closest_gene_index].end + #################### # reorientation #################### From 2a93943666e69e803a12c875db575de717a170cc Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Tue, 30 Jan 2024 15:31:40 +1030 Subject: [PATCH 3/4] update history --- HISTORY.md | 4 ++++ src/dnaapler/utils/VERSION | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 6ffcf5b..8afb40f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # History +# 0.6.0 (2024-01-31) + +* Fixes bug where if the starting gene (dnaA/terL/repA) was on the reverse strand and the top BLAST hit did not find the start codon, it would reorient the replicon to begin at the end of the starting gene, not the start. Thanks @susiegriggo + # 0.5.2 (2024-01-24) * Bumps version to include updated citation diff --git a/src/dnaapler/utils/VERSION b/src/dnaapler/utils/VERSION index 2411653..09a3acf 100644 --- a/src/dnaapler/utils/VERSION +++ b/src/dnaapler/utils/VERSION @@ -1 +1 @@ -0.5.2 \ No newline at end of file +0.6.0 \ No newline at end of file From a296b9b1e54a3eb6e003a056c4851ec1199a90af Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Tue, 30 Jan 2024 15:32:54 +1030 Subject: [PATCH 4/4] relax bulk restraints #71 --- src/dnaapler/utils/validation.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/dnaapler/utils/validation.py b/src/dnaapler/utils/validation.py index e58e4e8..801dee3 100644 --- a/src/dnaapler/utils/validation.py +++ b/src/dnaapler/utils/validation.py @@ -68,10 +68,10 @@ def validate_fasta(input_fasta: Path) -> None: def validate_fasta_bulk(input_fasta: Path) -> None: """ - Validates FASTA input - that the input is a FASTA with > 1 sequence + Validates FASTA input - that the input is a FASTA with at least 1 sequence """ logger.info( - f"Checking that the input file {input_fasta} is in FASTA format and has more than 1 entry." + f"Checking that the input file {input_fasta} is in FASTA format and has at least 1 entry." ) # to get extension with open(input_fasta, "r") as handle: @@ -83,14 +83,14 @@ def validate_fasta_bulk(input_fasta: Path) -> None: f"Error: {input_fasta} file is not in the FASTA format. Please check your input file" ) - with open(input_fasta, "r") as handle: - # Check the number of records - if len(list(SeqIO.parse(handle, "fasta"))) == 1: - logger.error( - f"{input_fasta} has only one entry, but more than one was expected. Please check your input FASTA file!" - ) - else: - logger.info(f"{input_fasta} has more than one entry.") + # with open(input_fasta, "r") as handle: + # # Check the number of records + # if len(list(SeqIO.parse(handle, "fasta"))) == 1: + # logger.error( + # f"{input_fasta} has only one entry, but more than one was expected. Please check your input FASTA file!" + # ) + # else: + # logger.info(f"{input_fasta} has more than one entry.") def validate_fasta_all(input_fasta: Path) -> None: