nf-core · vagkaratzas · Feb 13, 2025 · Feb 7, 2025 · Feb 10, 2025 · Feb 10, 2025
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
@@ -45,9 +45,6 @@ jobs:
 
       - name: Launch workflow via Seqera Platform
         uses: seqeralabs/action-tower-launch@v2
-        # TODO nf-core: You can customise AWS full pipeline tests as required
-        # Add full size test data (but still relatively small datasets for few samples)
-        # on the `test_full.config` test runs with only one set of parameters
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
           access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}

diff --git a/.nf-core.yml b/.nf-core.yml
@@ -18,4 +18,4 @@ template:
   skip_features:
     - igenomes
     - fastqc
-  version: 1.0.0
+  version: 1.1.0dev
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,12 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## v1.1.0 - [yyyy/mm/dd]
+
+### `Fixed`
+
+- [#29](https://github.com/nf-core/proteinfamilies/pull/29) - Fix `hmmalign` empty input crash error, by preventing the `FILTER_RECRUITED` module from creating an empty output .fasta.gz file, when there are no remaining sequences after filtering the `hmmsearch` results [#28](https://github.com/nf-core/proteinfamilies/issues/28)
+
 ## v1.0.0 - [2025/02/05]
 
 Initial release of nf-core/proteinfamilies, created with the [nf-core](https://nf-co.re/) template.

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
 report_comment: >
-  This report has been generated by the <a href="https://github.com/nf-core/proteinfamilies/releases/tag/1.0.0"
+  This report has been generated by the <a href="https://github.com/nf-core/proteinfamilies/tree/dev"
   target="_blank">nf-core/proteinfamilies</a> analysis pipeline. For information about
-  how to interpret these results, please see the <a href="https://nf-co.re/proteinfamilies/1.0.0/docs/output"
+  how to interpret these results, please see the <a href="https://nf-co.re/proteinfamilies/dev/docs/output"
   target="_blank">documentation</a>.
 report_section_order:
   "nf-core-proteinfamilies-methods-description":

diff --git a/bin/filter_recruited.py b/bin/filter_recruited.py
@@ -103,32 +103,35 @@ def validate_and_parse_hit_name(hit):
 
 
 def extract_fasta_subset(filtered_sequences, fasta, out_fasta):
-    open_func = gzip.open if fasta.endswith(".gz") else open
-    with open_func(fasta, "rt") as in_fasta:
-        fasta_dict = {record.id: str(record.seq) for record in SeqIO.parse(in_fasta, "fasta")}
-
-    with gzip.open(out_fasta, "wt") as out_file:
-        for filtered_sequence in filtered_sequences:
-            try:
-                sequence_name, env_from, env_to = validate_and_parse_hit_name(filtered_sequence)
-
-                # Get the original sequence
-                original_record = fasta_dict[sequence_name]
-
-                # Extract the specific range (adjust indices for 0-based indexing)
-                extracted_seq = original_record[env_from-1:env_to]
-
-                # Determine the new sequence ID
-                if len(extracted_seq) == len(original_record):
-                    new_id = sequence_name  # Omit range if full-length
-                else:
-                    new_id = f"{sequence_name}/{env_from}-{env_to}"
-
-                out_file.write(f">{new_id}\n{extracted_seq}\n")
-            except KeyError:
-                print(f"Sequence {sequence_name} not found in the input FASTA.", file=sys.stderr)
-            except ValueError as e:
-                print(e, file=sys.stderr)
+    if filtered_sequences:
+        open_func = gzip.open if fasta.endswith(".gz") else open
+        with open_func(fasta, "rt") as in_fasta:
+            fasta_dict = {record.id: str(record.seq) for record in SeqIO.parse(in_fasta, "fasta")}
+
+        with gzip.open(out_fasta, "wt") as out_file:
+            for filtered_sequence in filtered_sequences:
+                try:
+                    sequence_name, env_from, env_to = validate_and_parse_hit_name(filtered_sequence)
+
+                    # Get the original sequence
+                    original_record = fasta_dict[sequence_name]
+
+                    # Extract the specific range (adjust indices for 0-based indexing)
+                    extracted_seq = original_record[env_from-1:env_to]
+
+                    # Determine the new sequence ID
+                    if len(extracted_seq) == len(original_record):
+                        new_id = sequence_name  # Omit range if full-length
+                    else:
+                        new_id = f"{sequence_name}/{env_from}-{env_to}"
+
+                    out_file.write(f">{new_id}\n{extracted_seq}\n")
+                except KeyError:
+                    print(f"Sequence {sequence_name} not found in the input FASTA.", file=sys.stderr)
+                except ValueError as e:
+                    print(e, file=sys.stderr)
+    else:
+        print("No filtered sequences remained to write out. Skipping out_fasta file creation.")
 
 
 def filter_recruited(domtbl, fasta, length_threshold, out_fasta):

diff --git a/modules/local/filter_recruited/main.nf b/modules/local/filter_recruited/main.nf
@@ -12,7 +12,7 @@ process FILTER_RECRUITED {
     val(length_threshold)
 
     output:
-    tuple val(meta), path("*.fasta.gz"), emit: fasta
+    tuple val(meta), path("*.fasta.gz"), emit: fasta, optional: true
     path "versions.yml"                , emit: versions
 
     when:

diff --git a/nextflow.config b/nextflow.config
@@ -271,7 +271,7 @@ manifest {
     mainScript      = 'main.nf'
     defaultBranch   = 'master'
     nextflowVersion = '!>=24.04.2'
-    version         = '1.0.0'
+    version         = '1.1.0dev'
     doi             = ''
 }
 

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json