diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index 970ab7600d60..adbdbd7ba627 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -22,6 +22,8 @@ process CAT_CAT { def args2 = task.ext.args2 ?: '' def file_list = files_in.collect { it.toString() } + // choose appropriate concatenation tool depending on input and output format + // | input | output | command1 | command2 | // |-----------|------------|----------|----------| // | gzipped | gzipped | cat | | @@ -30,7 +32,7 @@ process CAT_CAT { // | ungzipped | gzipped | cat | pigz | // Use input file ending as default - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" out_zip = prefix.endsWith('.gz') in_zip = file_list[0].endsWith('.gz') command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' @@ -68,3 +70,10 @@ process CAT_CAT { END_VERSIONS """ } + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} + diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test index ed5a4f12eeb5..aaae04f90b0f 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -83,8 +83,7 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + { assert snapshot(process.out).match() } ) } } @@ -142,8 +141,7 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap index 423571ba274b..0c9bfe8d0e06 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test.snap +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -1,10 +1,4 @@ { - "test_cat_unzipped_zipped_size": { - "content": [ - 375 - ], - "timestamp": "2023-10-16T14:33:08.049445686" - }, "test_cat_unzipped_unzipped": { "content": [ { @@ -67,31 +61,36 @@ ], "timestamp": "2023-10-16T14:32:49.642741302" }, - "test_cat_zipped_zipped_lines": { - "content": [ - [ - "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", - "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", - "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", - "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", - "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", - "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" - ] - ], - "timestamp": "2023-10-16T14:32:33.629048645" - }, - "test_cat_unzipped_zipped_lines": { + "test_cat_zipped_zipped": { "content": [ - [ - ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", - "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", - "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", - "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", - "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", - "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } ], - "timestamp": "2023-10-16T14:33:08.038830506" + "timestamp": "2024-01-12T14:02:02.999254641" }, "test_cat_one_file_unzipped_zipped_lines": { "content": [ @@ -106,16 +105,41 @@ ], "timestamp": "2023-10-16T14:33:21.39642399" }, - "test_cat_zipped_zipped_size": { + "test_cat_unzipped_zipped": { "content": [ - 78 + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } ], - "timestamp": "2023-10-16T14:32:33.641869244" + "timestamp": "2024-01-12T14:08:26.948048418" }, "test_cat_one_file_unzipped_zipped_size": { "content": [ 374 ], - "timestamp": "2023-10-16T14:33:21.4094373" + "timestamp": "2024-01-12T14:10:22.445700266" } -} \ No newline at end of file +}