diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 84250fbe..82521b67 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Miniconda - uses: goanpeca/setup-miniconda@v1.0.2 + uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true python-version: 3.7 diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 5e62f62f..eeab8b33 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Miniconda - uses: goanpeca/setup-miniconda@v1.0.2 + uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true python-version: 3.7 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 464b7f7a..f44099bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,9 +26,9 @@ jobs: uses: actions/checkout@v2 - name: Check if Dockerfile or Conda environment changed - uses: technote-space/get-diff-action@v1 + uses: technote-space/get-diff-action@v4 with: - PREFIX_FILTER: | + FILES: | Dockerfile environment.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 15b83a6a..7a343fe8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,10 @@ * Update conda environment with new packages and updates * Added `--protocol custom` to allow custom adapter trimming modes [[#41]](https://github.com/nf-core/smrnaseq/issues/41)] * Fix error when UMI is on the reads header: [[#35](https://github.com/nf-core/smrnaseq/issues/35)] +* Updated `params.mirtrace_species` to be properly initialised when using `--genome`, for all iGenomes species +* Made `params.mature` and `params.hairpin` default to miRBase FTP URL so that the file is automatically downloaded if not provided +* Allow `.fa` or `.fa.gz` files for mature and hairpin FASTA files. 
+* Add full-size benchmark / test dataset to run on AWS for each release (see `test_full.config`) ### Packaged software updates diff --git a/conf/igenomes.config b/conf/igenomes.config index caeafceb..088a3a60 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -22,6 +22,7 @@ params { mito_name = "MT" macs_gsize = "2.7e9" blacklist = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed" + mirtrace_species = "hsa" } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" @@ -34,6 +35,7 @@ params { mito_name = "chrM" macs_gsize = "2.7e9" blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" + mirtrace_species = "hsa" } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" @@ -47,6 +49,7 @@ params { mito_name = "MT" macs_gsize = "1.87e9" blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" + mirtrace_species = "mmu" } 'TAIR10' { fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" @@ -58,6 +61,7 @@ params { bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" mito_name = "Mt" + mirtrace_species = "ath" } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" @@ -68,6 +72,7 @@ params { gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + // mirtrace_species = "bsu" } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" @@ -79,6 +84,7 @@ params { bed12 = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" mito_name = "MT" + mirtrace_species = "bta" } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" @@ -90,6 +96,7 @@ params { bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" mito_name = "MtDNA" macs_gsize = "9e7" + mirtrace_species = "cel" } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" @@ -101,6 +108,7 @@ params { bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" mito_name = "MT" + mirtrace_species = "cfa" } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" @@ -111,6 +119,7 @@ params { gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" mito_name = "MT" + mirtrace_species = "dre" } 'BDGP6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" @@ -122,6 +131,7 @@ params { bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" mito_name = "M" macs_gsize = "1.2e8" + mirtrace_species = "dme" } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" @@ -133,6 +143,7 @@ params { bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" mito_name = "MT" + // mirtrace_species = "ecb" } 'EB1' { fasta = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" @@ -143,6 +154,7 @@ params { gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + // mirtrace_species = "ecd" } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" @@ -153,6 +165,7 @@ params { gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" mito_name = "MT" + mirtrace_species = "gga" } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" @@ -163,6 +176,7 @@ params { gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + // mirtrace_species = "gmx" } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" @@ -174,6 +188,7 @@ params { bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" mito_name = "MT" + // mirtrace_species = "mcc" } 'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" @@ -184,6 +199,7 @@ params { gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" mito_name = "Mt" + mirtrace_species 
= "osa" } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" @@ -195,6 +211,7 @@ params { bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" mito_name = "MT" + mirtrace_species = "ptr" } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" @@ -205,6 +222,7 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" mito_name = "MT" + mirtrace_species = "rno" } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" @@ -216,6 +234,7 @@ params { bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" mito_name = "MT" macs_gsize = "1.2e7" + // mirtrace_species = "sce" } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" @@ -228,6 +247,7 @@ params { readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" mito_name = "MT" macs_gsize = "1.21e7" + // mirtrace_species = "spo" } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" @@ -238,6 +258,7 @@ params { gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + mirtrace_species = "sbi" } 'Sscrofa10.2' { fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" @@ -249,6 +270,7 @@ params { bed12 = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" mito_name = "MT" + mirtrace_species = "ssc" } 'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" @@ -259,6 +281,7 @@ params { gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" mito_name = "Mt" + mirtrace_species = "zma" } 'hg38' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" @@ -271,6 +294,7 @@ params { mito_name = "chrM" macs_gsize = "2.7e9" blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" + mirtrace_species = "hsa" } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" @@ -284,6 +308,7 @@ params { mito_name = "chrM" macs_gsize = "2.7e9" blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" + mirtrace_species = "hsa" } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" @@ -297,6 +322,7 @@ params { mito_name = "chrM" macs_gsize = "1.87e9" blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" + mirtrace_species = "mmu" } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" @@ -307,6 +333,7 @@ params { gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" mito_name = "chrM" + mirtrace_species = "bta" } 'ce10' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" @@ -319,6 +346,7 @@ params { readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" mito_name = "chrM" macs_gsize = "9e7" + mirtrace_species = "cel" } 
'canFam3' { fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" @@ -330,6 +358,7 @@ params { bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" mito_name = "chrM" + mirtrace_species = "cfa" } 'danRer10' { fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" @@ -341,6 +370,7 @@ params { bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" mito_name = "chrM" macs_gsize = "1.37e9" + mirtrace_species = "dre" } 'dm6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" @@ -352,6 +382,7 @@ params { bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" mito_name = "chrM" macs_gsize = "1.2e8" + mirtrace_species = "dme" } 'equCab2' { fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" @@ -363,6 +394,7 @@ params { bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" mito_name = "chrM" + // mirtrace_species = "ecb" } 'galGal4' { fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" @@ -374,6 +406,7 @@ params { bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" mito_name = "chrM" + mirtrace_species = "gga" } 'panTro4' { fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" @@ -385,6 +418,7 @@ params { bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" readme = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" mito_name = "chrM" + mirtrace_species = "ptr" } 'rn6' { fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" @@ -395,6 +429,7 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" mito_name = "chrM" + mirtrace_species = "rno" } 'sacCer3' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" @@ -405,6 +440,7 @@ params { readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" mito_name = "chrM" macs_gsize = "1.2e7" + // mirtrace_species = "sce" } 'susScr3' { fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" @@ -416,6 +452,7 @@ params { bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" mito_name = "chrM" + mirtrace_species = "ssc" } } } diff --git a/conf/test.config b/conf/test.config index aa5edfd3..3ae89828 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,7 +29,7 @@ params { 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.4.ebwt', 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.rev.1.ebwt', 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.rev.1.ebwt' -] + ] gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genes.gtf' mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/mature.fa' hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hairpin.fa' diff --git a/conf/test_full.config b/conf/test_full.config index 8461afe9..7a26ee7f 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -12,10 +12,38 @@ params { 
config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed input_paths = [ - ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']], - ['SRR389222', ['https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub2.fastq.gz']] + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398625_GSM2560978_control_preclinic3_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398623_GSM2560976_control_preclinic1_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398626_GSM2560979_control_preclinic4_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398624_GSM2560977_control_preclinic2_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398627_GSM2560980_control_preclinic5_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398628_GSM2560981_control_preclinic6_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398631_GSM2560984_preclinic3_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398629_GSM2560982_preclinic1_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398630_GSM2560983_preclinic2_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398632_GSM2560985_preclinic4_Homo_sapiens_ncRNA-Seq.fastq.gz', + 
's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398633_GSM2560986_preclinic5_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398634_GSM2560987_preclinic6_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398636_GSM2560989_preclinic7_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398639_GSM2560992_control_clinic3_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398635_GSM2560988_control_preclinic7_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398638_GSM2560991_control_clinic2_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398637_GSM2560990_control_clinic1_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398640_GSM2560993_control_clinic4_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398641_GSM2560994_control_clinic5_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398643_GSM2560996_clinic1_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398642_GSM2560995_control_clinic6_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398644_GSM2560997_clinic2_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398645_GSM2560998_clinic3_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398646_GSM2560999_clinic4_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398649_GSM2561002_control_clinic7_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398647_GSM2561000_clinic5_Homo_sapiens_ncRNA-Seq.fastq.gz', + 's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398648_GSM2561001_clinic6_Homo_sapiens_ncRNA-Seq.fastq.gz', + 
's3://nf-core-awsmegatests/smrnaseq/input_data/SRR5398650_GSM2561003_clinic7_Homo_sapiens_ncRNA-Seq.fastq.gz' ] + + genome = 'GRCh38' } + + diff --git a/main.nf b/main.nf index c6af4bf4..baf6675a 100644 --- a/main.nf +++ b/main.nf @@ -78,10 +78,9 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome // Genome options params.bt_index = params.genome ? params.genomes[ params.genome ].bowtie ?: false : false -params.mature = params.genome ? params.genomes[ params.genome ].mature ?: false : false -params.hairpin = params.genome ? params.genomes[ params.genome ].hairpin ?: false : false params.mirtrace_species = params.genome ? params.genomes[ params.genome ].mirtrace_species ?: false : false params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false +params.mirna_gtf = params.mirtrace_species ? "ftp://mirbase.org/pub/mirbase/CURRENT/genomes/${params.mirtrace_species}.gff3" : false // Define regular variables so that they can be overwritten clip_r1 = params.clip_r1 @@ -114,50 +113,37 @@ if (params.protocol == "illumina"){ protocol = params.protocol } -if (!params.mirna_gtf && params.mirtrace_species){ - mirna_gtf = file("ftp://mirbase.org/pub/mirbase/CURRENT/genomes/${params.mirtrace_species}.gff3", checkIfExists: true) -}else if (params.mirna_gtf) { +if (params.mirna_gtf) { mirna_gtf = file(params.mirna_gtf, checkIfExists: true) -}else{ +} else { mirna_gtf = false } // Validate inputs - if (params.skip_mirdeep){ - if (params.mature) { mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature file not found: ${params.mature}" } - if (params.hairpin) { hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin file not found: ${params.hairpin}" } - if (params.gtf) { gtf = file(params.gtf, checkIfExists: true) } - indices_mirdeep2 = Channel.empty() - fasta = Channel.empty() -} -else{ - if (params.references_parsed){Channel.empty() - fasta = 
file("$params.references_parsed/genome.fa", checkIfExists: true) - hairpin = file("$params.references_parsed/hairpin.fa", checkIfExists: true) - mature = file("$params.references_parsed/mature.fa", checkIfExists: true) - indices_mirdeep2 = Channel - .fromPath("$params.references_parsed/genome.*.ebwt", checkIfExists: true) - .ifEmpty { exit 1, "Reference parsed genome indices not found: ${references_parsed}"} - } - else{ - if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature file not found: ${params.mature}" } - if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin file not found: ${params.hairpin}" } - if (params.fasta) {reference_genome = file(params.fasta, checkIfExists: true) } else { exit 1, "Reference genome file not found: ${params.fasta}" } - } + if (params.mature) { mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature file not found: ${params.mature}" } + if (params.hairpin) { hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin file not found: ${params.hairpin}" } + indices_mirdeep2 = Channel.empty() + fasta = Channel.empty() +} else { + if (params.references_parsed){ + fasta = file("$params.references_parsed/genome.fa", checkIfExists: true) + hairpin = file("$params.references_parsed/hairpin.fa", checkIfExists: true) + mature = file("$params.references_parsed/mature.fa", checkIfExists: true) + indices_mirdeep2 = Channel.fromPath("$params.references_parsed/genome.*.ebwt", checkIfExists: true).ifEmpty { exit 1, "Reference parsed genome indices not found: ${references_parsed}"} + } else { + if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature file not found: ${params.mature}" } + if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin file not found: ${params.hairpin}" } + if (params.fasta) { 
reference_genome = file(params.fasta, checkIfExists: true) } else { exit 1, "Reference genome file not found: ${params.fasta}" } + } } if( params.bt_index ){ - bt_indices = Channel - .fromPath("${params.bt_index}*", checkIfExists: true) - .ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_dir}" } -} - -else if( params.bt_indices ){ - bt_indices = Channel.from(params.input_paths).map{ file(it) }.toList() -} -if( !params.bt_index) { - log.info "No GTF / Bowtie 1 index supplied - host reference genome analysis will be skipped." + bt_indices = Channel.fromPath("${params.bt_index}*", checkIfExists: true).ifEmpty { exit 1, "Bowtie1 index directory not found: ${bt_dir}" } +} else if( params.bt_indices ){ + bt_indices = Channel.from(params.bt_indices).map{ file(it) }.toList() +} else { + log.info "No Bowtie 1 index supplied - host reference genome analysis will be skipped." } if( !params.mirtrace_species ){ exit 1, "Reference species for miRTrace is not defined." @@ -327,9 +313,24 @@ if (!params.references_parsed && !params.skip_mirdeep){ script: """ + # Uncompress FASTA reference files if necessary + MATURE="$mature" + HAIRPIN="$hairpin" + if [ \${MATURE: -3} == ".gz" ]; then + gunzip "$mature" + fi + if [ \${HAIRPIN: -3} == ".gz" ]; then + gunzip "$hairpin" + fi + + # Remove any special base characters from reference genome FASTA file sed -i '/^[^>]/s/[^ATGCatgc]/N/g' $refgenome + + # Remove spaces from miRBase FASTA files sed -i 's, ,_,g' $hairpin sed -i 's, ,_,g' $mature + + # Build bowtie index bowtie-build $refgenome genome """ } diff --git a/nextflow.config b/nextflow.config index 277c7b05..a764d210 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,7 +12,19 @@ params { input = "data/*.fastq.gz" outdir = './results' protocol = 'illumina' + + // Reference genomes genome = false + mature = false + hairpin = false + mirna_gtf = false + fasta = false + bt_index = false + mirtrace_species = false + references_parsed = false + mature = 
"ftp://mirbase.org/pub/mirbase/CURRENT/mature.fa.gz" + hairpin = "ftp://mirbase.org/pub/mirbase/CURRENT/hairpin.fa.gz" + clip_r1 = 0 three_prime_clip_r1 = 0 three_prime_adapter = "TGGAATTCTCGGGTGCCAAGG" @@ -23,7 +35,6 @@ params { skip_mirdeep = false save_reference = true seq_center = "" - references_parsed = false // Boilerplate options name = false @@ -85,6 +96,7 @@ profiles { podman.enabled = true } test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } } // Load igenomes.config if required diff --git a/nextflow_schema.json b/nextflow_schema.json index 6d8825f5..eede33e7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -69,18 +69,28 @@ "fasta": { "type": "string", "fa_icon": "fas fa-font", - "description": "Path to FASTA genome file.", + "description": "Path to reference genome FASTA file.", "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. This requires additional time and resources, so it's better to use a pre-build index if possible." }, + "mirna_gtf": { + "type": "string", + "description": "GFF/GTF file with coordinate positions of precursor and miRNAs.", + "help_text": "miRBase `.gff3` file, typically downloaded from [`ftp://mirbase.org/pub/mirbase/CURRENT/genomes/`](ftp://mirbase.org/pub/mirbase/CURRENT/genomes/)\n\nIf using iGenomes with `--genome` this file will be downloaded from miRBase automatically during the pipeline run.\n\n", + "fa_icon": "fas fa-address-book" + }, "mature": { "type": "string", "description": "Path to FASTA file with mature miRNAs.", - "fa_icon": "fas fa-wheelchair" + "fa_icon": "fas fa-wheelchair", + "help_text": "Typically this will be the `mature.fa` file from miRBase.
Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", + "default": "ftp://mirbase.org/pub/mirbase/CURRENT/mature.fa.gz" }, "hairpin": { "type": "string", "description": "Path to FASTA file with miRNAs precursors.", - "fa_icon": "fab fa-cuttlefish" + "fa_icon": "fab fa-cuttlefish", + "help_text": "Typically this will be the `hairpin.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", + "default": "ftp://mirbase.org/pub/mirbase/CURRENT/hairpin.fa.gz" }, "bt_index": { "type": "string",