seqeralabs · drpatelh · Nov 20, 2024 · Nov 20, 2024 · Nov 20, 2024 · Nov 20, 2024
diff --git a/.editorconfig b/.editorconfig
@@ -32,3 +32,5 @@ indent_style = unset
 [LICENSE]
 indent_size = unset
 
+[*.{fa,fasta}]
+insert_final_newline = unset
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
@@ -1,6 +1,6 @@
-name: nf-core branch protection
+name: branch protection
 # This workflow is triggered on PRs to master branch on the repository
-# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev`
+# It fails when someone tries to make a PR against the nf-chai `master` branch instead of `dev`
 on:
   pull_request_target:
     branches: [master]
@@ -9,7 +9,7 @@ jobs:
   test:
     runs-on: ubuntu-latest
     steps:
-      # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
+      # PRs to the nf-chai repo master branch are only ok if coming from the nf-chai repo `dev` or any `patch` branches
       - name: Check PRs
         if: github.repository == 'seqeralabs/nf-chai'
         run: |
@@ -33,7 +33,7 @@ jobs:
             Hi @${{ github.event.pull_request.user.login }},
 
             It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch.
-            The `master` branch on nf-core repositories should always contain code from the latest release.
+            The `master` branch on nf-chai should always contain code from the latest release.
             Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.
 
             You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: nf-core CI
+name: nf-chai CI
 # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
 on:
   push:
@@ -21,7 +21,7 @@ concurrency:
 jobs:
   test:
     name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})"
-    # Only run on push if this is the nf-core dev branch (merged PRs)
+    # Only run on push if this is the nf-chai dev branch (merged PRs)
     if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'seqeralabs/nf-chai') }}"
     runs-on: ubuntu-latest
     strategy:

diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -1,4 +1,4 @@
-name: nf-core linting
+name: nf-chai linting
 # This workflow is triggered on pushes and PRs to the repository.
 # It runs the `nf-core pipelines lint` and markdown lint tests to ensure
 # that the code meets the nf-core guidelines.

diff --git a/.nf-core.yml b/.nf-core.yml
@@ -45,6 +45,8 @@ lint:
     - docs/images/nf-core-nf-chai_logo_dark.png
     - .github/ISSUE_TEMPLATE/bug_report.yml
     - LICENSE
+    - .github/workflows/branch.yml
+    - .github/workflows/linting.yml
   included_configs: false
   multiqc_config: false
   modules_config: false

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,3 +17,4 @@ Special thanks to the following for their contributions to the release:
 ### Enhancements & fixes
 
 - [PR #1](https://github.com/seqeralabs/nf-chai/pull/1) - Delete files not required from nf-core pipeline template
+- [PR #2](https://github.com/seqeralabs/nf-chai/pull/2) - Remove additional customisations from nf-core pipeline template to simplify further
diff --git a/CITATIONS.md b/CITATIONS.md
@@ -10,6 +10,10 @@
 
 ## Pipeline tools
 
+- [Chai-1](https://www.biorxiv.org/content/10.1101/2024.10.10.615955v1)
+
+  > Chai Discovery team, Boitreaud J, Dent J, McPartlon M, Meier J, Reis V, Rogozhnikov A, Wu K. Chai-1: Decoding the molecular interactions of life. bioRxiv. doi: 10.1101/2024.10.10.615955.
+
 ## Software packaging/containerisation tools
 
 - [Anaconda](https://anaconda.com)

diff --git a/README.md b/README.md
@@ -1,83 +1,53 @@
-# seqeralabs/nf-chai
+# nf-chai
 
 [![GitHub Actions CI Status](https://github.com/seqeralabs/nf-chai/actions/workflows/ci.yml/badge.svg)](https://github.com/seqeralabs/nf-chai/actions/workflows/ci.yml)
-[![GitHub Actions Linting Status](https://github.com/seqeralabs/nf-chai/actions/workflows/linting.yml/badge.svg)](https://github.com/seqeralabs/nf-chai/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![GitHub Actions Linting Status](https://github.com/seqeralabs/nf-chai/actions/workflows/linting.yml/badge.svg)](https://github.com/seqeralabs/nf-chai/actions/workflows/linting.yml)
 [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)
 
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)
-[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
 [![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/seqeralabs/nf-chai)
 
 ## Introduction
 
-**seqeralabs/nf-chai** is a bioinformatics pipeline that ...
-
-<!-- TODO nf-core:
-   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the
-   major pipeline sections and the types of output it produces. You're giving an overview to someone new
-   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction
--->
-
-<!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core
-     workflows use the "tube map" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   -->
-<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
+**nf-chai** is a bioinformatics pipeline for running the [Chai-1](https://github.com/chaidiscovery/chai-lab) protein structure prediction algorithm on an input set of protein sequences in FASTA format. The pipeline has been written in Nextflow to generate results for downstream analysis in a reproducible, scalable and portable way.
 
 ## Usage
 
 > [!NOTE]
-> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.
-
-<!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.
-     Explain what rows and columns represent. For instance (please edit as appropriate):
+> If you are new to Nextflow, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.
 
-First, prepare a samplesheet with your input data that looks as follows:
+First, prepare a FASTA file with your protein sequence(s) that looks as follows:
 
-`samplesheet.csv`:
+`protein_sequences.fa`:
 
-```csv
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
+```txt
+>protein|name=short-protein-example
+AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPS
 ```
 
-Each row represents a fastq file (single-end) or a pair of fastq files (paired end).
-
--->
-
 Now, you can run the pipeline using:
 
-<!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
-
 ```bash
 nextflow run seqeralabs/nf-chai \
-   -profile <docker/singularity/.../institute> \
-   --input samplesheet.csv \
+   -profile <docker/singularity> \
+   --input protein_sequences.fa \
    --outdir <OUTDIR>
 ```
 
-> [!WARNING]
-> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).
-
 ## Credits
 
-seqeralabs/nf-chai was originally written by Seqera Team.
+seqeralabs/nf-chai was originally written by the Seqera Team.
 
 We thank the following people for their extensive assistance in the development of this pipeline:
 
-<!-- TODO nf-core: If applicable, make list of people who have also contributed -->
-
 ## Contributions and Support
 
 If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
 
 ## Citations
 
-<!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
-<!-- If you use seqeralabs/nf-chai for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
-
-<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->
-
 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
 
 This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).

diff --git a/assets/long_protein_sequence.fa b/assets/long_protein_sequence.fa
@@ -0,0 +1,2 @@
+>protein|name=short-protein-example
+AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPS
diff --git a/assets/short_protein_sequence.fa b/assets/short_protein_sequence.fa
@@ -0,0 +1,2 @@
+>protein|name=short-protein-example
+AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPS
diff --git a/conf/base.config b/conf/base.config
@@ -10,10 +10,9 @@
 
 process {
 
-    // TODO nf-core: Check the defaults for all processes
-    cpus   = { 1      * task.attempt }
-    memory = { 6.GB   * task.attempt }
-    time   = { 4.h    * task.attempt }
+    cpus   = { 1    * task.attempt }
+    memory = { 6.GB * task.attempt }
+    time   = { 4.h  * task.attempt }
 
     errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
     maxRetries    = 1
@@ -24,7 +23,6 @@ process {
     //        These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
     //        If possible, it would be nice to keep the same label naming convention when
     //        adding in your local modules too.
-    // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
     withLabel:process_single {
         cpus   = { 1                   }

diff --git a/conf/test.config b/conf/test.config
@@ -23,7 +23,5 @@ params {
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
     // Input data
-    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input  = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+    input = "${projectDir}/assets/long_protein_sequence.fa"
 }
diff --git a/conf/test_full.config b/conf/test_full.config
@@ -15,10 +15,5 @@ params {
     config_profile_description = 'Full test dataset to check pipeline function'
 
     // Input data for full size test
-    // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
-
-    // Fasta references
-    fasta = params.pipelines_testdata_base_path + 'viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz'
+    input = "${projectDir}/assets/short_protein_sequence.fa"
 }
diff --git a/main.nf b/main.nf
@@ -13,32 +13,10 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { NF_CHAI  } from './workflows/nf_chai'
-include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_nf_chai_pipeline'
-include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_nfcore_nf_chai_pipeline'
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    NAMED WORKFLOWS FOR PIPELINE
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-//
-// WORKFLOW: Run main analysis pipeline depending on type of input
-//
-workflow SEQERALABS_NF_CHAI {
-
-    take:
-    samplesheet // channel: samplesheet read in from --input
-
-    main:
+include { NF_CHAI                 } from './workflows/nf_chai'
+include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_seqeralabs_nf_chai_pipeline'
+include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_seqeralabs_nf_chai_pipeline'
 
-    //
-    // WORKFLOW: Run pipeline
-    //
-    NF_CHAI (
-        samplesheet
-    )
-}
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     RUN MAIN WORKFLOW
@@ -48,6 +26,7 @@ workflow SEQERALABS_NF_CHAI {
 workflow {
 
     main:
+
     //
     // SUBWORKFLOW: Run initialisation tasks
     //
@@ -56,23 +35,21 @@ workflow {
         params.validate_params,
         params.monochrome_logs,
         args,
-        params.outdir,
-        params.input
+        params.outdir
     )
 
     //
     // WORKFLOW: Run main workflow
     //
     ch_input = file(params.input, checkIfExists: true)
-    SEQERALABS_NF_CHAI (
+    NF_CHAI (
         ch_input
     )
 
     //
     // SUBWORKFLOW: Run completion tasks
     //
     PIPELINE_COMPLETION (
-        params.outdir,
         params.monochrome_logs
     )
 }

diff --git a/nextflow.config b/nextflow.config
@@ -9,9 +9,8 @@
 // Global default params, used in configs
 params {
 
-    // TODO nf-core: Specify your pipeline's command line flags
     // Input options
-    input                      = null
+    input                        = null
 
     // Boilerplate options
     outdir                       = null
@@ -25,7 +24,7 @@ params {
     pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/'
 
     // Schema validation default options
-    validate_params            = true
+    validate_params              = true
 }
 
 // Load base.config by default for all pipelines
@@ -141,24 +140,13 @@ charliecloud.registry = 'quay.io'
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
-
 env {
     PYTHONNOUSERSITE = 1
     R_PROFILE_USER   = "/.Rprofile"
     R_ENVIRON_USER   = "/.Renviron"
     JULIA_DEPOT_PATH = "/usr/local/share/julia"
 }
 
-// Set bash options
-process.shell = """\
-bash
-
-set -e # Exit if a tool returns a non-zero status/exit code
-set -u # Treat unset variables and parameters as an error
-set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute
-set -C # No clobber - prevent output redirection from overwriting files.
-"""
-
 // Disable process selector warnings by default. Use debug profile to enable warnings.
 nextflow.enable.configProcessNamesValidation = false
 

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -17,9 +17,9 @@
                     "format": "file-path",
                     "exists": true,
                     "mimetype": "text/csv",
-                    "pattern": "^\\S+\\.csv$",
-                    "description": "Path to comma-separated file containing information about the samples in the experiment.",
-                    "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.",
+                    "pattern": "^\\S+\\.(fa|fasta)$",
+                    "description": "Path to input FASTA file containing the sequence(s) to predict.",
+                    "help_text": "The input FASTA file should contain one or more sequences in standard FASTA format.",
                     "fa_icon": "fas fa-file-csv"
                 },
                 "outdir": {
Original file line number	Diff line number	Diff line change
Expand Up		@@ -17,3 +17,4 @@ Special thanks to the following for their contributions to the release:
		### Enhancements & fixes

		- [PR #1](https://github.com/seqeralabs/nf-chai/pull/1) - Delete files not required from nf-core pipeline template
		- [PR #2](https://github.com/seqeralabs/nf-chai/pull/2) - Remove additional customisations from nf-core pipeline template to simplify further
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		>protein\|name=short-protein-example
		AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPS