diff --git a/CHANGELOG.md b/CHANGELOG.md index c7629657..6b919ecc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [[#259](https://github.com/nf-core/differentialabundance/pull/259)] - Bump gtf2featureannotation to fix GTF handling error ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO)) +- [[#257](https://github.com/nf-core/differentialabundance/pull/257)] - Added maxquant profile to nextflow.config to make it available ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#254](https://github.com/nf-core/differentialabundance/pull/254)] - Some parameter changes, added qbic credits ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#250](https://github.com/nf-core/differentialabundance/pull/250)] - Template update for nf-core/tools v2.13.1 ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#244](https://github.com/nf-core/differentialabundance/pull/244)] - Add pipeline params for matrixfilter NA options ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) @@ -19,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [[#257](https://github.com/nf-core/differentialabundance/pull/257)] - Fixed FILTER_DIFFTABLE module, updated PROTEUS module to better handle whitespace in prefix param, made docs clearer ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#254](https://github.com/nf-core/differentialabundance/pull/254)] - Made differential_file_suffix optional ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - 
[[#240](https://github.com/nf-core/differentialabundance/pull/240)] - Publish GSEA reports ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO)) - [[#231](https://github.com/nf-core/differentialabundance/pull/231)] - Update GSEA module to fix butterfly plot bug ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd index 26fb7f4e..fa8787f1 100644 --- a/assets/differentialabundance_report.Rmd +++ b/assets/differentialabundance_report.Rmd @@ -267,8 +267,9 @@ for (r in seq_along(contributors)) { ```{r, echo=FALSE} observations <- read_metadata(file.path(params$input_dir, params$observations), id_col = params$observations_id_col) -if (! params$observations_name_col %in% colnames(observations)){ - stop(paste('Invalid observation name column specified: ', params$observations_name_col, paste0('(Valid values are: ', paste(colnames(observations), collapse=', '),')'))) +observations_name_col <- ifelse(!is.null(params$observations_name_col), params$observations_name_col, params$observations_id_col) +if (! observations_name_col %in% colnames(observations)){ + stop(paste('Invalid observation name column specified: ', observations_name_col, paste0('(Valid values are: ', paste(colnames(observations), collapse=', '),')'))) } if (! 
is.null(params$features)){ @@ -305,7 +306,7 @@ assay_data <- lapply(assay_files, function(x) { row.names = 1 ) ) - colnames(mat) <- observations[[params$observations_name_col]][match(colnames(mat), rownames(observations))] + colnames(mat) <- observations[[observations_name_col]][match(colnames(mat), rownames(observations))] mat }) @@ -316,7 +317,7 @@ if (!is.null(params$features_log2_assays)) { assay_data <- cond_log2_transform_assays(assay_data, params$features_log2_assays) # Now we can rename the observations rows using the title field -rownames(observations) <- observations[[params$observations_name_col]] +rownames(observations) <- observations[[observations_name_col]] # Run PCA early so we can understand how important each variable is pca_datas <- lapply(names(assay_data), function(assay_type){ @@ -547,7 +548,7 @@ Whiskers in the above boxplots show `r params$exploratory_whisker_distance` time plotly_densityplot( assay_data, experiment = observations, - colorby = params$observations_name_col, + colorby = observations_name_col, expressiontype = paste("count per", params$features_type), makeColorScale(length(unique(observations[[params$observations_id_col]])), palette = "Set1") ) diff --git a/conf/maxquant.config b/conf/maxquant.config index ad782b8a..57caf2d4 100644 --- a/conf/maxquant.config +++ b/conf/maxquant.config @@ -38,7 +38,7 @@ params { differential_feature_name_column = "Majority protein IDs" // Proteus options - proteus_measurecol_prefix = 'LFQ intensity ' + proteus_measurecol_prefix = 'LFQ intensity' // Shiny does not work for this datatype shinyngs_build_app = false diff --git a/docs/usage.md b/docs/usage.md index b5f9f3d9..d474d611 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -23,7 +23,11 @@ With the above in mind, running this workflow requires: --input '[path to samplesheet file]' ``` -This may well be the same sample sheet used to generate the input matrix. 
For example, in RNA-seq this might be the same sample sheet, perhaps derived from [fetchngs](https://github.com/nf-core/fetchngs), that was input to the [RNA-seq workflow](https://github.com/nf-core/rnaseq). It may be necessary to add columns that describe the groups you want to compare. +This may well be the same sample sheet used to generate the input matrix. For example, in RNA-seq this might be the same sample sheet, perhaps derived from [fetchngs](https://github.com/nf-core/fetchngs), that was input to the [RNA-seq workflow](https://github.com/nf-core/rnaseq). It may be necessary to add columns that describe the groups you want to compare. The columns that the pipeline requires are: + +- a column listing the sample IDs (must be the same IDs as in the abundance matrix); in the example below it is called 'sample'. For some study_types, this column might need to be filled in with file names, e.g. when doing an Affymetrix analysis. +- one or more columns describing conditions for the differential analysis. In the example below it is called 'condition'. +- optionally one or more columns describing sample batches or similar which you want to be considered in the analysis. In the example below it is called 'batch'. For example: @@ -96,7 +100,7 @@ So we **do not recommend** raw counts files such as `salmon.merged.gene_counts.t --matrix '[path to matrix file]' ``` -This is the proteinGroups.txt file produced by MaxQuant. It is a tab-separated matrix file with a column for every observation (plus additional columns for other types of measurements and information); each row contains these data for a set of proteins. The parameters `--observations_id_col` and `--features_id_col` define which of the associated fields should be matched in those inputs. The parameter `--proteus_measurecol_prefix` defines which prefix is used to extract those matrix columns which contain the measurements to be used. 
For example, the default `LFQ intensity ` will indicate that columns like LFQ intensity S1, LFQ intensity S2, LFQ intensity S3 etc. are used (do not forget trailing whitespace in this parameter, if required!). +This is the proteinGroups.txt file produced by MaxQuant. It is a tab-separated matrix file with a column for every observation (plus additional columns for other types of measurements and information); each row contains these data for a set of proteins. The parameters `--observations_id_col` and `--features_id_col` define which of the associated fields should be matched in those inputs. The parameter `--proteus_measurecol_prefix` defines which prefix is used to extract those matrix columns which contain the measurements to be used. For example, the default `LFQ intensity` will indicate that columns like LFQ intensity S1, LFQ intensity S2, LFQ intensity S3 etc. are used (one whitespace is automatically added if necessary). ### Affymetrix microarrays diff --git a/modules.json b/modules.json index fb65b199..c313b064 100644 --- a/modules.json +++ b/modules.json @@ -62,7 +62,7 @@ }, "proteus/readproteingroups": { "branch": "master", - "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "git_sha": "a069b29783583c219c1f23ed3dcf64a5aee1340b", "installed_by": ["modules"] }, "rmarkdownnotebook": { diff --git a/modules/nf-core/proteus/readproteingroups/environment.yml b/modules/nf-core/proteus/readproteingroups/environment.yml index bd44f38d..3ac338bc 100644 --- a/modules/nf-core/proteus/readproteingroups/environment.yml +++ b/modules/nf-core/proteus/readproteingroups/environment.yml @@ -1,3 +1,4 @@ +name: proteus_readproteingroups channels: - conda-forge - bioconda @@ -7,3 +8,4 @@ dependencies: - bioconda::r-proteus-bartongroup=0.2.16 - conda-forge::r-plotly=4.10.2 - bioconda::bioconductor-limma=3.54.0 + - conda-forge::r-ggplot2=3.4.4 diff --git a/modules/nf-core/proteus/readproteingroups/main.nf b/modules/nf-core/proteus/readproteingroups/main.nf index 
34837410..d8e7c8a8 100644 --- a/modules/nf-core/proteus/readproteingroups/main.nf +++ b/modules/nf-core/proteus/readproteingroups/main.nf @@ -2,10 +2,10 @@ process PROTEUS_READPROTEINGROUPS { tag "$meta.id" label 'process_single' - conda 'modules/nf-core/proteus/readproteingroups/environment.yml' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-4e01206f2c47f56077f04e5d2d7b312f50513a1e:92abccefbeb09795ad6a93553b62a6ad3daaea48-0': - 'biocontainers/mulled-v2-4e01206f2c47f56077f04e5d2d7b312f50513a1e:92abccefbeb09795ad6a93553b62a6ad3daaea48-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-503e259d7d34ce533ce66c4c8871af4ab409db6d:1e504ef71c83943061a39b6260d826b988bfa56f-0': + 'biocontainers/mulled-v2-503e259d7d34ce533ce66c4c8871af4ab409db6d:1e504ef71c83943061a39b6260d826b988bfa56f-0' }" input: tuple val(meta), path(samplesheet), path(intensities) diff --git a/modules/nf-core/proteus/readproteingroups/meta.yml b/modules/nf-core/proteus/readproteingroups/meta.yml index 8034770a..4e67cf0c 100644 --- a/modules/nf-core/proteus/readproteingroups/meta.yml +++ b/modules/nf-core/proteus/readproteingroups/meta.yml @@ -11,7 +11,7 @@ tools: documentation: "https://rdrr.io/github/bartongroup/Proteus/" tool_dev_url: "https://github.com/bartongroup/Proteus" doi: "10.1101/416511" - licence: "['GPL v2']" + licence: ["GPL v2"] input: - meta: type: map diff --git a/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R b/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R index 5806971d..f1321714 100644 --- a/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R +++ b/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R @@ -54,10 +54,11 @@ parse_args <- function(x) { #' @param file Input file #' @param header Passed to 
read.delim() #' @param row.names Passed to read.delim() +#' @param nrows Passed to read.delim() #' #' @return output Data frame -read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.names = F) { +read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.names = F, nrows = -1) { ext <- tolower(tail(strsplit(basename(file), split = "\\\\.")[[1]], 1)) @@ -74,7 +75,8 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.nam sep = separator, header = header, row.names = row.names, - check.names = check.names + check.names = check.names, + nrows = nrows ) } @@ -192,7 +194,8 @@ library(proteus) intensities.table <- read_delim_flexible( file = opt\$intensities_file, - check.names = FALSE + check.names = FALSE, + nrows = 1 # Here, we are only interested in the header of the table ) sample.sheet <- @@ -218,16 +221,19 @@ sample.sheet\$condition <- sample.sheet[[opt\$contrast_variable]] measure.cols <- setNames(paste0(opt\$measure_col_prefix, sample.sheet[[opt\$sample_id_col]]), sample.sheet[[opt\$sample_id_col]]) +if (!any(measure.cols %in% colnames(intensities.table))) { + measure.cols <- setNames(paste0(paste0(opt\$measure_col_prefix, " "), sample.sheet[[opt\$sample_id_col]]), sample.sheet[[opt\$sample_id_col]]) +} + # Check that all samples specified in the input sheet are present in the intensities table -missing_columns <- paste0(opt\$measure_col_prefix, sample.sheet[[opt\$sample_id_col]]) -missing_columns <- missing_columns[!missing_columns %in% colnames(intensities.table)] +missing_columns <- measure.cols[!measure.cols %in% colnames(intensities.table)] if (length(missing_columns) > 0) { stop(paste( length(missing_columns), 'specified samples do not have a(n)', opt\$measure_col_prefix, - 'column in intensities table. The following columns are missing:', + 'column in intensities table (tried prefix both with and without adding a whitespace), please check the value of parameter --measure_col_prefix. 
The following columns are missing:', paste(missing_columns, collapse = ', ') )) } diff --git a/nextflow.config b/nextflow.config index 801b8eda..abb083f8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -63,7 +63,7 @@ params { affy_build_annotation = true // Proteus-specific options - proteus_measurecol_prefix = 'LFQ intensity ' + proteus_measurecol_prefix = 'LFQ intensity' proteus_norm_function = 'normalizeMedian' proteus_plotsd_method = 'violin' proteus_plotmv_loess = true @@ -342,6 +342,7 @@ profiles { test_nogtf { includeConfig 'conf/test_nogtf.config' } test_full { includeConfig 'conf/test_full.config' } affy { includeConfig 'conf/affy.config' } + maxquant { includeConfig 'conf/maxquant.config' } rnaseq { includeConfig 'conf/rnaseq.config' } soft {includeConfig 'conf/soft.config'} test_affy { includeConfig 'conf/test_affy.config' } diff --git a/nextflow_schema.json b/nextflow_schema.json index f9826d19..f131c166 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -268,8 +268,9 @@ "properties": { "proteus_measurecol_prefix": { "type": "string", - "default": "LFQ intensity ", - "description": "Prefix of the column names of the MaxQuant proteingroups table in which the intensity values are saved; the prefix has to be followed by the sample names that are also found in the samplesheet. Default: 'LFQ intensity '; take care to also consider trailing whitespace between prefix and samplenames." + "default": "LFQ intensity", + "description": "Prefix of the column names of the MaxQuant proteingroups table in which the intensity values are saved; the prefix has to be followed by the sample names that are also found in the samplesheet. Default: 'LFQ intensity'; will search for both the prefix as entered and the prefix followed by one whitespace.", + "help_text": "If the sample columns are e.g. called 'LFQ intensity sample1', 'LFQ intensity sample2' etc., please set this parameter to 'LFQ intensity'." 
}, "proteus_norm_function": { "type": "string", @@ -1010,6 +1011,7 @@ "properties": { "report_file": { "type": "string", + "default": "${projectDir}/assets/differentialabundance_report.Rmd", "description": "Rmd report template from which to create the pipeline report", "help_text": "The pipeline will always generate a default report which gives a good overview of the analysis results. Should this default report not suit your needs, you can provide the path to a custom report instead.", "format": "file-path", @@ -1025,19 +1027,19 @@ }, "logo_file": { "type": "string", - "default": "docs/images/nf-core-differentialabundance_logo_light.png", + "default": "${projectDir}/docs/images/nf-core-differentialabundance_logo_light.png", "description": "A logo to display in the report instead of the generic pipeline logo", "fa_icon": "far fa-font-awesome-logo-full" }, "css_file": { "type": "string", - "default": "assets/nf-core_style.css", + "default": "${projectDir}/assets/nf-core_style.css", "description": "CSS to use to style the output, in lieu of the default nf-core styling", "fa_icon": "far fa-file-code" }, "citations_file": { "type": "string", - "default": "CITATIONS.md", + "default": "${projectDir}/CITATIONS.md", "description": "A markdown file containing citations to include in the fiinal report", "fa_icon": "fas fa-ad" },