diff --git a/DESCRIPTION b/DESCRIPTION index 04e8e33..26fe335 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -82,7 +82,7 @@ Collate: 'msqrob-framework.R' 'allGenerics.R' Encoding: UTF-8 VignetteBuilder: knitr Roxygen: list(markdown=TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 biocViews: Proteomics, MassSpectrometry, diff --git a/NAMESPACE b/NAMESPACE index 6303259..034bf05 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(getDF) export(getDfPosterior) export(getFitMethod) export(getModel) +export(getReferencePresent) export(getSigma) export(getSigmaPosterior) export(getVar) diff --git a/R/StatModel-methods.R b/R/StatModel-methods.R index d2b3d84..80c9995 100644 --- a/R/StatModel-methods.R +++ b/R/StatModel-methods.R @@ -11,6 +11,10 @@ #' the linear model coefficients to be tested equal to zero. #' The rownames of the matrix should be equal to the names of #' parameters of the model. +#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve +#' parameters with modified reference levels are returned. Watch out putting this +#' parameter to TRUE can change the interpretation of the logFC. Default is FALSE. +#' #' @examples #' data(pe) #' # Aggregate peptide intensities in protein expression values @@ -33,11 +37,14 @@ setMethod( "getContrast", "StatModel", - function(object, L) { + function(object, L, acceptDifferentReference = FALSE) { if (!is(L, "matrix")) L <- as.matrix(L) - coefs <- getCoef(object) out <- matrix(rep(NA, ncol(L))) rownames(out) <- colnames(L) + if (!referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) { + return(out) + } + coefs <- getCoef(object) hlp <- try(t(L) %*% coefs[rownames(L)], silent = TRUE) if (!is(hlp, "try-error")) out[] <- hlp return(out) @@ -49,10 +56,13 @@ setMethod( setMethod( "varContrast", "StatModel", - function(object, L) { + function(object, L, acceptDifferentReference = FALSE) { if (!is(L, "matrix")) L <- as.matrix(L) out <- matrix(NA, ncol(L), ncol(L)) rownames(out) <- colnames(out) <- colnames(L) + if (!referenceContrast(getReferencePresent(object), L, acceptDifferentReference)) { + return(out) + } vcovTmp <- getVcovUnscaled(object) * object@varPosterior hlp <- try(t(L) %*% vcovTmp[rownames(L), rownames(L)] %*% L, silent = TRUE) if (!is(hlp, "try-error")) out[] <- hlp diff --git a/R/accessors.R b/R/accessors.R index 5631dd2..a1322ca 100644 --- a/R/accessors.R +++ b/R/accessors.R @@ -14,6 +14,8 @@ #' \item{getSigmaPosterior(object)}{to get the empirical Bayes standard deviation} #' \item{getVcovUnscaled(object)}{to get the unscaled variance covariance matrix #' of the model parameters} +#' \item{getReferencePresent(object)}{to get the vector of logicals indicating if the +#' reference level associated with a model parameter is present} #' } #' #' @rdname statModelAccessors @@ -95,3 +97,9 @@ setMethod("getVcovUnscaled", signature = "StatModel", definition = function(object) object@params$vcovUnscaled ) + +#' @rdname statModelAccessors +setMethod("getReferencePresent", + signature = "StatModel", + definition = function(object) object@params$referencePresent +) \ No newline at end of file diff --git a/R/allGenerics.R b/R/allGenerics.R index 8da7437..d741ef4 100644 --- a/R/allGenerics.R +++ b/R/allGenerics.R @@ -19,7 +19,9 @@ setGeneric("getVarPosterior", function(object) standardGeneric("getVarPosterior" #' @export setGeneric("getSigmaPosterior", function(object) standardGeneric("getSigmaPosterior")) #' @export -setGeneric("getContrast", function(object, L) standardGeneric("getContrast")) +setGeneric("getReferencePresent", function(object) standardGeneric("getReferencePresent")) +#' @export +setGeneric("getContrast", function(object, ...) standardGeneric("getContrast")) #' @export setGeneric("getVcovUnscaled", function(object) standardGeneric("getVcovUnscaled")) #' @export diff --git a/R/hypothesisTest-methods.R b/R/hypothesisTest-methods.R index c79e40c..bd95f47 100644 --- a/R/hypothesisTest-methods.R +++ b/R/hypothesisTest-methods.R @@ -122,16 +122,16 @@ setMethod( adjust.method = "BH", modelColumn = "msqrobModels", resultsColumnNamePrefix = "", - overwrite = FALSE) { + overwrite = FALSE, + acceptDifferentReference = FALSE) { if (!(modelColumn %in% colnames(rowData(object)))) stop("There is no column named \'", modelColumn, "\' with stored models of an msqrob fit in the rowData of the SummarizedExperiment object") if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "") resultsColumnNamePrefix <- "msqrobResults" if (is.null(colnames(contrast)) & ncol(contrast) > 1) colnames(contrast) <- seq_len(ncol(contrast)) if ((sum(paste0(resultsColumnNamePrefix, colnames(contrast)) %in% colnames(rowData(object))) > 0) & !overwrite) stop("There is/are already column(s) with names starting with\'", resultsColumnNamePrefix, "\' in the rowData of the SummarizedExperiment object, set the argument overwrite=TRUE to replace the column(s) with the new results or use another name for the argument resultsColumnNamePrefix") for (j in seq_len(ncol(contrast))) { - contrHlp <- contrast[, j] - names(contrHlp) <- rownames(contrast) - rowData(object)[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object)[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1) + contrHlp <- contrast[, j, drop = FALSE] + rowData(object)[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object)[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference) } return(object) } @@ -149,7 +149,8 @@ setMethod( adjust.method = "BH", modelColumn = "msqrobHurdle", resultsColumnNamePrefix = "hurdle_", - overwrite = FALSE) { + overwrite = FALSE, + acceptDifferentReference = FALSE) { if (sum(paste0(modelColumn, c("Intensity", "Count")) %in% colnames(rowData(object))) != 2) stop("There are no columns for the models of the hurdle components in the rowData of the SummarizedExperiment") if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "hurdle_") resultsColumnNamePrefix <- "hurdleResults" if (is.null(colnames(contrast)) & ncol(contrast) > 1) colnames(contrast) <- seq_len(ncol(contrast)) @@ -158,8 +159,8 @@ setMethod( { contrHlp <- contrast[, j] names(contrHlp) <- rownames(contrast) - intensityComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1) - countComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1) + intensityComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference) + countComponent <- topFeatures(rowData(object)[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference) sam <- cbind( intensityComponent[, seq_len(5)], @@ -205,7 +206,8 @@ setMethod( adjust.method = "BH", modelColumn = "msqrobModels", resultsColumnNamePrefix = "", - overwrite = FALSE) { + overwrite = FALSE, + acceptDifferentReference = FALSE) { if (is.null(object[[i]])) stop("QFeatures object does not contain an assay with the name ", i) if (!(modelColumn %in% colnames(rowData(object[[i]])))) stop("There is no column named \'", modelColumn, "\' with stored models of an msqrob fit in the rowData of assay ", i, "of the QFeatures object.") if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "") resultsColumnNamePrefix <- "msqrobResults" @@ -215,7 +217,7 @@ setMethod( { contrHlp <- contrast[, j] names(contrHlp) <- rownames(contrast) - rowData(object[[i]])[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object[[i]])[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1) + rowData(object[[i]])[[paste0(resultsColumnNamePrefix, colnames(contrast)[j])]] <- topFeatures(rowData(object[[i]])[, modelColumn], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference) } return(object) } @@ -232,7 +234,8 @@ setMethod( adjust.method = "BH", modelColumn = "msqrobHurdle", resultsColumnNamePrefix = "hurdle_", - overwrite = FALSE) { + overwrite = FALSE, + acceptDifferentReference = FALSE) { if (is.null(object[[i]])) stop("QFeatures object does not contain an assay with the name ", i) if (sum(paste0(modelColumn, c("Intensity", "Count")) %in% colnames(rowData(object[[i]]))) != 2) stop("There are no columns for the models of the hurdle components in the rowData of assay ", i, "of the QFeatures object.") if (is.null(colnames(contrast)) & resultsColumnNamePrefix == "hurdle_") resultsColumnNamePrefix <- "hurdleResults" @@ -242,8 +245,8 @@ setMethod( { contrHlp <- contrast[, j] names(contrHlp) <- rownames(contrast) - intensityComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1) - countComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1) + intensityComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Intensity")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference) + countComponent <- topFeatures(rowData(object[[i]])[, paste0(modelColumn, "Count")], contrast = contrHlp, adjust.method = adjust.method, sort = FALSE, alpha = 1, acceptDifferentReference = acceptDifferentReference) sam <- cbind( intensityComponent[, seq_len(5)], diff --git a/R/msqrob-utils.R b/R/msqrob-utils.R index 9449e32..d29b751 100644 --- a/R/msqrob-utils.R +++ b/R/msqrob-utils.R @@ -86,9 +86,142 @@ makeContrast <- function(contrasts, parameterNames) { return(t(.chrlinfct2matrix(contrasts, parameterNames)$K)) } + +#' Check for presence of reference levels +#' @description Check if the reference levels of the factors in the model are present for a specific feature. +#' +#' @param y A numeric vector with the feature values for each sample. +#' +#' @param data A `DataFrame` with information on the design. It has +#' the same number of rows as the number of columns (samples) of +#' `y`. +#' +#' @param paramNames A `character` vector with the names of the parameters in the model. +#' +#' @param factorLevels A list with the levels of the factors in the model. The names of the list +#' should correspond to the factor variables present in the model. +#' +#' @return A logical vector indicating for each parameter (that comes from a factor variable) in the model +#' if his reference level has values for the current feature. +#' +#' @rdname checkReference +#' +checkReference <- function(y, data, paramNames, factorLevels) { + if (length(factorLevels) == 0) { + return(logical(0)) + } + factors <- factorLevels + subset <- droplevels(data[!is.na(y), names(factors), drop = FALSE]) + paramSplit <- list() + for (x in names(factors)) { + paramSplit[[x]] <- paramNames[grep(x, paramNames)] + } + factor_status <- unlist(lapply(names(factors), function(x) { + factorLevels <- factors[[x]] + referenceRef <- factorLevels[1] + noIntercept <- all(paste0(x, factorLevels) %in% paramNames) + refPres <- referenceRef %in% levels(subset[, x]) + return(noIntercept || refPres) + })) + names(factor_status) <- names(factors) + + referencePresent <- propagateFalseStatus(paramSplit, factor_status) + + return(unlist(referencePresent)) +} + +#' Get the levels of the factors in the model +#' +#' @description Get the levels of the factors in the model associated with the factors +#' names. +#' +#' @param data A `DataFrame` with information on the design. Contains the variables that are used in the model. +#' +#' @param formula A `formula` object that specifies the model. +#' +#' @return A list with the levels of the factors in the model. The names of the list +#' correspond to the factor variables present in the model. +#' +#' @rdname getFormulaFactors +#' +getFormulaFactors <- function(formula, dataFrame) { + vars <- all.vars(formula) + if(length(vars) == 0) { + return(character(0)) + } + nam <- vars[sapply(vars, function(x) is.factor(dataFrame[, x]) || is.character(dataFrame[, x]) )] + res <- lapply(nam, function(x) { + levels(as.factor(dataFrame[, x])) + }) + names(res) <- nam + return(res) +} + +#' Check if the reference levels associated with the contrast are present +#' @description Check if the reference levels associated with the contrast are present +#' +#' @param referencePresent A logical vector indicating for each parameter in the model +#' if his reference level has values for the current feature. +#' +#' @param L A numeric contrast matrix with rownames that equal the model parameters +#' that are involved in the contrasts +#' +#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve +#' parameters with modified reference levels are returned. +#' +#' @return A boolean +#' +#' @rdname referenceContrast +#' +referenceContrast <- function(referencePresent, L, acceptDifferentReference) { + acceptDifferentReference || is.null(referencePresent) || !any(!referencePresent[rownames(L)]) +} + +checkFullRank <- function(modelMatrix) { + return(qr(modelMatrix)$rank == ncol(modelMatrix)) +} + + ##### None exported functions from multcomp package is included here to ##### During R and Bioc checks +propagateFalseStatus <- function(vectors, statuses) { + # Initialize a flag to track if any status has changed + has_changed <- TRUE + + # Use a while loop that runs until no statuses change + while (has_changed) { + # Collect all elements from vectors marked as FALSE + false_elements <- unique(unlist(vectors[statuses == FALSE])) + + # Initialize new statuses as the current statuses + new_statuses <- statuses + + # Check each vector and update status if it contains any false element + for (i in seq_along(vectors)) { + if (statuses[i] == TRUE && any(vectors[[i]] %in% false_elements)) { + new_statuses[i] <- FALSE # Change status to FALSE + } + } + + # Check if any status has changed + has_changed <- !all(new_statuses == statuses) + + # Update statuses with new values + statuses <- new_statuses + } + # Create a named vector of unique elements with their final statuses + all_elements <- unique(unlist(vectors)) + element_status <- setNames(rep(TRUE, length(all_elements)), all_elements) + + # Set the final status of each element based on the propagated statuses + for (i in seq_along(vectors)) { + element_status[vectors[[i]]] <- statuses[i] + } + + return(element_status) +} + .chrlinfct2matrix <- function(ex, var) { if (!is.character(ex)) { @@ -668,3 +801,4 @@ makeContrast <- function(contrasts, parameterNames) { ex, " to character" ) } + diff --git a/R/msqrob.R b/R/msqrob.R index 91ad805..6a844bb 100644 --- a/R/msqrob.R +++ b/R/msqrob.R @@ -57,22 +57,29 @@ msqrobLm <- function(y, robust = TRUE, maxitRob = 5) { myDesign <- model.matrix(formula, data) + missingSamples <- colSums(!is.na(y)) == 0 + subsettedDesign <- myDesign[!missingSamples, , drop = FALSE] + stopifnot("The provided model must be full rank" = checkFullRank(subsettedDesign)) + paramNames <- colnames(myDesign) + factorLevels <- getFormulaFactors(formula, data) models <- apply(y, 1, function(y, design) { ## computatability check obs <- is.finite(y) type <- "fitError" + referencePresent <- checkReference(y, data, paramNames, factorLevels) model <- list( - coefficients = NA, vcovUnscaled = NA, - sigma = NA, df.residual = NA, w = NA + referencePresent = referencePresent, coefficients = NA, + vcovUnscaled = NA, sigma = NA, + df.residual = NA, w = NA ) if (sum(obs) > 0) { ## subset to finite observations, attention with R column switching X <- design[obs, , drop = FALSE] - X <- X[,colMeans(X == 0) != 1 , drop = FALSE] + qrX <- qr(X) + X <- X[, qrX$pivot[seq_len(qrX$rank)], drop = FALSE] y <- y[obs] - colnames_orig <- colnames(design) if (robust) { ## use robust regression from MASS package, "M" estimation is used @@ -108,16 +115,17 @@ msqrobLm <- function(y, } if (type != "fitError") { - coef <- rep(NA, length(colnames_orig)) - names(coef) <- colnames_orig + coef <- rep(NA, length(paramNames)) + names(coef) <- paramNames coef[names(mod$coef)] <- mod$coef - vcovUnscaled <- matrix(NA, nrow =length(colnames_orig), ncol = length(colnames_orig)) - rownames(vcovUnscaled) <- colnames(vcovUnscaled) <- colnames_orig + vcovUnscaled <- matrix(NA, nrow =length(paramNames), ncol = length(paramNames)) + rownames(vcovUnscaled) <- colnames(vcovUnscaled) <- paramNames vcovUnscaled[names(mod$coef), names(mod$coef)] <- msqrob2:::.vcovUnscaled(mod) model <- list( - coefficients = mod$coef, - vcovUnscaled = .vcovUnscaled(mod), + referencePresent = referencePresent, + coefficients = coef, + vcovUnscaled = vcovUnscaled, sigma = sigma, df.residual = df.residual, w = w @@ -770,3 +778,5 @@ msqrobGlm <- function(y, return(models) } + + diff --git a/R/topFeatures.R b/R/topFeatures.R index e1f4d6e..2bb4131 100644 --- a/R/topFeatures.R +++ b/R/topFeatures.R @@ -18,6 +18,9 @@ #' to statistical significance. #' @param alpha `numeric` specifying the cutoff value for adjusted p-values. #' Only features with lower p-values are listed. +#' @param acceptDifferentReference `boolean(1)` to indicate if the contrasts that involve +#' parameters with modified reference levels are returned. Watch out putting this +#' parameter to TRUE can change the interpretation of the logFC. Default is FALSE. #' #' @examples #' data(pe) @@ -43,29 +46,36 @@ #' @author Lieven Clement #' @export -topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1) { - if (is(contrast, "matrix")) { - if (ncol(contrast) > 1) { - stop("Argument contrast is matrix with more than one column, only one contrast is allowed") - } +topFeatures <- function(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1, acceptDifferentReference = FALSE) { + contrast <- as.matrix(contrast) + if (ncol(contrast) > 1) { + stop("Argument contrast is matrix with more than one column, only one contrast is allowed") } - - contrast <- contrast[contrast !=0] + contrast <- contrast[contrast != 0, , drop = FALSE] logFC <- vapply(models, getContrast, numeric(1), - L = contrast + L = contrast, + acceptDifferentReference = acceptDifferentReference ) se <- sqrt(vapply(models, varContrast, numeric(1), - L = contrast + L = contrast, + acceptDifferentReference = acceptDifferentReference )) df <- vapply(models, getDfPosterior, numeric(1)) t <- logFC / se pval <- pt(-abs(t), df) * 2 adjPval <- p.adjust(pval, method = adjust.method) + out <- data.frame(logFC, se, df, t, pval, adjPval) + if (acceptDifferentReference) { + out[["DifferentReference"]] <- vapply(models, function(y){ + any(!getReferencePresent(y)[rownames(contrast)])}, + logical(1) + ) + } if (sort) { if (alpha < 1) { ids <- adjPval < alpha diff --git a/man/checkReference.Rd b/man/checkReference.Rd new file mode 100644 index 0000000..7317e68 --- /dev/null +++ b/man/checkReference.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/msqrob-utils.R +\name{checkReference} +\alias{checkReference} +\title{Check for presence of reference levels} +\usage{ +checkReference(y, data, paramNames, factorLevels) +} +\arguments{ +\item{y}{A numeric vector with the feature values for each sample.} + +\item{data}{A \code{DataFrame} with information on the design. It has +the same number of rows as the number of columns (samples) of +\code{y}.} + +\item{paramNames}{A \code{character} vector with the names of the parameters in the model.} + +\item{factorLevels}{A list with the levels of the factors in the model. The names of the list +should correspond to the factor variables present in the model.} +} +\value{ +A logical vector indicating for each parameter (that comes from a factor variable) in the model +if his reference level has values for the current feature. +} +\description{ +Check if the reference levels of the factors in the model are present for a specific feature. +} diff --git a/man/getFormulaFactors.Rd b/man/getFormulaFactors.Rd new file mode 100644 index 0000000..5ba30e3 --- /dev/null +++ b/man/getFormulaFactors.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/msqrob-utils.R +\name{getFormulaFactors} +\alias{getFormulaFactors} +\title{Get the levels of the factors in the model} +\usage{ +getFormulaFactors(formula, dataFrame) +} +\arguments{ +\item{formula}{A \code{formula} object that specifies the model.} + +\item{data}{A \code{DataFrame} with information on the design. Contains the variables that are used in the model.} +} +\value{ +A list with the levels of the factors in the model. The names of the list +correspond to the factor variables present in the model. +} +\description{ +Get the levels of the factors in the model associated with the factors +names. +} diff --git a/man/hypothesisTest.Rd b/man/hypothesisTest.Rd index e02d4e1..6422bba 100644 --- a/man/hypothesisTest.Rd +++ b/man/hypothesisTest.Rd @@ -16,7 +16,8 @@ expression analysis} adjust.method = "BH", modelColumn = "msqrobModels", resultsColumnNamePrefix = "", - overwrite = FALSE + overwrite = FALSE, + acceptDifferentReference = FALSE ) \S4method{hypothesisTestHurdle}{SummarizedExperiment}( @@ -25,7 +26,8 @@ expression analysis} adjust.method = "BH", modelColumn = "msqrobHurdle", resultsColumnNamePrefix = "hurdle_", - overwrite = FALSE + overwrite = FALSE, + acceptDifferentReference = FALSE ) \S4method{hypothesisTest}{QFeatures}( @@ -35,7 +37,8 @@ expression analysis} adjust.method = "BH", modelColumn = "msqrobModels", resultsColumnNamePrefix = "", - overwrite = FALSE + overwrite = FALSE, + acceptDifferentReference = FALSE ) \S4method{hypothesisTestHurdle}{QFeatures}( @@ -45,7 +48,8 @@ expression analysis} adjust.method = "BH", modelColumn = "msqrobHurdle", resultsColumnNamePrefix = "hurdle_", - overwrite = FALSE + overwrite = FALSE, + acceptDifferentReference = FALSE ) } \arguments{ diff --git a/man/msqrobLmer.Rd b/man/msqrobLmer.Rd index 7d42f81..c025a3c 100644 --- a/man/msqrobLmer.Rd +++ b/man/msqrobLmer.Rd @@ -90,7 +90,7 @@ pe <- aggregateFeatures(pe, i = "peptide", fcol = "Proteins", name = "protein") # Fit MSqrob model using robust ridge regression upon summarization of # peptide intensities into protein expression values -modelsRidge <- msqrobLmer(assay(pe[["protein"]]), ~condition, data = colData(pe), +modelsRidge <- msqrobLmer(assay(pe[["protein"]]), ~condition, data = colData(pe), ridge = TRUE) getCoef(modelsRidge[[1]]) diff --git a/man/referenceContrast.Rd b/man/referenceContrast.Rd new file mode 100644 index 0000000..966d2bb --- /dev/null +++ b/man/referenceContrast.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/msqrob-utils.R +\name{referenceContrast} +\alias{referenceContrast} +\title{Check if the reference levels associated with the contrast are present} +\usage{ +referenceContrast(referencePresent, L, acceptDifferentReference) +} +\arguments{ +\item{referencePresent}{A logical vector indicating for each parameter in the model +if his reference level has values for the current feature.} + +\item{L}{A numeric contrast matrix with rownames that equal the model parameters +that are involved in the contrasts} + +\item{acceptDifferentReference}{\code{boolean(1)} to indicate if the contrasts that involve +parameters with modified reference levels are returned.} +} +\value{ +A boolean +} +\description{ +Check if the reference levels associated with the contrast are present +} diff --git a/man/statModelAccessors.Rd b/man/statModelAccessors.Rd index 0a2f04e..8568404 100644 --- a/man/statModelAccessors.Rd +++ b/man/statModelAccessors.Rd @@ -22,6 +22,7 @@ \alias{getVar,StatModel-method} \alias{getSigma,StatModel-method} \alias{getVcovUnscaled,StatModel-method} +\alias{getReferencePresent,StatModel-method} \title{Accessor functions for StatModel class} \usage{ \S4method{getModel}{StatModel}(object) @@ -43,6 +44,8 @@ \S4method{getSigma}{StatModel}(object) \S4method{getVcovUnscaled}{StatModel}(object) + +\S4method{getReferencePresent}{StatModel}(object) } \arguments{ \item{object}{\code{StatModel} object} @@ -65,6 +68,8 @@ the empirical Bayes variance estimator} \item{getSigmaPosterior(object)}{to get the empirical Bayes standard deviation} \item{getVcovUnscaled(object)}{to get the unscaled variance covariance matrix of the model parameters} +\item{getReferencePresent(object)}{to get the vector of logicals indicating if the +reference level associated with a model parameter is present} } } \examples{ diff --git a/man/statModelMethods.Rd b/man/statModelMethods.Rd index 9eeae08..a4f2449 100644 --- a/man/statModelMethods.Rd +++ b/man/statModelMethods.Rd @@ -9,9 +9,9 @@ \alias{varContrast,StatModel-method} \title{Methods for StatModel class} \usage{ -\S4method{getContrast}{StatModel}(object, L) +\S4method{getContrast}{StatModel}(object, L, acceptDifferentReference = FALSE) -\S4method{varContrast}{StatModel}(object, L) +\S4method{varContrast}{StatModel}(object, L, acceptDifferentReference = FALSE) } \arguments{ \item{object}{A list with elements of the class \code{StatModel} that are @@ -21,6 +21,10 @@ estimated using the \code{\link{msqrob}} function} the linear model coefficients to be tested equal to zero. The rownames of the matrix should be equal to the names of parameters of the model.} + +\item{acceptDifferentReference}{\code{boolean(1)} to indicate if the contrasts that involve +parameters with modified reference levels are returned. Watch out putting this +parameter to TRUE can change the interpretation of the logFC. Default is FALSE.} } \value{ A matrix with the calculated contrasts or variance-covariance matrix of contrasts diff --git a/man/topTable.Rd b/man/topTable.Rd index 8d3cc76..cc21cc5 100644 --- a/man/topTable.Rd +++ b/man/topTable.Rd @@ -4,7 +4,14 @@ \alias{topFeatures} \title{Toplist of DE proteins, peptides or features} \usage{ -topFeatures(models, contrast, adjust.method = "BH", sort = TRUE, alpha = 1) +topFeatures( + models, + contrast, + adjust.method = "BH", + sort = TRUE, + alpha = 1, + acceptDifferentReference = FALSE +) } \arguments{ \item{models}{A list with elements of the class \code{StatModel} that are @@ -27,6 +34,10 @@ to statistical significance.} \item{alpha}{\code{numeric} specifying the cutoff value for adjusted p-values. Only features with lower p-values are listed.} + +\item{acceptDifferentReference}{\code{boolean(1)} to indicate if the contrasts that involve +parameters with modified reference levels are returned. Watch out putting this +parameter to TRUE can change the interpretation of the logFC. Default is FALSE.} } \value{ A dataframe with log2 fold changes (logFC), standard errors (se), diff --git a/tests/testthat/test_StatModel-methods.R b/tests/testthat/test_StatModel-methods.R new file mode 100644 index 0000000..cb32cfd --- /dev/null +++ b/tests/testthat/test_StatModel-methods.R @@ -0,0 +1,83 @@ +.create_minimal_data <- function(){ + reference_present_no_ref <- c(FALSE, FALSE) + names(reference_present_no_ref) <- c("conditionb", "conditionc") + + reference_present_ref <- c(TRUE, TRUE) + names(reference_present_ref) <- c("conditionb", "conditionc") + + vcovu_no_ref <- matrix(c(0.2, -0.2, NA, -0.2, 0.4, NA, NA, NA, NA), nrow = 3, byrow = TRUE) + colnames(vcovu_no_ref) <- c("(Intercept)", "conditionb", "conditionc") + rownames(vcovu_no_ref) <- c("(Intercept)", "conditionb", "conditionc") + + vcovu_ref <- matrix(c(0.2, -0.2, -0.2, -0.2, 0.4, -0.2, -0.2, -0.2, 0.6 ), nrow = 3, byrow = TRUE) + colnames(vcovu_ref) <- c("(Intercept)", "conditionb", "conditionc") + rownames(vcovu_ref) <- c("(Intercept)", "conditionb", "conditionc") + + stat_model_list <- list(feat1 = StatModel( + type = "lm", + params = list(coefficients = c("(Intercept)" = 1, "conditionb" = 2, "conditionc" = NA), + df.residual = 10, sigma = 0.5, + vcovUnscaled = vcovu_no_ref, + referencePresent = reference_present_no_ref), + varPosterior = 0.1, + dfPosterior = 6 + ), feat2 = StatModel( + type = "lm", + params = list(coefficients = c("(Intercept)" = 1, "conditionb" = 2, "conditionc" = 3), + df.residual = 10, sigma = 0.5, + vcovUnscaled = vcovu_ref, + referencePresent = reference_present_ref), + varPosterior = 0.1, + dfPosterior = 6 + )) + return(stat_model_list) +} + +test_that("getContrast", { + stat_model_list <- .create_minimal_data() + L <- matrix(c(1, 0), nrow = 2, byrow = TRUE) + colnames(L) <- "conditionb=0" + rownames(L) <- c("conditionb", "conditionc") + + res_NA <- matrix(NA) + rownames(res_NA) <- "conditionb=0" + res_NA_dbl <- matrix(as.double(NA)) + rownames(res_NA_dbl) <- "conditionb=0" + expect_equal(res_NA, getContrast(stat_model_list$feat1, L)) + expect_equal(res_NA_dbl, getContrast(stat_model_list$feat1, L, TRUE)) + + res_2 <- matrix(2) + rownames(res_2) <- "conditionb=0" + + L_no_0 <- L[L != 0, , drop = FALSE] # What occurs in topFeatures + expect_identical(res_2, getContrast(stat_model_list$feat1, L_no_0, TRUE)) + + expect_identical(res_2, getContrast(stat_model_list$feat2, L)) + expect_identical(res_2, getContrast(stat_model_list$feat2, L, TRUE)) +}) + +test_that("varContrast", { + stat_model_list <- .create_minimal_data() + L <- matrix(c(1, 0), nrow = 2, byrow = TRUE) + colnames(L) <- "conditionb=0" + rownames(L) <- c("conditionb", "conditionc") + + res_NA <- matrix(NA) + rownames(res_NA) <- "conditionb=0" + colnames(res_NA) <- "conditionb=0" + + res_NA_dbl <- matrix(as.double(NA)) + rownames(res_NA_dbl) <- "conditionb=0" + colnames(res_NA_dbl) <- "conditionb=0" + expect_equal(res_NA, varContrast(stat_model_list$feat1, L)) + expect_equal(res_NA_dbl, varContrast(stat_model_list$feat1, L, TRUE)) + + res_2 <- matrix(0.04) + rownames(res_2) <- "conditionb=0" + colnames(res_2) <- "conditionb=0" + L_no_0 <- L[L != 0, , drop = FALSE] # What occurs in topFeatures + expect_equal(res_2, varContrast(stat_model_list$feat1, L_no_0, TRUE)) + + expect_equal(res_2, varContrast(stat_model_list$feat2, L)) + expect_equal(res_2, varContrast(stat_model_list$feat2, L, TRUE)) +}) diff --git a/tests/testthat/test_msqrob-utils.R b/tests/testthat/test_msqrob-utils.R new file mode 100644 index 0000000..608cd7e --- /dev/null +++ b/tests/testthat/test_msqrob-utils.R @@ -0,0 +1,112 @@ +.create_minimal_data <- function() { + y_no_ref <- c(rep(NA,5), rep(1,10)) + names(y_no_ref) <- 1:15 + y_ref <- c(rep(1,5), rep(1,5), rep(1,5)) + names(y_ref) <- 1:15 + data <- data.frame(condition = as.factor(rep(letters[1:3], c(5,5,5))), + condition2 = as.factor(rep(letters[1:5], 3)), + numerical = c(1:5, 1:5, 1:5), + row.names = 1:15) + form <- formula(~ 1 + condition + numerical, data = data) + form_num <- formula(~ 1 + numerical, data = data) + form_no_intercept <- formula(~ -1 + condition + numerical, data = data) + form_interaction <- formula(~ 1 + condition*condition2, data = data) + return(list(data = data, form = form, + form_num = form_num, + form_no_intercept = form_no_intercept, + form_interaction = form_interaction, + y_no_ref = y_no_ref, + y_ref = y_ref)) +} + +test_that("getFormulaFactors", { + data <- .create_minimal_data()$data + form <- .create_minimal_data()$form + form_num <- .create_minimal_data()$form_num + form_no_intercept <- .create_minimal_data()$form_no_intercept + form_interaction <- .create_minimal_data()$form_interaction + + res <- list(condition = c("a", "b", "c")) + res_num <- setNames(list(), character(0)) + res_interaction <- list(condition = c("a", "b", "c"), condition2 = c("a", "b", "c", "d", "e")) + expect_identical(res, getFormulaFactors(form, data)) + expect_identical(res_num, getFormulaFactors(form_num, data)) + expect_identical(res, getFormulaFactors(form_no_intercept, data)) + expect_identical(res_interaction, getFormulaFactors(form_interaction, data)) +}) + +test_that("checkReference", { + data <- .create_minimal_data()$data + y_no_ref <- .create_minimal_data()$y_no_ref + y_ref <- .create_minimal_data()$y_ref + formula <- .create_minimal_data()$form + formula_no_intercept <- .create_minimal_data()$form_no_intercept + formula_interaction <- .create_minimal_data()$form_interaction + + paramNames <- colnames(model.matrix(formula, data = data)) + paramNames_no_intercept <- colnames(model.matrix(formula_no_intercept, data = data)) + paramNames_interaction <- colnames(model.matrix(formula_interaction, data = data)) + + factorVars <- list(condition = c("a", "b", "c")) + + reference_present_no_ref <- c(FALSE, FALSE) + names(reference_present_no_ref) <- c("conditionb", "conditionc") + expect_identical(reference_present_no_ref, checkReference(y_no_ref, data, paramNames, factorVars)) + + reference_present_no_int <- c(TRUE, TRUE, TRUE) + names(reference_present_no_int) <- c("conditiona", "conditionb", "conditionc") + expect_identical(reference_present_no_int, checkReference(y_no_ref, data, paramNames_no_intercept, factorVars)) + + reference_present_ref <- c(TRUE, TRUE) + names(reference_present_ref) <- c("conditionb", "conditionc") + expect_identical(reference_present_ref, checkReference(y_ref, data, paramNames, factorVars)) + + reference_no_var <- logical(0) + expect_identical(reference_no_var, checkReference(y_ref, data, c("(Intercept)"), character(0))) + + reference_interaction <- rep(FALSE, length(paramNames_interaction[-1])) + names(reference_interaction) <- paramNames_interaction[-1] + expect_identical(reference_interaction, checkReference(y_no_ref, data, paramNames_interaction, factorVars)) +}) + +test_that("referenceContrast", { + L <- matrix(c(1, 0), nrow = 2, byrow = TRUE) + colnames(L) <- "conditionb=0" + rownames(L) <- c("conditionb", "conditionc") + + reference_present_no_ref <- c(FALSE, FALSE) + names(reference_present_no_ref) <- c("conditionb", "conditionc") + + expect_identical(FALSE, referenceContrast(reference_present_no_ref, L, FALSE)) + + reference_present_ref <- c(TRUE, TRUE) + names(reference_present_ref) <- c("conditionb", "conditionc") + + expect_identical(TRUE, referenceContrast(reference_present_ref, L, FALSE)) + + expect_identical(TRUE, referenceContrast(reference_present_no_ref, L, TRUE)) + expect_identical(TRUE, referenceContrast(reference_present_ref, L, TRUE)) +}) + +test_that("propagateFalseStatus", { + vector1 <- c("A", "B", "C") + vector2 <- c("B", "D", "E") + vector3 <- c("E", "F", "G") + vector4 <- c("X", "Y", "Z") + + statuses1 <- c(TRUE, FALSE, TRUE, TRUE) + statuses2 <- c(TRUE, TRUE, TRUE, TRUE) + statuses3 <- c(FALSE, TRUE, TRUE, FALSE) + + vectors <- list(vector1, vector2, vector3, vector4) + nam <- c("A", "B", "C", "D", "E", "F", "G", "X", "Y", "Z") + exp_status1 <- c(rep(FALSE, 7), rep(TRUE, 3)) + names(exp_status1) <- nam + exp_status2 <- c(rep(TRUE, 10)) + names(exp_status2) <- nam + exp_status3 <- c(rep(FALSE, 10)) + names(exp_status3) <- nam + expect_identical(exp_status1, propagateFalseStatus(vectors, statuses1)) + expect_identical(exp_status2, propagateFalseStatus(vectors, statuses2)) + expect_identical(exp_status3, propagateFalseStatus(vectors, statuses3)) +}) \ No newline at end of file diff --git a/tests/testthat/test_msqrob.R b/tests/testthat/test_msqrob.R new file mode 100644 index 0000000..b8fc6f6 --- /dev/null +++ b/tests/testthat/test_msqrob.R @@ -0,0 +1,33 @@ +.create_minimal_data <- function() { + y_no_ref <- c(rep(NA,5), runif(10)) + y_ref <- c(runif(5), runif(5), runif(5)) + Y <- matrix(c(y_no_ref, y_ref), nrow = 2, ncol = 15, byrow = TRUE) + Y_defficient <- matrix(c(y_no_ref, y_no_ref), nrow = 2, ncol = 15, byrow = TRUE) + rownames(Y) <- c("feat1", "feat2") + colnames(Y) <- paste0("S", 1:15) + data <- data.frame(condition = as.factor(rep(letters[1:3], c(5,5,5))), + numerical = c(1:5, 1:5, 1:5), + row.names = paste0("S", 1:15)) + form_cond <- formula(~ 1 + condition, data = data) + return(list(data = data, form_cond = form_cond, Y = Y, Y_defficient = Y_defficient)) +} + +test_that("msqrobLm", { + set.seed(123) + Y_defficient <- .create_minimal_data()$Y_defficient + data <- .create_minimal_data()$data + form_cond <- .create_minimal_data()$form_cond + expect_error(msqrobLm(Y_defficient, form_cond, data, robust = FALSE), + "The provided model must be full rank") + + Y <- .create_minimal_data()$Y + msqrobLm_object <- msqrobLm(Y, form_cond, data, robust = FALSE) + + reference_present_no_ref <- c(FALSE, FALSE) + names(reference_present_no_ref) <- c("conditionb", "conditionc") + expect_identical(reference_present_no_ref, msqrobLm_object$feat1@params$referencePresent) + + reference_present_ref <- c(TRUE, TRUE) + names(reference_present_ref) <- c("conditionb", "conditionc") + expect_identical(reference_present_ref, msqrobLm_object$feat2@params$referencePresent) + }) diff --git a/tests/testthat/test_topFeatures.R b/tests/testthat/test_topFeatures.R new file mode 100644 index 0000000..79b9827 --- /dev/null +++ b/tests/testthat/test_topFeatures.R @@ -0,0 +1,60 @@ +.create_minimal_data <- function(){ + reference_present_no_ref <- c(FALSE, FALSE) + names(reference_present_no_ref) <- c("conditionb", "conditionc") + + reference_present_ref <- c(TRUE, TRUE) + names(reference_present_ref) <- c("conditionb", "conditionc") + + vcovu_no_ref <- matrix(c(0.2, -0.2, NA, -0.2, 0.4, NA, NA, NA, NA), nrow = 3, byrow = TRUE) + colnames(vcovu_no_ref) <- c("(Intercept)", "conditionb", "conditionc") + rownames(vcovu_no_ref) <- c("(Intercept)", "conditionb", "conditionc") + + vcovu_ref <- matrix(c(0.2, -0.2, -0.2, -0.2, 0.4, -0.2, -0.2, -0.2, 0.6 ), nrow = 3, byrow = TRUE) + colnames(vcovu_ref) <- c("(Intercept)", "conditionb", "conditionc") + rownames(vcovu_ref) <- c("(Intercept)", "conditionb", "conditionc") + + stat_model_list <- list(feat1 = StatModel( + type = "lm", + params = list(coefficients = c("(Intercept)" = 1, "conditionb" = 2, "conditionc" = NA), + df.residual = 10, sigma = 0.5, + vcovUnscaled = vcovu_no_ref, + referencePresent = reference_present_no_ref), + varPosterior = 0.1, + dfPosterior = 6 + ), feat2 = StatModel( + type = "lm", + params = list(coefficients = c("(Intercept)" = 1, "conditionb" = 2, "conditionc" = 3), + df.residual = 10, sigma = 0.5, + vcovUnscaled = vcovu_ref, + referencePresent = reference_present_ref), + varPosterior = 0.1, + dfPosterior = 6 + )) + return(stat_model_list) +} + + +test_that("topFeatures", { + stat_model_list <- .create_minimal_data() + L <- matrix(c(1, 0), nrow = 2, byrow = TRUE) + colnames(L) <- "conditionb=0" + rownames(L) <- c("conditionb", "conditionc") + + base_df <- data.frame("logFC" = c(2,2), "se" = c(0.2, 0.2), + "df" = c(6,6), "t" = c(10,10), + "pval" = c(5.791983e-05, 5.791983e-05), + "adjPval" = c(5.791983e-05, 5.791983e-05), + row.names = c("feat1", "feat2")) + accept_df <- base_df + accept_df$DifferentReference <- c(TRUE, FALSE) + no_accept_df <- base_df[c(2,1),] + no_accept_df["feat1", ] <- NA + no_accept_df["feat1", "df"] <- 6 + + expect_equal(no_accept_df, + topFeatures(stat_model_list, L), + tolerance = 1e-3) + expect_equal(accept_df, + topFeatures(stat_model_list, L, acceptDifferentReference = TRUE), + tolerance = 1e-3) +}) \ No newline at end of file