diff --git a/R/estimateDivergence.R b/R/estimateDivergence.R index 91b18c765..b37910cb0 100644 --- a/R/estimateDivergence.R +++ b/R/estimateDivergence.R @@ -145,13 +145,13 @@ setMethod("estimateDivergence", signature = c(x="SummarizedExperiment"), if( "median" %in% reference || "mean" %in% reference ){ reference <- apply(mat, 1, reference) } else if( !reference %in% colnames(mat) ) { - stop(paste("Reference", reference, "not recognized.")) + stop("Reference ", reference, " not recognized.", call. = FALSE) } } # Distance between all samples against one reference sample # FIXME: could be be optimzed with sweep / parallelization v <- seq_len(ncol(mat)) - sapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)}) + vapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)},FUN.VALUE = numeric(1)) } diff --git a/R/estimateDiversity.R b/R/estimateDiversity.R index e9fcc4a50..ac722f4c9 100644 --- a/R/estimateDiversity.R +++ b/R/estimateDiversity.R @@ -216,7 +216,7 @@ #' plotColData(tse, "Shannon") #' # ... by sample type #' plotColData(tse, "Shannon", "SampleType") -#' \dontrun{ +#' \donttest{ #' # combining different plots #' library(patchwork) #' plot_index <- c("Shannon","GiniSimpson") diff --git a/R/estimateDominance.R b/R/estimateDominance.R index 934b129d0..733de5538 100644 --- a/R/estimateDominance.R +++ b/R/estimateDominance.R @@ -184,7 +184,7 @@ #' #' # Indices must be written correctly (e.g. dbp, not dbp), otherwise an error #' # gets thrown -#' \dontrun{esophagus <- estimateDominance(esophagus, index="dbp")} +#' \donttest{esophagus <- estimateDominance(esophagus, index="dbp")} #' # Calculates dbp and Core Abundance indices #' esophagus <- estimateDominance(esophagus, index=c("dbp", "core_abundance")) #' # Shows all indices diff --git a/R/estimateRichness.R b/R/estimateRichness.R index a7d438cf6..99f9c1d6d 100644 --- a/R/estimateRichness.R +++ b/R/estimateRichness.R @@ -178,7 +178,7 @@ #' #' # Indices must be written correctly (all lowercase), otherwise an error #' # gets thrown -#' \dontrun{esophagus <- estimateRichness(esophagus, index="ace")} +#' \donttest{esophagus <- estimateRichness(esophagus, index="ace")} #' #' # Calculates Chao1 and ACE indices only #' esophagus <- estimateRichness(esophagus, index=c("chao1", "ace"), diff --git a/R/getExperimentCrossAssociation.R b/R/getExperimentCrossAssociation.R index 5730da27d..2d816003f 100644 --- a/R/getExperimentCrossAssociation.R +++ b/R/getExperimentCrossAssociation.R @@ -1002,7 +1002,7 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), # If assays were identical, and duplicate variable pairs were dropped if( assays_identical ){ # Change names so that they are not equal to colnames of variable_pairs - colnames(variable_pairs)[1:2] <- c("Var1_", "Var2_") + colnames(variable_pairs)[c(1,2)] <- c("Var1_", "Var2_") # Combine feature-pair names with correlation values and p-values correlations_and_p_values <- cbind(variable_pairs, correlations_and_p_values) @@ -1170,9 +1170,9 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), do.call(association_FUN, args = c(list(feature_mat), list(...))) }, error = function(cond) { - stop(paste0("Error occurred during calculation. Check, e.g., that ", + stop("Error occurred during calculation. Check, e.g., that ", "'association_FUN' fulfills requirements. 'association_FUN' ", - "threw a following error:\n", cond), + "threw a following error:\n", cond, call. = FALSE) }) } else { @@ -1180,17 +1180,17 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), suppressWarnings( do.call(association_FUN, args = c(list(feature_mat), list(...))) ) }, error = function(cond) { - stop(paste0("Error occurred during calculation. Check, e.g., that ", + stop("Error occurred during calculation. Check, e.g., that ", "'association_FUN' fulfills requirements. 'association_FUN' ", - "threw a following error:\n", cond), + "threw a following error:\n", cond, call. = FALSE) }) } # If temp's length is not 1, then function does not return single numeric value for each pair if( length(temp) != 1 ){ - stop(paste0("Error occurred during calculation. Check that ", - "'association_FUN' fulfills requirements."), + stop("Error occurred during calculation. Check that ", + "'association_FUN' fulfills requirements.", call. = FALSE) } return(temp) @@ -1298,8 +1298,8 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), use="pairwise.complete.obs")))$order }, error = function(cond) { - stop(paste0("Error occurred during sorting. Possible reason is that ", - "correlation matrix includes NAs. Try with 'sort = FALSE'."), + stop("Error occurred during sorting. Possible reason is that ", + "correlation matrix includes NAs. Try with 'sort = FALSE'.", call. = FALSE) } ) @@ -1308,8 +1308,8 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), use="pairwise.complete.obs")))$order }, error = function(cond) { - stop(paste0("Error occurred during sorting. Possible reason is that ", - "correlation matrix includes NAs. Try with 'sort = FALSE'."), + stop("Error occurred during sorting. Possible reason is that ", + "correlation matrix includes NAs. Try with 'sort = FALSE'.", call. = FALSE) } ) diff --git a/R/loadFromMetaphlan.R b/R/loadFromMetaphlan.R index 2d974df22..0db66b343 100644 --- a/R/loadFromMetaphlan.R +++ b/R/loadFromMetaphlan.R @@ -307,13 +307,13 @@ loadFromMetaphlan <- function( sample_names <- rownames(coldata) names(sample_names) <- sample_names } else{ - sample_names <- sapply(rownames(coldata), function(x){ + sample_names <- vapply(rownames(coldata), function(x){ x <- colnames(tse)[grep(x, colnames(tse))] if( length(x) != 1 ){ x <- NULL } return(x) - }) + },FUN.VALUE = character(1)) sample_names <- unlist(sample_names) } diff --git a/R/merge.R b/R/merge.R index 31966b5f8..c9713e722 100644 --- a/R/merge.R +++ b/R/merge.R @@ -210,14 +210,14 @@ setGeneric("mergeSamples", .check_assays_for_merge <- function(assay.type, assay){ # Check if assays include binary or negative values if( all(assay == 0 | assay == 1) ){ - warning(paste0("'",assay.type,"'", " includes binary values."), + warning("'",assay.type,"'", " includes binary values.", "\nAgglomeration of it might lead to meaningless values.", "\nCheck the assay, and consider doing transformation again manually", " with agglomerated data.", call. = FALSE) } if( !all( assay >= 0 | is.na(assay) ) ){ - warning(paste0("'",assay.type,"'", " includes negative values."), + warning("'",assay.type,"'", " includes negative values.", "\nAgglomeration of it might lead to meaningless values.", "\nCheck the assay, and consider doing transformation again manually", " with agglomerated data.", diff --git a/R/mergeSEs.R b/R/mergeSEs.R index 1f80a59e5..4b4cc5969 100644 --- a/R/mergeSEs.R +++ b/R/mergeSEs.R @@ -369,7 +369,7 @@ setMethod("right_join", signature = c(x = "ANY"), # Loop through individual TreeSEs and add them to tse if( length(x) > 0 ){ - for( i in 1:length(x) ){ + for( i in seq_len(length(x)) ){ # Give message if TRUE if( verbose ){ message("\r", i+1, "/", length(x)+1, appendLF = FALSE) @@ -765,7 +765,7 @@ setMethod("right_join", signature = c(x = "ANY"), # Get the shared class that is highest in hierarchy if( all( classes %in% allowed_classes[1] ) ){ class <- allowed_classes[1] - } else if( all( classes %in% allowed_classes[1:2] ) ){ + } else if( all( classes %in% allowed_classes[c(1,2)] ) ){ class <- allowed_classes[2] } else { class <- allowed_classes[3] diff --git a/R/runCCA.R b/R/runCCA.R index 4040c230f..acfae300f 100644 --- a/R/runCCA.R +++ b/R/runCCA.R @@ -452,7 +452,7 @@ setMethod("runCCA", "SingleCellExperiment", # Get the dissimilarity matrix based on original dissimilarity index # provided by user. If the analysis is CCA, disable method; calculate # always euclidean distances because CCA is based on Euclidean distances. - if( length(class(rda)) == 1 && class(rda) == "cca" ){ + if( length(class(rda)) == 1 && is(rda, 'cca') ){ dist_mat <- vegdist(mat, method = "euclidean") } else{ dist_mat <- vegdist(mat, method = method, ...) diff --git a/R/splitOn.R b/R/splitOn.R index 7464d0bdb..a81d9d9f6 100644 --- a/R/splitOn.R +++ b/R/splitOn.R @@ -291,6 +291,7 @@ setMethod("splitOn", signature = c(x = "TreeSummarizedExperiment"), # If the returned value is a list, go through all of them if( is(x, 'SimpleList') ){ x <- SimpleList(lapply(x, .agglomerate_trees)) + } else { # Otherwise, the returned value is TreeSE x <- .agglomerate_trees(x) diff --git a/R/subsampleCounts.R b/R/subsampleCounts.R index bca54cef8..e21909b42 100644 --- a/R/subsampleCounts.R +++ b/R/subsampleCounts.R @@ -10,6 +10,8 @@ #' instances where it can be useful. #' Note that the output of \code{subsampleCounts} is not the equivalent as the #' input and any result have to be verified with the original dataset. +#' To maintain the reproducibility, please define the seed using set.seed() +#' before implement this function. #' #' @param x A #' \code{SummarizedExperiment} object. @@ -28,7 +30,7 @@ #' simulated this can equal to lowest number of total counts #' found in a sample or a user specified number. #' -#' @param seed A random number seed for reproducibility of sampling. +#' #' #' @param replace Logical Default is \code{TRUE}. The default is with #' replacement (\code{replace=TRUE}). @@ -68,10 +70,11 @@ #' # they will be removed. #' data(GlobalPatterns) #' tse <- GlobalPatterns +#' set.seed(123) #' tse.subsampled <- subsampleCounts(tse, #' min_size = 60000, -#' name = "subsampled", -#' seed = 123) +#' name = "subsampled" +#' ) #' tse.subsampled #' dim(tse) #' dim(tse.subsampled) @@ -84,7 +87,7 @@ NULL setGeneric("subsampleCounts", signature = c("x"), function(x, assay.type = assay_name, assay_name = "counts", min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, + replace = TRUE, name = "subsampled", verbose = TRUE, ...) standardGeneric("subsampleCounts")) @@ -94,86 +97,78 @@ setGeneric("subsampleCounts", signature = c("x"), #' @aliases rarifyCounts #' @export setMethod("subsampleCounts", signature = c(x = "SummarizedExperiment"), - function(x, assay.type = assay_name, assay_name = "counts", - min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, - name = "subsampled", verbose = TRUE, ...){ - - warning("Subsampling/Rarefying may undermine downstream analyses ", - "and have unintended consequences. Therefore, make sure ", - "this normalization is appropriate for your data.", - call. = FALSE) - .check_assay_present(assay.type, x) - if(any(assay(x, assay.type) %% 1 != 0)){ - warning("assay contains non-integer values. Only counts table ", - "is applicable...") - } - if(!is.logical(verbose)){ - stop("`verbose` has to be logical i.e. TRUE or FALSE") - } - if(verbose){ - # Print to screen this value - message("`set.seed(", seed, ")` was used to initialize repeatable ", - "random subsampling.","\nPlease record this for your ", - "records so others can reproduce.") - } - if(!.is_numeric_string(seed)){ - stop("`seed` has to be an numeric value See `?set.seed`") - } - if(!is.logical(replace)){ - stop("`replace` has to be logical i.e. TRUE or FALSE") - } - # Check name - if(!.is_non_empty_string(name) || - name == assay.type){ - stop("'name' must be a non-empty single character value and be ", - "different from `assay.type`.", - call. = FALSE) - } - set.seed(seed) - # Make sure min_size is of length 1. - if(length(min_size) > 1){ - stop("`min_size` had more than one value. ", - "Specifiy a single integer value.") - min_size <- min_size[1] - } - if(!is.numeric(min_size) || - as.integer(min_size) != min_size && min_size <= 0){ - stop("min_size needs to be a positive integer value.") - } - # get samples with less than min number of reads - if(min(colSums2(assay(x, assay.type))) < min_size){ - rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < min_size] - # Return NULL, if no samples were found after subsampling - if( !any(!colnames(x) %in% rmsams) ){ - stop("No samples were found after subsampling.", - call. = FALSE) - } - if(verbose){ - message(length(rmsams), " samples removed ", - "because they contained fewer reads than `min_size`.") - } - # remove sample(s) - newtse <- x[, !colnames(x) %in% rmsams] - } else { - newtse <- x - } - newassay <- apply(assay(newtse, assay.type), 2, - .subsample_assay, - min_size=min_size, replace=replace) - rownames(newassay) <- rownames(newtse) - # remove features not present in any samples after subsampling - message(paste(length(which(rowSums2(newassay) == 0)), "features", - "removed because they are not present in all samples", - "after subsampling.")) - newassay <- newassay[rowSums2(newassay)>0,] - newtse <- newtse[rownames(newassay),] - assay(newtse, name, withDimnames=FALSE) <- newassay - newtse <- .add_values_to_metadata(newtse, - "subsampleCounts_min_size", - min_size) - return(newtse) - } + function(x, assay.type = assay_name, assay_name = "counts", + min_size = min(colSums2(assay(x))), + replace = TRUE, + name = "subsampled", verbose = TRUE, ...){ + + warning("Subsampling/Rarefying may undermine downstream analyses ", + "and have unintended consequences. Therefore, make sure ", + "this normalization is appropriate for your data.", + call. = FALSE) + .check_assay_present(assay.type, x) + if(any(assay(x, assay.type) %% 1 != 0)){ + warning("assay contains non-integer values. Only counts table ", + "is applicable...") + } + if(!is.logical(verbose)){ + stop("`verbose` has to be logical i.e. TRUE or FALSE") + } + + if(!is.logical(replace)){ + stop("`replace` has to be logical i.e. TRUE or FALSE") + } + # Check name + if(!.is_non_empty_string(name) || + name == assay.type){ + stop("'name' must be a non-empty single character value and be ", + "different from `assay.type`.", + call. = FALSE) + } + #set.seed(seed) + # Make sure min_size is of length 1. + if(length(min_size) > 1){ + stop("`min_size` had more than one value. ", + "Specifiy a single integer value.") + min_size <- min_size[1] + } + if(!is.numeric(min_size) || + as.integer(min_size) != min_size && min_size <= 0){ + stop("min_size needs to be a positive integer value.") + } + # get samples with less than min number of reads + if(min(colSums2(assay(x, assay.type))) < min_size){ + rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < min_size] + # Return NULL, if no samples were found after subsampling + if( !any(!colnames(x) %in% rmsams) ){ + stop("No samples were found after subsampling.", + call. = FALSE) + } + if(verbose){ + message(length(rmsams), " samples removed ", + "because they contained fewer reads than `min_size`.") + } + # remove sample(s) + newtse <- x[, !colnames(x) %in% rmsams] + } else { + newtse <- x + } + newassay <- apply(assay(newtse, assay.type), 2, + .subsample_assay, + min_size=min_size, replace=replace) + rownames(newassay) <- rownames(newtse) + # remove features not present in any samples after subsampling + message(paste(length(which(rowSums2(newassay) == 0)), "features", + "removed because they are not present in all samples", + "after subsampling.")) + newassay <- newassay[rowSums2(newassay)>0,] + newtse <- newtse[rownames(newassay),] + assay(newtse, name, withDimnames=FALSE) <- newassay + newtse <- .add_values_to_metadata(newtse, + "subsampleCounts_min_size", + min_size) + return(newtse) + } ) diff --git a/R/summaries.R b/R/summaries.R index a6ddad2e0..3a742db43 100644 --- a/R/summaries.R +++ b/R/summaries.R @@ -479,7 +479,7 @@ setMethod("summary", signature = c(object = "SummarizedExperiment"), .check_NAs_assay_counts <- function(x, assay.type){ assay.x <- .get_assay(x, assay.type) if(any(is.na(assay.x))) { - stop(paste0("There are samples with NAs in 'assay': ", assay.type), + stop("There are samples with NAs in 'assay': ", assay.type, " . This function is limited to sequencing data only. ", "Where raw counts do not usually have NAs. ", "Try to supply raw counts", diff --git a/R/transformCounts.R b/R/transformCounts.R index bd4cddacd..bb5d40b8a 100644 --- a/R/transformCounts.R +++ b/R/transformCounts.R @@ -565,7 +565,7 @@ setMethod("relAbundanceCounts", signature = c(x = "SummarizedExperiment"), pseudocount <- ifelse(pseudocount, min(mat[mat>0]), 0) # Report pseudocount if positive value if ( pseudocount > 0 ){ - message(paste("A pseudocount of", pseudocount, "was applied.")) + message("A pseudocount of ", pseudocount, " was applied.") } } # Give warning if pseudocount should not be added diff --git a/man/estimateDiversity.Rd b/man/estimateDiversity.Rd index bbe78e48d..ac033b912 100644 --- a/man/estimateDiversity.Rd +++ b/man/estimateDiversity.Rd @@ -241,7 +241,7 @@ library(scater) plotColData(tse, "Shannon") # ... by sample type plotColData(tse, "Shannon", "SampleType") -\dontrun{ +\donttest{ # combining different plots library(patchwork) plot_index <- c("Shannon","GiniSimpson") diff --git a/man/estimateDominance.Rd b/man/estimateDominance.Rd index 48f2b56c8..b53304c8f 100644 --- a/man/estimateDominance.Rd +++ b/man/estimateDominance.Rd @@ -187,7 +187,7 @@ colData(esophagus) # Indices must be written correctly (e.g. dbp, not dbp), otherwise an error # gets thrown -\dontrun{esophagus <- estimateDominance(esophagus, index="dbp")} +\donttest{esophagus <- estimateDominance(esophagus, index="dbp")} # Calculates dbp and Core Abundance indices esophagus <- estimateDominance(esophagus, index=c("dbp", "core_abundance")) # Shows all indices diff --git a/man/estimateRichness.Rd b/man/estimateRichness.Rd index 09c051792..2073d4c1f 100644 --- a/man/estimateRichness.Rd +++ b/man/estimateRichness.Rd @@ -172,7 +172,7 @@ colData(esophagus) <- NULL # Indices must be written correctly (all lowercase), otherwise an error # gets thrown -\dontrun{esophagus <- estimateRichness(esophagus, index="ace")} +\donttest{esophagus <- estimateRichness(esophagus, index="ace")} # Calculates Chao1 and ACE indices only esophagus <- estimateRichness(esophagus, index=c("chao1", "ace"), diff --git a/man/subsampleCounts.Rd b/man/subsampleCounts.Rd index 7e9fabfc8..b93e28ed8 100644 --- a/man/subsampleCounts.Rd +++ b/man/subsampleCounts.Rd @@ -11,7 +11,6 @@ subsampleCounts( assay.type = assay_name, assay_name = "counts", min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, name = "subsampled", verbose = TRUE, @@ -23,7 +22,6 @@ subsampleCounts( assay.type = assay_name, assay_name = "counts", min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, name = "subsampled", verbose = TRUE, @@ -48,8 +46,6 @@ will be disabled.)} simulated this can equal to lowest number of total counts found in a sample or a user specified number.} -\item{seed}{A random number seed for reproducibility of sampling.} - \item{replace}{Logical Default is \code{TRUE}. The default is with replacement (\code{replace=TRUE}). See \code{\link[phyloseq:rarefy_even_depth]{phyloseq::rarefy_even_depth}} @@ -77,6 +73,8 @@ we include the \code{subsampleCounts} function because there may be some instances where it can be useful. Note that the output of \code{subsampleCounts} is not the equivalent as the input and any result have to be verified with the original dataset. +To maintain the reproducibility, please define the seed using set.seed() +before implement this function. } \examples{ # When samples in TreeSE are less than specified min_size, they will be removed. @@ -84,10 +82,11 @@ input and any result have to be verified with the original dataset. # they will be removed. data(GlobalPatterns) tse <- GlobalPatterns +set.seed(123) tse.subsampled <- subsampleCounts(tse, min_size = 60000, - name = "subsampled", - seed = 123) + name = "subsampled" + ) tse.subsampled dim(tse) dim(tse.subsampled) diff --git a/tests/testthat/test-8subsample.R b/tests/testthat/test-8subsample.R index de0d77304..19a703162 100644 --- a/tests/testthat/test-8subsample.R +++ b/tests/testthat/test-8subsample.R @@ -1,13 +1,13 @@ context("subsampleCounts") test_that("subsampleCounts", { - + seed = 1938 + set.seed(seed) data(GlobalPatterns, package="mia") expect_warning(tse.subsampled <- subsampleCounts(GlobalPatterns, min_size = 60000, name = "subsampled", - replace = TRUE, - seed = 1938)) + replace = TRUE)) # check class expect_s4_class(tse.subsampled, "TreeSummarizedExperiment") expect_equal(nrow(tse.subsampled), 12403) @@ -35,11 +35,12 @@ test_that("subsampleCounts", { expect_equal(unname(colSums2(assay(tse.subsampled, "subsampled"))), expColSums) # When replace = FALSE + seed = 1938 + set.seed(seed) expect_warning(tse.subsampled.rp <- subsampleCounts(GlobalPatterns, min_size = 60000, name = "subsampled", - replace = FALSE, - seed = 1938)) + replace = FALSE)) # check number of features removed is correct expnFeaturesRemovedRp <- 6731