From 70e3e7e4a9a3b95926d11d700025b8df645e9978 Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 12:16:43 +0300 Subject: [PATCH 01/18] fix warning BiocCheck --- R/runCCA.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/runCCA.R b/R/runCCA.R index 4040c230f..acfae300f 100644 --- a/R/runCCA.R +++ b/R/runCCA.R @@ -452,7 +452,7 @@ setMethod("runCCA", "SingleCellExperiment", # Get the dissimilarity matrix based on original dissimilarity index # provided by user. If the analysis is CCA, disable method; calculate # always euclidean distances because CCA is based on Euclidean distances. - if( length(class(rda)) == 1 && class(rda) == "cca" ){ + if( length(class(rda)) == 1 && is(rda, 'cca') ){ dist_mat <- vegdist(mat, method = "euclidean") } else{ dist_mat <- vegdist(mat, method = method, ...) From 787cb87b3a991ede01824a6fcceafb6fe271c426 Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 12:17:36 +0300 Subject: [PATCH 02/18] fix warning BiocCheck --- R/splitOn.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/splitOn.R b/R/splitOn.R index 69bb80810..7428df6b6 100644 --- a/R/splitOn.R +++ b/R/splitOn.R @@ -289,7 +289,7 @@ setMethod("splitOn", signature = c(x = "TreeSummarizedExperiment"), # Manipulate rowTree or not? if( update_rowTree ){ # If the returned value is a list, go through all of them - if( class(x) == "SimpleList" ){ + if(is(x, 'SimpleList')){#( class(x) == "SimpleList" ){ x <- SimpleList(lapply(x, addTaxonomyTree)) } else { # Otherwise, the returned value is TreeSE From 81af557d0556919153f06388afa4caafd034a90f Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 12:23:17 +0300 Subject: [PATCH 03/18] fix warning BiocCheck --- R/subsampleCounts.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/subsampleCounts.R b/R/subsampleCounts.R index bca54cef8..e40e587a7 100644 --- a/R/subsampleCounts.R +++ b/R/subsampleCounts.R @@ -130,7 +130,7 @@ setMethod("subsampleCounts", signature = c(x = "SummarizedExperiment"), "different from `assay.type`.", call. = FALSE) } - set.seed(seed) + #set.seed(seed) # Make sure min_size is of length 1. if(length(min_size) > 1){ stop("`min_size` had more than one value. ", From 0ae160251e3eaf409d3444a2199460b98fdf29c3 Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 13:41:31 +0300 Subject: [PATCH 04/18] fix note and warning BiocCheck --- R/estimateDivergence.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/estimateDivergence.R b/R/estimateDivergence.R index 91b18c765..29de3c472 100644 --- a/R/estimateDivergence.R +++ b/R/estimateDivergence.R @@ -145,13 +145,13 @@ setMethod("estimateDivergence", signature = c(x="SummarizedExperiment"), if( "median" %in% reference || "mean" %in% reference ){ reference <- apply(mat, 1, reference) } else if( !reference %in% colnames(mat) ) { - stop(paste("Reference", reference, "not recognized.")) + stop("Reference ", reference, " not recognized.", call. = FALSE) } } # Distance between all samples against one reference sample # FIXME: could be be optimzed with sweep / parallelization v <- seq_len(ncol(mat)) - sapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)}) + vapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)}) } From 27c15c019346def5741684879bc9777fedec1166 Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 13:42:13 +0300 Subject: [PATCH 05/18] fix note and warning BiocCheck --- R/getExperimentCrossAssociation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/getExperimentCrossAssociation.R b/R/getExperimentCrossAssociation.R index 04213c73a..9911f6359 100644 --- a/R/getExperimentCrossAssociation.R +++ b/R/getExperimentCrossAssociation.R @@ -956,7 +956,7 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), # If assays were identical, and duplicate variable pairs were dropped if( assays_identical ){ # Change names so that they are not equal to colnames of variable_pairs - colnames(variable_pairs)[1:2] <- c("Var1_", "Var2_") + colnames(variable_pairs)c(1:2) <- c("Var1_", "Var2_") # Combine feature-pair names with correlation values and p-values correlations_and_p_values <- cbind(variable_pairs, correlations_and_p_values) From 4933280563995020bffb0c2ab618ec0acd4a044b Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 13:42:48 +0300 Subject: [PATCH 06/18] fix note and warning BiocCheck --- R/mergeSEs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mergeSEs.R b/R/mergeSEs.R index 19d059443..23fdd19be 100644 --- a/R/mergeSEs.R +++ b/R/mergeSEs.R @@ -369,7 +369,7 @@ setMethod("right_join", signature = c(x = "ANY"), # Loop through individual TreeSEs and add them to tse if( length(x) > 0 ){ - for( i in 1:length(x) ){ + for( i in seq_len(length(x)) ){ # Give message if TRUE if( verbose ){ message("\r", i+1, "/", length(x)+1, appendLF = FALSE) From 213682aa690203637559812229f1e745cf99fa7b Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 13:44:33 +0300 Subject: [PATCH 07/18] fix note and warning BiocCheck --- R/getExperimentCrossAssociation.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/getExperimentCrossAssociation.R b/R/getExperimentCrossAssociation.R index 9911f6359..9cf089ea1 100644 --- a/R/getExperimentCrossAssociation.R +++ b/R/getExperimentCrossAssociation.R @@ -1124,9 +1124,9 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), do.call(association_FUN, args = c(list(feature_mat), list(...))) }, error = function(cond) { - stop(paste0("Error occurred during calculation. Check, e.g., that ", + stop("Error occurred during calculation. Check, e.g., that ", "'association_FUN' fulfills requirements. 'association_FUN' ", - "threw a following error:\n", cond), + "threw a following error:\n", cond, call. = FALSE) }) } else { From 64c97dfbae767aad8aa781a82ff2c0f21b877d1a Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 13:46:26 +0300 Subject: [PATCH 08/18] fix note and warning BiocCheck --- R/getExperimentCrossAssociation.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/getExperimentCrossAssociation.R b/R/getExperimentCrossAssociation.R index 9cf089ea1..64960b66e 100644 --- a/R/getExperimentCrossAssociation.R +++ b/R/getExperimentCrossAssociation.R @@ -1134,17 +1134,17 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), suppressWarnings( do.call(association_FUN, args = c(list(feature_mat), list(...))) ) }, error = function(cond) { - stop(paste0("Error occurred during calculation. Check, e.g., that ", + stop("Error occurred during calculation. Check, e.g., that ", "'association_FUN' fulfills requirements. 'association_FUN' ", - "threw a following error:\n", cond), + "threw a following error:\n", cond, call. = FALSE) }) } # If temp's length is not 1, then function does not return single numeric value for each pair if( length(temp) != 1 ){ - stop(paste0("Error occurred during calculation. Check that ", - "'association_FUN' fulfills requirements."), + stop("Error occurred during calculation. Check that ", + "'association_FUN' fulfills requirements.", call. = FALSE) } return(temp) @@ -1252,8 +1252,8 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), use="pairwise.complete.obs")))$order }, error = function(cond) { - stop(paste0("Error occurred during sorting. Possible reason is that ", - "correlation matrix includes NAs. Try with 'sort = FALSE'."), + stop("Error occurred during sorting. Possible reason is that ", + "correlation matrix includes NAs. Try with 'sort = FALSE'.", call. = FALSE) } ) @@ -1262,8 +1262,8 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), use="pairwise.complete.obs")))$order }, error = function(cond) { - stop(paste0("Error occurred during sorting. Possible reason is that ", - "correlation matrix includes NAs. Try with 'sort = FALSE'."), + stop("Error occurred during sorting. Possible reason is that ", + "correlation matrix includes NAs. Try with 'sort = FALSE'.", call. = FALSE) } ) From e4413698bf4046f8ea4bfa9d7d7d4414eb0ca4c7 Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 13:50:11 +0300 Subject: [PATCH 09/18] fix note and warning BiocCheck --- R/summaries.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/summaries.R b/R/summaries.R index 1d6afb487..6ccba4a0b 100644 --- a/R/summaries.R +++ b/R/summaries.R @@ -485,7 +485,7 @@ setMethod("summary", signature = c(object = "SummarizedExperiment"), .check_NAs_assay_counts <- function(x, assay.type){ assay.x <- .get_assay(x, assay.type) if(any(is.na(assay.x))) { - stop(paste0("There are samples with NAs in 'assay': ", assay.type), + stop("There are samples with NAs in 'assay': ", assay.type, " . This function is limited to sequencing data only. ", "Where raw counts do not usually have NAs. ", "Try to supply raw counts", From 1571cc31f400a21cb0af34493dce01b48bc2824c Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 14:36:40 +0300 Subject: [PATCH 10/18] fix note and warning BiocCheck --- R/estimateDiversity.R | 2 +- R/estimateDominance.R | 2 +- R/transformCounts.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/estimateDiversity.R b/R/estimateDiversity.R index e9fcc4a50..ac722f4c9 100644 --- a/R/estimateDiversity.R +++ b/R/estimateDiversity.R @@ -216,7 +216,7 @@ #' plotColData(tse, "Shannon") #' # ... by sample type #' plotColData(tse, "Shannon", "SampleType") -#' \dontrun{ +#' \donttest{ #' # combining different plots #' library(patchwork) #' plot_index <- c("Shannon","GiniSimpson") diff --git a/R/estimateDominance.R b/R/estimateDominance.R index 934b129d0..733de5538 100644 --- a/R/estimateDominance.R +++ b/R/estimateDominance.R @@ -184,7 +184,7 @@ #' #' # Indices must be written correctly (e.g. dbp, not dbp), otherwise an error #' # gets thrown -#' \dontrun{esophagus <- estimateDominance(esophagus, index="dbp")} +#' \donttest{esophagus <- estimateDominance(esophagus, index="dbp")} #' # Calculates dbp and Core Abundance indices #' esophagus <- estimateDominance(esophagus, index=c("dbp", "core_abundance")) #' # Shows all indices diff --git a/R/transformCounts.R b/R/transformCounts.R index 2b96130aa..c90713efd 100644 --- a/R/transformCounts.R +++ b/R/transformCounts.R @@ -564,7 +564,7 @@ setMethod("relAbundanceCounts", signature = c(x = "SummarizedExperiment"), pseudocount <- ifelse(pseudocount, min(mat[mat>0]), 0) # Report pseudocount if positive value if ( pseudocount > 0 ){ - message(paste("A pseudocount of", pseudocount, "was applied.")) + message("A pseudocount of ", pseudocount, " was applied.") } } # Give warning if pseudocount should not be added From 7b367a7d556a04c31f0aed7e8f4d246cf8ff22fa Mon Sep 17 00:00:00 2001 From: erawijantari Date: Wed, 18 Oct 2023 14:54:45 +0300 Subject: [PATCH 11/18] fix note and warning BiocCheck --- R/mergeSEs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mergeSEs.R b/R/mergeSEs.R index 23fdd19be..d63239584 100644 --- a/R/mergeSEs.R +++ b/R/mergeSEs.R @@ -744,7 +744,7 @@ setMethod("right_join", signature = c(x = "ANY"), # Get the shared class that is highest in hierarchy if( all( classes %in% allowed_classes[1] ) ){ class <- allowed_classes[1] - } else if( all( classes %in% allowed_classes[1:2] ) ){ + } else if( all( classes %in% allowed_classes[c(1,2)] ) ){ class <- allowed_classes[2] } else { class <- allowed_classes[3] From 1af277b349601b384b8fb6cdb8c291b09b50d125 Mon Sep 17 00:00:00 2001 From: erawijantari Date: Thu, 19 Oct 2023 09:56:24 +0300 Subject: [PATCH 12/18] fix note and warning BiocCheck --- R/subsampleCounts.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/subsampleCounts.R b/R/subsampleCounts.R index e40e587a7..66ad07051 100644 --- a/R/subsampleCounts.R +++ b/R/subsampleCounts.R @@ -130,7 +130,7 @@ setMethod("subsampleCounts", signature = c(x = "SummarizedExperiment"), "different from `assay.type`.", call. = FALSE) } - #set.seed(seed) + set.seed(seed) # Make sure min_size is of length 1. if(length(min_size) > 1){ stop("`min_size` had more than one value. ", @@ -163,9 +163,9 @@ setMethod("subsampleCounts", signature = c(x = "SummarizedExperiment"), min_size=min_size, replace=replace) rownames(newassay) <- rownames(newtse) # remove features not present in any samples after subsampling - message(paste(length(which(rowSums2(newassay) == 0)), "features", - "removed because they are not present in all samples", - "after subsampling.")) + message(length(which(rowSums2(newassay) == 0)), " features", + " removed because they are not present in all samples", + " after subsampling.") newassay <- newassay[rowSums2(newassay)>0,] newtse <- newtse[rownames(newassay),] assay(newtse, name, withDimnames=FALSE) <- newassay From 911cb53b90537b44233d6271b07158faa4dade7d Mon Sep 17 00:00:00 2001 From: erawijantari Date: Thu, 19 Oct 2023 10:05:52 +0300 Subject: [PATCH 13/18] fix note and warning BiocCheck --- R/getExperimentCrossAssociation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/getExperimentCrossAssociation.R b/R/getExperimentCrossAssociation.R index 64960b66e..5c566cffb 100644 --- a/R/getExperimentCrossAssociation.R +++ b/R/getExperimentCrossAssociation.R @@ -956,7 +956,7 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"), # If assays were identical, and duplicate variable pairs were dropped if( assays_identical ){ # Change names so that they are not equal to colnames of variable_pairs - colnames(variable_pairs)c(1:2) <- c("Var1_", "Var2_") + colnames(variable_pairs)[c(1,2)] <- c("Var1_", "Var2_") # Combine feature-pair names with correlation values and p-values correlations_and_p_values <- cbind(variable_pairs, correlations_and_p_values) From c628baa03bb1447e30f86747c88255ce0f5e08bc Mon Sep 17 00:00:00 2001 From: erawijantari Date: Thu, 19 Oct 2023 10:33:16 +0300 Subject: [PATCH 14/18] fix note and warning BiocCheck --- R/estimateDivergence.R | 2 +- R/estimateRichness.R | 2 +- R/loadFromMetaphlan.R | 4 ++-- R/merge.R | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/estimateDivergence.R b/R/estimateDivergence.R index 29de3c472..b37910cb0 100644 --- a/R/estimateDivergence.R +++ b/R/estimateDivergence.R @@ -152,6 +152,6 @@ setMethod("estimateDivergence", signature = c(x="SummarizedExperiment"), # Distance between all samples against one reference sample # FIXME: could be be optimzed with sweep / parallelization v <- seq_len(ncol(mat)) - vapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)}) + vapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)},FUN.VALUE = numeric(1)) } diff --git a/R/estimateRichness.R b/R/estimateRichness.R index a7d438cf6..99f9c1d6d 100644 --- a/R/estimateRichness.R +++ b/R/estimateRichness.R @@ -178,7 +178,7 @@ #' #' # Indices must be written correctly (all lowercase), otherwise an error #' # gets thrown -#' \dontrun{esophagus <- estimateRichness(esophagus, index="ace")} +#' \donttest{esophagus <- estimateRichness(esophagus, index="ace")} #' #' # Calculates Chao1 and ACE indices only #' esophagus <- estimateRichness(esophagus, index=c("chao1", "ace"), diff --git a/R/loadFromMetaphlan.R b/R/loadFromMetaphlan.R index ccca5bdf6..bbfbfbea3 100644 --- a/R/loadFromMetaphlan.R +++ b/R/loadFromMetaphlan.R @@ -291,13 +291,13 @@ loadFromMetaphlan <- function( sample_names <- rownames(coldata) names(sample_names) <- sample_names } else{ - sample_names <- sapply(rownames(coldata), function(x){ + sample_names <- vapply(rownames(coldata), function(x){ x <- colnames(tse)[grep(x, colnames(tse))] if( length(x) != 1 ){ x <- NULL } return(x) - }) + },FUN.VALUE = character(1)) sample_names <- unlist(sample_names) } diff --git a/R/merge.R b/R/merge.R index f30af2567..e374cf25e 100644 --- a/R/merge.R +++ b/R/merge.R @@ -202,14 +202,14 @@ setGeneric("mergeSamples", .check_assays_for_merge <- function(assay.type, assay){ # Check if assays include binary or negative values if( all(assay == 0 | assay == 1) ){ - warning(paste0("'",assay.type,"'", " includes binary values."), + warning("'",assay.type,"'", " includes binary values.", "\nAgglomeration of it might lead to meaningless values.", "\nCheck the assay, and consider doing transformation again manually", " with agglomerated data.", call. = FALSE) } if( !all( assay >= 0 | is.na(assay) ) ){ - warning(paste0("'",assay.type,"'", " includes negative values."), + warning("'",assay.type,"'", " includes negative values.", "\nAgglomeration of it might lead to meaningless values.", "\nCheck the assay, and consider doing transformation again manually", " with agglomerated data.", From 181691e59aace06cdc60f05c44ef8abf4e9b8eb1 Mon Sep 17 00:00:00 2001 From: erawijantari Date: Thu, 19 Oct 2023 10:40:53 +0300 Subject: [PATCH 15/18] fix note and warning BiocCheck --- man/estimateDiversity.Rd | 2 +- man/estimateDominance.Rd | 2 +- man/estimateRichness.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/man/estimateDiversity.Rd b/man/estimateDiversity.Rd index bbe78e48d..ac033b912 100644 --- a/man/estimateDiversity.Rd +++ b/man/estimateDiversity.Rd @@ -241,7 +241,7 @@ library(scater) plotColData(tse, "Shannon") # ... by sample type plotColData(tse, "Shannon", "SampleType") -\dontrun{ +\donttest{ # combining different plots library(patchwork) plot_index <- c("Shannon","GiniSimpson") diff --git a/man/estimateDominance.Rd b/man/estimateDominance.Rd index 48f2b56c8..b53304c8f 100644 --- a/man/estimateDominance.Rd +++ b/man/estimateDominance.Rd @@ -187,7 +187,7 @@ colData(esophagus) # Indices must be written correctly (e.g. dbp, not dbp), otherwise an error # gets thrown -\dontrun{esophagus <- estimateDominance(esophagus, index="dbp")} +\donttest{esophagus <- estimateDominance(esophagus, index="dbp")} # Calculates dbp and Core Abundance indices esophagus <- estimateDominance(esophagus, index=c("dbp", "core_abundance")) # Shows all indices diff --git a/man/estimateRichness.Rd b/man/estimateRichness.Rd index 09c051792..2073d4c1f 100644 --- a/man/estimateRichness.Rd +++ b/man/estimateRichness.Rd @@ -172,7 +172,7 @@ colData(esophagus) <- NULL # Indices must be written correctly (all lowercase), otherwise an error # gets thrown -\dontrun{esophagus <- estimateRichness(esophagus, index="ace")} +\donttest{esophagus <- estimateRichness(esophagus, index="ace")} # Calculates Chao1 and ACE indices only esophagus <- estimateRichness(esophagus, index=c("chao1", "ace"), From 6bdd6fcf4bb8867ef964759bee35d7a602bd3ced Mon Sep 17 00:00:00 2001 From: erawijantari Date: Tue, 28 Nov 2023 13:33:51 +0200 Subject: [PATCH 16/18] remove set.seed() inside function --- R/subsampleCounts.R | 163 +++++++++++++++++++++----------------------- 1 file changed, 79 insertions(+), 84 deletions(-) diff --git a/R/subsampleCounts.R b/R/subsampleCounts.R index 66ad07051..e21909b42 100644 --- a/R/subsampleCounts.R +++ b/R/subsampleCounts.R @@ -10,6 +10,8 @@ #' instances where it can be useful. #' Note that the output of \code{subsampleCounts} is not the equivalent as the #' input and any result have to be verified with the original dataset. +#' To maintain the reproducibility, please define the seed using set.seed() +#' before implement this function. #' #' @param x A #' \code{SummarizedExperiment} object. @@ -28,7 +30,7 @@ #' simulated this can equal to lowest number of total counts #' found in a sample or a user specified number. #' -#' @param seed A random number seed for reproducibility of sampling. +#' #' #' @param replace Logical Default is \code{TRUE}. The default is with #' replacement (\code{replace=TRUE}). @@ -68,10 +70,11 @@ #' # they will be removed. #' data(GlobalPatterns) #' tse <- GlobalPatterns +#' set.seed(123) #' tse.subsampled <- subsampleCounts(tse, #' min_size = 60000, -#' name = "subsampled", -#' seed = 123) +#' name = "subsampled" +#' ) #' tse.subsampled #' dim(tse) #' dim(tse.subsampled) @@ -84,7 +87,7 @@ NULL setGeneric("subsampleCounts", signature = c("x"), function(x, assay.type = assay_name, assay_name = "counts", min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, + replace = TRUE, name = "subsampled", verbose = TRUE, ...) standardGeneric("subsampleCounts")) @@ -94,86 +97,78 @@ setGeneric("subsampleCounts", signature = c("x"), #' @aliases rarifyCounts #' @export setMethod("subsampleCounts", signature = c(x = "SummarizedExperiment"), - function(x, assay.type = assay_name, assay_name = "counts", - min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, - name = "subsampled", verbose = TRUE, ...){ - - warning("Subsampling/Rarefying may undermine downstream analyses ", - "and have unintended consequences. Therefore, make sure ", - "this normalization is appropriate for your data.", - call. = FALSE) - .check_assay_present(assay.type, x) - if(any(assay(x, assay.type) %% 1 != 0)){ - warning("assay contains non-integer values. Only counts table ", - "is applicable...") - } - if(!is.logical(verbose)){ - stop("`verbose` has to be logical i.e. TRUE or FALSE") - } - if(verbose){ - # Print to screen this value - message("`set.seed(", seed, ")` was used to initialize repeatable ", - "random subsampling.","\nPlease record this for your ", - "records so others can reproduce.") - } - if(!.is_numeric_string(seed)){ - stop("`seed` has to be an numeric value See `?set.seed`") - } - if(!is.logical(replace)){ - stop("`replace` has to be logical i.e. TRUE or FALSE") - } - # Check name - if(!.is_non_empty_string(name) || - name == assay.type){ - stop("'name' must be a non-empty single character value and be ", - "different from `assay.type`.", - call. = FALSE) - } - set.seed(seed) - # Make sure min_size is of length 1. - if(length(min_size) > 1){ - stop("`min_size` had more than one value. ", - "Specifiy a single integer value.") - min_size <- min_size[1] - } - if(!is.numeric(min_size) || - as.integer(min_size) != min_size && min_size <= 0){ - stop("min_size needs to be a positive integer value.") - } - # get samples with less than min number of reads - if(min(colSums2(assay(x, assay.type))) < min_size){ - rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < min_size] - # Return NULL, if no samples were found after subsampling - if( !any(!colnames(x) %in% rmsams) ){ - stop("No samples were found after subsampling.", - call. = FALSE) - } - if(verbose){ - message(length(rmsams), " samples removed ", - "because they contained fewer reads than `min_size`.") - } - # remove sample(s) - newtse <- x[, !colnames(x) %in% rmsams] - } else { - newtse <- x - } - newassay <- apply(assay(newtse, assay.type), 2, - .subsample_assay, - min_size=min_size, replace=replace) - rownames(newassay) <- rownames(newtse) - # remove features not present in any samples after subsampling - message(length(which(rowSums2(newassay) == 0)), " features", - " removed because they are not present in all samples", - " after subsampling.") - newassay <- newassay[rowSums2(newassay)>0,] - newtse <- newtse[rownames(newassay),] - assay(newtse, name, withDimnames=FALSE) <- newassay - newtse <- .add_values_to_metadata(newtse, - "subsampleCounts_min_size", - min_size) - return(newtse) - } + function(x, assay.type = assay_name, assay_name = "counts", + min_size = min(colSums2(assay(x))), + replace = TRUE, + name = "subsampled", verbose = TRUE, ...){ + + warning("Subsampling/Rarefying may undermine downstream analyses ", + "and have unintended consequences. Therefore, make sure ", + "this normalization is appropriate for your data.", + call. = FALSE) + .check_assay_present(assay.type, x) + if(any(assay(x, assay.type) %% 1 != 0)){ + warning("assay contains non-integer values. Only counts table ", + "is applicable...") + } + if(!is.logical(verbose)){ + stop("`verbose` has to be logical i.e. TRUE or FALSE") + } + + if(!is.logical(replace)){ + stop("`replace` has to be logical i.e. TRUE or FALSE") + } + # Check name + if(!.is_non_empty_string(name) || + name == assay.type){ + stop("'name' must be a non-empty single character value and be ", + "different from `assay.type`.", + call. = FALSE) + } + #set.seed(seed) + # Make sure min_size is of length 1. + if(length(min_size) > 1){ + stop("`min_size` had more than one value. ", + "Specifiy a single integer value.") + min_size <- min_size[1] + } + if(!is.numeric(min_size) || + as.integer(min_size) != min_size && min_size <= 0){ + stop("min_size needs to be a positive integer value.") + } + # get samples with less than min number of reads + if(min(colSums2(assay(x, assay.type))) < min_size){ + rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < min_size] + # Return NULL, if no samples were found after subsampling + if( !any(!colnames(x) %in% rmsams) ){ + stop("No samples were found after subsampling.", + call. = FALSE) + } + if(verbose){ + message(length(rmsams), " samples removed ", + "because they contained fewer reads than `min_size`.") + } + # remove sample(s) + newtse <- x[, !colnames(x) %in% rmsams] + } else { + newtse <- x + } + newassay <- apply(assay(newtse, assay.type), 2, + .subsample_assay, + min_size=min_size, replace=replace) + rownames(newassay) <- rownames(newtse) + # remove features not present in any samples after subsampling + message(paste(length(which(rowSums2(newassay) == 0)), "features", + "removed because they are not present in all samples", + "after subsampling.")) + newassay <- newassay[rowSums2(newassay)>0,] + newtse <- newtse[rownames(newassay),] + assay(newtse, name, withDimnames=FALSE) <- newassay + newtse <- .add_values_to_metadata(newtse, + "subsampleCounts_min_size", + min_size) + return(newtse) + } ) From 0db2e520a9d74c41fd3b0b404e147ca7b38cafa9 Mon Sep 17 00:00:00 2001 From: erawijantari Date: Tue, 28 Nov 2023 15:02:00 +0200 Subject: [PATCH 17/18] adjust the test code by setting the seed outside function --- man/subsampleCounts.Rd | 11 +++++------ tests/testthat/test-8subsample.R | 11 ++++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/man/subsampleCounts.Rd b/man/subsampleCounts.Rd index 7e9fabfc8..b93e28ed8 100644 --- a/man/subsampleCounts.Rd +++ b/man/subsampleCounts.Rd @@ -11,7 +11,6 @@ subsampleCounts( assay.type = assay_name, assay_name = "counts", min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, name = "subsampled", verbose = TRUE, @@ -23,7 +22,6 @@ subsampleCounts( assay.type = assay_name, assay_name = "counts", min_size = min(colSums2(assay(x))), - seed = runif(1, 0, .Machine$integer.max), replace = TRUE, name = "subsampled", verbose = TRUE, @@ -48,8 +46,6 @@ will be disabled.)} simulated this can equal to lowest number of total counts found in a sample or a user specified number.} -\item{seed}{A random number seed for reproducibility of sampling.} - \item{replace}{Logical Default is \code{TRUE}. The default is with replacement (\code{replace=TRUE}). See \code{\link[phyloseq:rarefy_even_depth]{phyloseq::rarefy_even_depth}} @@ -77,6 +73,8 @@ we include the \code{subsampleCounts} function because there may be some instances where it can be useful. Note that the output of \code{subsampleCounts} is not the equivalent as the input and any result have to be verified with the original dataset. +To maintain the reproducibility, please define the seed using set.seed() +before implement this function. } \examples{ # When samples in TreeSE are less than specified min_size, they will be removed. @@ -84,10 +82,11 @@ input and any result have to be verified with the original dataset. # they will be removed. data(GlobalPatterns) tse <- GlobalPatterns +set.seed(123) tse.subsampled <- subsampleCounts(tse, min_size = 60000, - name = "subsampled", - seed = 123) + name = "subsampled" + ) tse.subsampled dim(tse) dim(tse.subsampled) diff --git a/tests/testthat/test-8subsample.R b/tests/testthat/test-8subsample.R index de0d77304..19a703162 100644 --- a/tests/testthat/test-8subsample.R +++ b/tests/testthat/test-8subsample.R @@ -1,13 +1,13 @@ context("subsampleCounts") test_that("subsampleCounts", { - + seed = 1938 + set.seed(seed) data(GlobalPatterns, package="mia") expect_warning(tse.subsampled <- subsampleCounts(GlobalPatterns, min_size = 60000, name = "subsampled", - replace = TRUE, - seed = 1938)) + replace = TRUE)) # check class expect_s4_class(tse.subsampled, "TreeSummarizedExperiment") expect_equal(nrow(tse.subsampled), 12403) @@ -35,11 +35,12 @@ test_that("subsampleCounts", { expect_equal(unname(colSums2(assay(tse.subsampled, "subsampled"))), expColSums) # When replace = FALSE + seed = 1938 + set.seed(seed) expect_warning(tse.subsampled.rp <- subsampleCounts(GlobalPatterns, min_size = 60000, name = "subsampled", - replace = FALSE, - seed = 1938)) + replace = FALSE)) # check number of features removed is correct expnFeaturesRemovedRp <- 6731 From d04d36eef86094247ac162816ed53627b22f6c1f Mon Sep 17 00:00:00 2001 From: erawijantari Date: Mon, 11 Mar 2024 15:26:35 +0200 Subject: [PATCH 18/18] resolve conflict --- R/splitOn.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/splitOn.R b/R/splitOn.R index 7472e085a..29cbf3258 100644 --- a/R/splitOn.R +++ b/R/splitOn.R @@ -289,7 +289,7 @@ setMethod("splitOn", signature = c(x = "TreeSummarizedExperiment"), # Manipulate rowTree or not? if( update_rowTree ){ # If the returned value is a list, go through all of them - if(is(x, 'SimpleList')){#( class(x) == "SimpleList" ){ + if(is(x, 'SimpleList')){ x <- SimpleList(lapply(x, addTaxonomyTree)) } else { # Otherwise, the returned value is TreeSE