microbiome · antagomir · Apr 3, 2024 · Oct 18, 2023 · Oct 18, 2023 · Oct 18, 2023
diff --git a/R/estimateDivergence.R b/R/estimateDivergence.R
@@ -145,13 +145,13 @@ setMethod("estimateDivergence", signature = c(x="SummarizedExperiment"),
         if( "median" %in% reference || "mean" %in% reference ){
             reference <- apply(mat, 1, reference)
         } else if( !reference %in% colnames(mat) ) {
-            stop(paste("Reference", reference, "not recognized."))
+            stop("Reference ", reference, " not recognized.", call. = FALSE)
         }
     }
 
     # Distance between all samples against one reference sample
     # FIXME: could be be optimzed with sweep / parallelization
     v <- seq_len(ncol(mat))
-    sapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)})
+    vapply(v, function (i) {FUN(rbind(mat[,i], reference), method=method, ...)},FUN.VALUE = numeric(1))
 }
 
diff --git a/R/estimateDiversity.R b/R/estimateDiversity.R
@@ -216,7 +216,7 @@
 #' plotColData(tse, "Shannon")
 #' # ... by sample type
 #' plotColData(tse, "Shannon", "SampleType")
-#' \dontrun{
+#' \donttest{
 #' # combining different plots
 #' library(patchwork)
 #' plot_index <- c("Shannon","GiniSimpson")

diff --git a/R/estimateDominance.R b/R/estimateDominance.R
@@ -184,7 +184,7 @@
 #'
 #' # Indices must be written correctly (e.g. dbp, not dbp), otherwise an error
 #' # gets thrown
-#' \dontrun{esophagus <- estimateDominance(esophagus, index="dbp")}
+#' \donttest{esophagus <- estimateDominance(esophagus, index="dbp")}
 #' # Calculates dbp and Core Abundance indices
 #' esophagus <- estimateDominance(esophagus, index=c("dbp", "core_abundance"))
 #' # Shows all indices

diff --git a/R/estimateRichness.R b/R/estimateRichness.R
@@ -178,7 +178,7 @@
 #'
 #' # Indices must be written correctly (all lowercase), otherwise an error
 #' # gets thrown
-#' \dontrun{esophagus <- estimateRichness(esophagus, index="ace")}
+#' \donttest{esophagus <- estimateRichness(esophagus, index="ace")}
 #'
 #' # Calculates Chao1 and ACE indices only
 #' esophagus <- estimateRichness(esophagus, index=c("chao1", "ace"),

diff --git a/R/getExperimentCrossAssociation.R b/R/getExperimentCrossAssociation.R
@@ -1002,7 +1002,7 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"),
     # If assays were identical, and duplicate variable pairs were dropped
     if( assays_identical ){
         # Change names so that they are not equal to colnames of variable_pairs
-        colnames(variable_pairs)[1:2] <- c("Var1_", "Var2_")
+        colnames(variable_pairs)[c(1,2)] <- c("Var1_", "Var2_")
         # Combine feature-pair names with correlation values and p-values
         correlations_and_p_values <- cbind(variable_pairs, correlations_and_p_values)
 
@@ -1170,27 +1170,27 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"),
           do.call(association_FUN, args = c(list(feature_mat), list(...)))
         },
         error = function(cond) {
-            stop(paste0("Error occurred during calculation. Check, e.g., that ",
+            stop("Error occurred during calculation. Check, e.g., that ",
                 "'association_FUN' fulfills requirements. 'association_FUN' ",
-                "threw a following error:\n",  cond),
+                "threw a following error:\n",  cond,
                 call. = FALSE)
         })
     } else {
         temp <- tryCatch({
             suppressWarnings( do.call(association_FUN, args = c(list(feature_mat), list(...))) )
         },
         error = function(cond) {
-            stop(paste0("Error occurred during calculation. Check, e.g., that ",
+            stop("Error occurred during calculation. Check, e.g., that ",
                     "'association_FUN' fulfills requirements. 'association_FUN' ",
-                    "threw a following error:\n",  cond),
+                    "threw a following error:\n",  cond,
                  call. = FALSE)
         })
     }
 
     # If temp's length is not 1, then function does not return single numeric value for each pair
     if( length(temp) != 1 ){
-        stop(paste0("Error occurred during calculation. Check that ", 
-            "'association_FUN' fulfills requirements."), 
+        stop("Error occurred during calculation. Check that ", 
+            "'association_FUN' fulfills requirements.", 
             call. = FALSE)
     } 
     return(temp)
@@ -1298,8 +1298,8 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"),
                                use="pairwise.complete.obs")))$order
     },
     error = function(cond) {
-        stop(paste0("Error occurred during sorting. Possible reason is that ",
-                    "correlation matrix includes NAs. Try with 'sort = FALSE'."), 
+        stop("Error occurred during sorting. Possible reason is that ",
+                    "correlation matrix includes NAs. Try with 'sort = FALSE'.", 
              call. = FALSE)
     }
     )
@@ -1308,8 +1308,8 @@ setMethod("getExperimentCrossCorrelation", signature = c(x = "ANY"),
                                use="pairwise.complete.obs")))$order
     },
     error = function(cond) {
-        stop(paste0("Error occurred during sorting. Possible reason is that ",
-                    "correlation matrix includes NAs. Try with 'sort = FALSE'."), 
+        stop("Error occurred during sorting. Possible reason is that ",
+                    "correlation matrix includes NAs. Try with 'sort = FALSE'.", 
              call. = FALSE)
     }
     )

diff --git a/R/loadFromMetaphlan.R b/R/loadFromMetaphlan.R
@@ -307,13 +307,13 @@ loadFromMetaphlan <- function(
         sample_names <- rownames(coldata)
         names(sample_names) <- sample_names
     } else{
-        sample_names <- sapply(rownames(coldata), function(x){
+        sample_names <- vapply(rownames(coldata), function(x){
             x <- colnames(tse)[grep(x, colnames(tse))]
             if( length(x) != 1 ){
                 x <- NULL
             }
             return(x)
-        })
+        },FUN.VALUE = character(1))
         sample_names <- unlist(sample_names)
     }
 

diff --git a/R/merge.R b/R/merge.R
@@ -210,14 +210,14 @@ setGeneric("mergeSamples",
 .check_assays_for_merge <- function(assay.type, assay){
     # Check if assays include binary or negative values
     if( all(assay == 0 | assay == 1) ){
-        warning(paste0("'",assay.type,"'", " includes binary values."),
+        warning("'",assay.type,"'", " includes binary values.",
                 "\nAgglomeration of it might lead to meaningless values.", 
                 "\nCheck the assay, and consider doing transformation again manually", 
                 " with agglomerated data.",
                 call. = FALSE)
     }
     if( !all( assay >= 0 | is.na(assay) ) ){
-        warning(paste0("'",assay.type,"'", " includes negative values."),
+        warning("'",assay.type,"'", " includes negative values.",
                 "\nAgglomeration of it might lead to meaningless values.",
                 "\nCheck the assay, and consider doing transformation again manually", 
                 " with agglomerated data.",

diff --git a/R/mergeSEs.R b/R/mergeSEs.R
@@ -369,7 +369,7 @@ setMethod("right_join", signature = c(x = "ANY"),
 
     # Loop through individual TreeSEs and add them to tse
     if( length(x) > 0 ){
-        for( i in 1:length(x) ){
+        for( i in seq_len(length(x)) ){
             # Give message if TRUE
             if( verbose ){
                 message("\r", i+1, "/", length(x)+1, appendLF = FALSE)
@@ -765,7 +765,7 @@ setMethod("right_join", signature = c(x = "ANY"),
     # Get the shared class that is highest in hierarchy
     if( all( classes %in% allowed_classes[1] ) ){
         class <- allowed_classes[1]
-    } else if( all( classes %in% allowed_classes[1:2] ) ){
+    } else if( all( classes %in% allowed_classes[c(1,2)] ) ){
         class <- allowed_classes[2]
     } else {
         class <- allowed_classes[3]

diff --git a/R/runCCA.R b/R/runCCA.R
@@ -452,7 +452,7 @@ setMethod("runCCA", "SingleCellExperiment",
     # Get the dissimilarity matrix based on original dissimilarity index
     # provided by user. If the analysis is CCA, disable method; calculate
     # always euclidean distances because CCA is based on Euclidean distances.
-    if( length(class(rda)) == 1 && class(rda) == "cca" ){
+    if( length(class(rda)) == 1 && is(rda, 'cca') ){
         dist_mat <- vegdist(mat, method = "euclidean")
     } else{
         dist_mat <- vegdist(mat, method = method, ...)

diff --git a/R/splitOn.R b/R/splitOn.R
@@ -291,6 +291,7 @@ setMethod("splitOn", signature = c(x = "TreeSummarizedExperiment"),
             # If the returned value is a list, go through all of them
             if( is(x, 'SimpleList') ){
                 x <- SimpleList(lapply(x, .agglomerate_trees))
+
             } else {
                 # Otherwise, the returned value is TreeSE
                 x <- .agglomerate_trees(x)

diff --git a/R/subsampleCounts.R b/R/subsampleCounts.R
@@ -10,6 +10,8 @@
 #' instances where it can be useful.
 #' Note that the output of \code{subsampleCounts} is not the equivalent as the 
 #' input and any result have to be verified with the original dataset.
+#' To maintain the reproducibility, please define the seed using set.seed() 
+#' before implement this function.
 #'
 #' @param x A
 #'   \code{SummarizedExperiment} object.
@@ -28,7 +30,7 @@
 #'   simulated this can equal to lowest number of total counts 
 #'   found in a sample or a user specified number. 
 #'   
-#' @param seed A random number seed for reproducibility of sampling. 
+#'  
 #' 
 #' @param replace Logical Default is \code{TRUE}. The default is with 
 #'   replacement (\code{replace=TRUE}). 
@@ -68,10 +70,11 @@
 #' # they will be removed.
 #' data(GlobalPatterns)
 #' tse <- GlobalPatterns
+#' set.seed(123)
 #' tse.subsampled <- subsampleCounts(tse, 
 #'                                   min_size = 60000, 
-#'                                   name = "subsampled", 
-#'                                   seed = 123)
+#'                                   name = "subsampled" 
+#'                                   )
 #' tse.subsampled
 #' dim(tse)
 #' dim(tse.subsampled)
@@ -84,7 +87,7 @@ NULL
 setGeneric("subsampleCounts", signature = c("x"),
            function(x, assay.type = assay_name, assay_name = "counts", 
                     min_size = min(colSums2(assay(x))),
-                    seed = runif(1, 0, .Machine$integer.max), replace = TRUE,
+                    replace = TRUE,
                     name = "subsampled", verbose = TRUE, ...)
                standardGeneric("subsampleCounts"))
 
@@ -94,86 +97,78 @@ setGeneric("subsampleCounts", signature = c("x"),
 #' @aliases rarifyCounts
 #' @export
 setMethod("subsampleCounts", signature = c(x = "SummarizedExperiment"),
-    function(x, assay.type = assay_name, assay_name = "counts", 
-             min_size = min(colSums2(assay(x))),
-       seed = runif(1, 0, .Machine$integer.max), replace = TRUE, 
-       name = "subsampled", verbose = TRUE, ...){
-
-        warning("Subsampling/Rarefying may undermine downstream analyses ",
-                "and have unintended consequences. Therefore, make sure ",
-                "this normalization is appropriate for your data.",
-              call. = FALSE)
-        .check_assay_present(assay.type, x)
-        if(any(assay(x, assay.type) %% 1 != 0)){
-            warning("assay contains non-integer values. Only counts table ",
-                    "is applicable...")
-        }
-        if(!is.logical(verbose)){
-            stop("`verbose` has to be logical i.e. TRUE or FALSE")
-        }
-        if(verbose){
-            # Print to screen this value
-            message("`set.seed(", seed, ")` was used to initialize repeatable ",
-                    "random subsampling.","\nPlease record this for your ",
-                    "records so others can reproduce.")
-        }
-        if(!.is_numeric_string(seed)){
-            stop("`seed` has to be an numeric value See `?set.seed`")
-        } 
-        if(!is.logical(replace)){
-            stop("`replace` has to be logical i.e. TRUE or FALSE")
-        } 
-        # Check name
-        if(!.is_non_empty_string(name) ||
-           name == assay.type){
-            stop("'name' must be a non-empty single character value and be ",
-                 "different from `assay.type`.",
-                 call. = FALSE)
-        }
-        set.seed(seed)
-        # Make sure min_size is of length 1.
-        if(length(min_size) > 1){
-            stop("`min_size` had more than one value. ", 
-                 "Specifiy a single integer value.")
-            min_size <- min_size[1]    
-        }
-        if(!is.numeric(min_size) || 
-           as.integer(min_size) != min_size && min_size <= 0){
-            stop("min_size needs to be a positive integer value.")
-        }
-        # get samples with less than min number of reads
-        if(min(colSums2(assay(x, assay.type))) < min_size){
-            rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < min_size]
-            # Return NULL, if no samples were found after subsampling
-            if( !any(!colnames(x) %in% rmsams) ){
-                stop("No samples were found after subsampling.",
-                     call. = FALSE)
-            }
-            if(verbose){
-                message(length(rmsams), " samples removed ",
-                        "because they contained fewer reads than `min_size`.")
-            }
-            # remove sample(s)
-            newtse <- x[, !colnames(x) %in% rmsams]
-        } else {
-            newtse <- x
-        }
-        newassay <- apply(assay(newtse, assay.type), 2, 
-                          .subsample_assay,
-                          min_size=min_size, replace=replace)
-        rownames(newassay) <- rownames(newtse)
-        # remove features not present in any samples after subsampling
-        message(paste(length(which(rowSums2(newassay) == 0)), "features", 
-                      "removed because they are not present in all samples", 
-                      "after subsampling."))
-        newassay <- newassay[rowSums2(newassay)>0,]
-        newtse <- newtse[rownames(newassay),]
-        assay(newtse, name, withDimnames=FALSE) <- newassay
-        newtse <- .add_values_to_metadata(newtse, 
-                                          "subsampleCounts_min_size",
-                                          min_size)
-        return(newtse)
-    }
+          function(x, assay.type = assay_name, assay_name = "counts", 
+                   min_size = min(colSums2(assay(x))),
+                    replace = TRUE, 
+                   name = "subsampled", verbose = TRUE, ...){
+
+              warning("Subsampling/Rarefying may undermine downstream analyses ",
+                      "and have unintended consequences. Therefore, make sure ",
+                      "this normalization is appropriate for your data.",
+                      call. = FALSE)
+              .check_assay_present(assay.type, x)
+              if(any(assay(x, assay.type) %% 1 != 0)){
+                  warning("assay contains non-integer values. Only counts table ",
+                          "is applicable...")
+              }
+              if(!is.logical(verbose)){
+                  stop("`verbose` has to be logical i.e. TRUE or FALSE")
+              }
+
+              if(!is.logical(replace)){
+                  stop("`replace` has to be logical i.e. TRUE or FALSE")
+              } 
+              # Check name
+              if(!.is_non_empty_string(name) ||
+                 name == assay.type){
+                  stop("'name' must be a non-empty single character value and be ",
+                       "different from `assay.type`.",
+                       call. = FALSE)
+              }
+              #set.seed(seed)
+              # Make sure min_size is of length 1.
+              if(length(min_size) > 1){
+                  stop("`min_size` had more than one value. ", 
+                       "Specifiy a single integer value.")
+                  min_size <- min_size[1]    
+              }
+              if(!is.numeric(min_size) || 
+                 as.integer(min_size) != min_size && min_size <= 0){
+                  stop("min_size needs to be a positive integer value.")
+              }
+              # get samples with less than min number of reads
+              if(min(colSums2(assay(x, assay.type))) < min_size){
+                  rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < min_size]
+                  # Return NULL, if no samples were found after subsampling
+                  if( !any(!colnames(x) %in% rmsams) ){
+                      stop("No samples were found after subsampling.",
+                           call. = FALSE)
+                  }
+                  if(verbose){
+                      message(length(rmsams), " samples removed ",
+                              "because they contained fewer reads than `min_size`.")
+                  }
+                  # remove sample(s)
+                  newtse <- x[, !colnames(x) %in% rmsams]
+              } else {
+                  newtse <- x
+              }
+              newassay <- apply(assay(newtse, assay.type), 2, 
+                                .subsample_assay,
+                                min_size=min_size, replace=replace)
+              rownames(newassay) <- rownames(newtse)
+              # remove features not present in any samples after subsampling
+              message(paste(length(which(rowSums2(newassay) == 0)), "features", 
+                            "removed because they are not present in all samples", 
+                            "after subsampling."))
+              newassay <- newassay[rowSums2(newassay)>0,]
+              newtse <- newtse[rownames(newassay),]
+              assay(newtse, name, withDimnames=FALSE) <- newassay
+              newtse <- .add_values_to_metadata(newtse, 
+                                                "subsampleCounts_min_size",
+                                                min_size)
+              return(newtse)
+          }
 )
 
 

diff --git a/R/summaries.R b/R/summaries.R
@@ -479,7 +479,7 @@ setMethod("summary", signature = c(object = "SummarizedExperiment"),
 .check_NAs_assay_counts <- function(x, assay.type){
     assay.x <- .get_assay(x, assay.type)
     if(any(is.na(assay.x))) {
-        stop(paste0("There are samples with NAs in 'assay': ", assay.type),
+        stop("There are samples with NAs in 'assay': ", assay.type,
              " . This function is limited to sequencing data only. ",
              "Where raw counts do not usually have NAs. ",
              "Try to supply raw counts",

diff --git a/R/transformCounts.R b/R/transformCounts.R
@@ -565,7 +565,7 @@ setMethod("relAbundanceCounts", signature = c(x = "SummarizedExperiment"),
         pseudocount <- ifelse(pseudocount, min(mat[mat>0]), 0)
         # Report pseudocount if positive value
         if ( pseudocount > 0 ){
-            message(paste("A pseudocount of", pseudocount, "was applied."))
+            message("A pseudocount of ", pseudocount, " was applied.")
         }
     }
     # Give warning if pseudocount should not be added