Skip to content

Commit

Permalink
Fix critical issue with mergeFeatures
Browse files Browse the repository at this point in the history
Signed-off-by: Daena Rys <[email protected]>
  • Loading branch information
Daenarys8 committed May 6, 2024
1 parent aaa55f2 commit d205d74
Show file tree
Hide file tree
Showing 7 changed files with 380 additions and 353 deletions.
57 changes: 29 additions & 28 deletions R/agglomerate.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Agglomerate data using taxonomic information
#'
#' Agglomeration functions can be used to sum-up data based on specific criteria
#' Agglomeration functions can be used to sum-up data based on specific criteria
#' such as taxonomic ranks, variables or prevalence.
#'
#' @param x a
Expand All @@ -26,29 +26,29 @@
#' @param agglomerate.tree \code{TRUE} or \code{FALSE}: should
#' \code{rowTree()} also be agglomerated? (Default:
#' \code{agglomerate.tree = FALSE})
#'
#'
#' @param agglomerateTree alias for \code{agglomerate.tree}.
#'
#' @param ... arguments passed to \code{agglomerateByRank} function for
#' \code{SummarizedExperiment} objects,
#' to \code{getPrevalence} and \code{getPrevalentTaxa} and used in
#' to \code{getPrevalence} and \code{getPrevalentTaxa} and used in
#' \code{agglomeratebyPrevalence},
#' to \code{\link[=merge-methods]{mergeRows}} and
#' \code{\link[scuttle:sumCountsAcrossFeatures]{sumCountsAcrossFeatures}}.
#' \itemize{
#' \item{\code{remove_empty_ranks}}{A single boolean value for selecting
#' \item{\code{remove_empty_ranks}}{A single boolean value for selecting
#' whether to remove those columns of rowData that include only NAs after
#' agglomeration. (By default: \code{remove_empty_ranks = FALSE})}
#' \item{\code{make_unique}}{A single boolean value for selecting
#' \item{\code{make_unique}}{A single boolean value for selecting
#' whether to make rownames unique. (By default: \code{make_unique = TRUE})}
#' \item{\code{detection}}{Detection threshold for absence/presence.
#' Either an absolute value compared directly to the values of \code{x}
#' \item{\code{detection}}{Detection threshold for absence/presence.
#' Either an absolute value compared directly to the values of \code{x}
#' or a relative value between 0 and 1, if \code{as_relative = FALSE}.}
#' \item{\code{prevalence}}{Prevalence threshold (in 0 to 1). The
#' required prevalence is strictly greater by default. To include the
#' \item{\code{prevalence}}{Prevalence threshold (in 0 to 1). The
#' required prevalence is strictly greater by default. To include the
#' limit, set \code{include_lowest} to \code{TRUE}.}
#' \item{\code{as.relative}}{Logical scalar: Should the detection
#' threshold be applied on compositional (relative) abundances?
#' \item{\code{as.relative}}{Logical scalar: Should the detection
#' threshold be applied on compositional (relative) abundances?
#' (default: \code{FALSE})}
#' }
#'
Expand All @@ -66,14 +66,14 @@
#' your results. If no loops exist (loops meaning two higher ranks containing
#' the same lower rank), the results should be comparable. You can check for
#' loops using \code{\link[TreeSummarizedExperiment:detectLoop]{detectLoop}}.
#'
#'
#' Agglomeration sums up the values of assays at the specified taxonomic level. With
#' certain assays, e.g. those that include binary or negative values, this summing
#' can produce meaningless values. In those cases, consider performing agglomeration
#' first, and then applying the transformation afterwards.
#'
#' @return
#' \code{agglomerateByRank} returns a taxonomically-agglomerated,
#' @return
#' \code{agglomerateByRank} returns a taxonomically-agglomerated,
#' optionally-pruned object of the same class as \code{x}.
#'
#' @name agglomerate-methods
Expand All @@ -92,16 +92,16 @@
#' ## How many taxa before/after agglomeration?
#' nrow(GlobalPatterns)
#' nrow(x1)
#'
#'
#' # agglomerate the tree as well
#' x2 <- agglomerateByRank(GlobalPatterns, rank="Family",
#' agglomerate.tree = TRUE)
#' nrow(x2) # same number of rows, but
#' rowTree(x1) # ... different
#' rowTree(x2) # ... tree
#'
#'
#' # If assay contains binary or negative values, summing might lead to meaningless
#' # values, and you will get a warning. In these cases, you might want to do
#' # values, and you will get a warning. In these cases, you might want to do
#' # agglomeration again at chosen taxonomic level.
#' tse <- transformAssay(GlobalPatterns, method = "pa")
#' tse <- agglomerateByRank(tse, rank = "Genus")
Expand All @@ -111,19 +111,19 @@
#' sum(is.na(rowData(GlobalPatterns)$Family))
#' x3 <- agglomerateByRank(GlobalPatterns, rank="Family", na.rm = TRUE)
#' nrow(x3) # different from x2
#'
#' # Because all the rownames are from the same rank, rownames do not include
#' # prefixes, in this case "Family:".
#'
#' # Because all the rownames are from the same rank, rownames do not include
#' # prefixes, in this case "Family:".
#' print(rownames(x3[1:3,]))
#'
#'
#' # To add them, use getTaxonomyLabels function.
#' rownames(x3) <- getTaxonomyLabels(x3, with_rank = TRUE)
#' print(rownames(x3[1:3,]))
#'
#'
#' # use 'remove_empty_ranks' to remove columns that include only NAs
#' x4 <- agglomerateByRank(GlobalPatterns, rank="Phylum", remove_empty_ranks = TRUE)
#' head(rowData(x4))
#'
#'
#' # If the assay contains NAs, you might want to consider replacing them,
#' # since summing-up NAs lead to NA
#' x5 <- GlobalPatterns
Expand All @@ -135,7 +135,7 @@
#' assay(x5)[ is.na(assay(x5)) ] <- 0
#' x6 <- agglomerateByRank(x5, "Kingdom")
#' head( assay(x6) )
#'
#'
#' ## Look at enterotype dataset...
#' data(enterotype)
#' ## Print the available taxonomic ranks. Shows only 1 available rank,
Expand Down Expand Up @@ -174,7 +174,7 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
call. = FALSE)
}
if(!.is_non_empty_string(rank)){
stop("'rank' must be an non empty single character value.",
stop("'rank' must be a non empty single character value.",
call. = FALSE)
}
if(!.is_a_bool(onRankOnly)){
Expand All @@ -189,7 +189,7 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
.check_taxonomic_rank(rank, x)
.check_for_taxonomic_data_order(x)
#

# Make a vector from the taxonomic data.
col <- which( taxonomyRanks(x) %in% rank )
tax_cols <- .get_tax_cols_from_se(x)
Expand Down Expand Up @@ -226,6 +226,7 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
# adjust rownames
rownames(x) <- getTaxonomyLabels(x, empty.fields, ...,
with_rank = FALSE, resolve_loops = FALSE)
rownames(x) <- sort(rownames(x))
# Remove those columns from rowData that include only NAs
x <- .remove_NA_cols_from_rowdata(x, ...)
x <- .add_values_to_metadata(x, "agglomerated_by_rank", rank)
Expand Down Expand Up @@ -339,7 +340,7 @@ setMethod("mergeFeaturesByRank", signature = c(x = "TreeSummarizedExperiment"),
.remove_NA_cols_from_rowdata <- function(x, remove_empty_ranks = FALSE, ...){
# Check remove_empty_ranks
if( !.is_a_bool(remove_empty_ranks) ){
stop("'remove_empty_ranks' must be a boolean value.",
stop("'remove_empty_ranks' must be a boolean value.",
call. = FALSE)
}
# If user wants to remove those columns
Expand Down Expand Up @@ -434,4 +435,4 @@ setMethod("mergeFeaturesByRank", signature = c(x = "TreeSummarizedExperiment"),
tree <- drop.tip(tree, remove_index)
}
return(tree)
}
}
Loading

0 comments on commit d205d74

Please sign in to comment.