diff --git a/DESCRIPTION b/DESCRIPTION index fccb7569a..a869dc843 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Giotto Title: Spatial Single-Cell Transcriptomics Toolbox -Version: 4.1.5 +Version: 4.1.6 Authors@R: c( person("Ruben", "Dries", email = "rubendries@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7650-7754")), @@ -28,7 +28,7 @@ RoxygenNote: 7.3.2 Depends: R (>= 4.4.1), methods, - GiottoClass (>= 0.4.1) + GiottoClass (>= 0.4.5) Imports: BiocParallel, BiocSingular, @@ -37,8 +37,8 @@ Imports: dbscan (>= 1.1-3), ggraph, ggplot2 (>= 3.1.1), - GiottoUtils (>= 0.2.0), - GiottoVisuals (>= 0.2.6), + GiottoUtils (>= 0.2.2), + GiottoVisuals (>= 0.2.10), igraph (>= 1.2.4.1), Matrix (>= 1.6-2), MatrixGenerics, diff --git a/NAMESPACE b/NAMESPACE index 1c8052de4..cf94c401c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -182,6 +182,7 @@ export(doRandomWalkCluster) export(doSNNCluster) export(doScrubletDetect) export(doStardistSegmentation) +export(dotPlot) export(estimateAutomatedImageRegistrationWithSIFT) export(estimateImageBg) export(exportGiottoViewer) @@ -753,6 +754,7 @@ importFrom(GiottoVisuals,dimGenePlot3D) importFrom(GiottoVisuals,dimPlot) importFrom(GiottoVisuals,dimPlot2D) importFrom(GiottoVisuals,dimPlot3D) +importFrom(GiottoVisuals,dotPlot) importFrom(GiottoVisuals,getColors) importFrom(GiottoVisuals,giottoSankeyPlan) importFrom(GiottoVisuals,plotHeatmap) diff --git a/NEWS.md b/NEWS.md index cc8301f61..1fa9f3c82 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,25 @@ +# Giotto 4.1.6 (2024/12/09) + +## Bug fixes +* `doScrubletDetect()` seed setting + +## Enhancements +* `labelTransfer()` now has `integration_method = "harmony"` for label transferring with an integration pipeline. See ?labelTransfer and the `integration_method` section. +* `importXenium()` `load_transcripts()` can now return a `data.table` rather than the `giottoPoints` representation + +## New +* `doMesmerSegmentation()` and `doStardistSegmentation()` segmentation wrappers +* `.varexp()` internal for calculating SVD variance determined with support for partial SVDs +* `.cumvar()` internal for calculating cumulative variance explained +* re-export of `dotPlot()` from GiottoVisuals + +## Changes +* GiottoClass req raised to 0.4.5 +* GiottoUtils req raised to 0.2.2 +* GiottoVisuals req raised to 0.2.10 + + # Giotto 4.1.5 (2024/11/08) ## Enhancements diff --git a/R/clustering.R b/R/clustering.R index 7cc71d97c..1a4c8cf60 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -3309,20 +3309,31 @@ getDendrogramSplits <- function( #' one of the sets to the other based on KNN similarity voting in that space. #' @param x target object #' @param y source object +#' @param spat_unit spatial unit. A character vector of 2 can also be passed +#' for x (1) and y (2). Setting defaults with `activeSpatUnit()` may be easier +#' @param feat_type feature type. A character vector of 2 can also be passed +#' for x (1) and y (2). Setting defaults with `activeFeatType()` may be easier #' @param source_cell_ids cell/spatial IDs with the source labels to transfer #' @param target_cell_ids cell/spatial IDs to transfer the labels to. #' IDs from `source_cell_ids` are always included as well. #' @param labels metadata column in source with labels to transfer #' @param k number of k-neighbors to train a KNN classifier #' @param name metadata column in target to apply the full set of labels to +#' @param integration_method character. Integration method to use when +#' transferring labels. Options are "none" (default) and "harmony". See section +#' below for more info and params. #' @param prob output knn probabilities together with label predictions #' @param reduction reduction on cells or features (default = "cells") #' @param reduction_method shared reduction method (default = "pca" space) #' @param reduction_name name of shared reduction space (default name = "pca") #' @param dimensions_to_use dimensions to use in shared reduction space #' (default = 1:10) -#' @returns object `x` with new transferred labels added to metadata #' @inheritDotParams FNN::knn -train -test -cl -k -prob +#' @returns object `x` with new transferred labels added to metadata. If +#' running on `x` and `y` objects, `integration_method = "harmony"`, +#' `plot_join_labels = TRUE`, and `return_plot = TRUE` is set, output will +#' be instead a named list of `gobject` (updated `x`), and `label_source_plot` +#' and `label_target_plot` `ggplot2` objects #' @details #' This function trains a KNN classifier with [FNN::knn()]. #' The training data is from object `y` or `source_cell_ids` subset in `x` and @@ -3342,6 +3353,34 @@ getDendrogramSplits <- function( #' used to transfer labels from one set of annotated data to another dataset #' based on expression similarity after joining and integrating. #' +#' # integration_method +#' When running `labelTranfer()` on two `giotto` objects, an integration +#' pipeline can also be run to align the two datasets together before the +#' transfer. `integration_method = "harmony"` will make a temporary joined +#' object on shared features, filter to remove 0 values, run PCA, then harmony +#' integration, before performing the label transfer from `y` to `x` on the +#' integrated harmony embedding space. Additional params that can be used with +#' this method are:\cr +#' +#' * `source_cell_ids` - character. subset of `y` cells to use +#' * `target_cell_ids` - character. subset of `x` cells to use +#' * `expression_values` - character. expression values in `x` and `y` to use +#' to generate combined space. Default = `"raw"` +#' * `use_hvf` - logical. whether to calculate highly variable features to use +#' for PCA calculation. Default = `TRUE`, but setting `FALSE` is recommended if +#' any of `x` or `y` has roughly 1000 features or fewer +#' * `plot_join_labels` - logical. Whether to plot source labels and final +#' labels in the joine object UMAP. +#' * `normalize_params` - named list. Additional params to pass to +#' `normalizeGiotto()` if desired. +#' * `pca_params` - named list. Additional params to pass to `runPCA()` if +#' desired. +#' * `integration_params` - named list. Additional params to pass to +#' `runGiottoHarmony()` if desired. +#' * `plot_params` - named list. Additional params to pass to `plotUMAP()` if +#' desired. Only relevant when `plot_join_labels = TRUE` +#' * `verbose` - verbosity +#' #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' id_subset <- sample(spatIDs(g), 300) @@ -3364,45 +3403,314 @@ setGeneric( function(x, y, ...) standardGeneric("labelTransfer") ) + +.lab_transfer_harmony <- function(x, y, + source_cell_ids = NULL, + target_cell_ids = NULL, + expression_values = "raw", + dimensions_to_use = 1:10, + spat_unit = NULL, + feat_type = NULL, + use_hvf = TRUE, + plot_join_labels = FALSE, + normalize_params = list(), + pca_params = list(), + integration_params = list(), + plot_params = list(), + verbose = NULL, + ... # passes to labelTransfer +) { + # NSE vars + cell_ID <- transfer <- transfer_prob <- NULL + + .check_plot_output <- function(pparam) { + direct_setting <- plot_params[[pparam]] + if (!is.null(direct_setting)) return(direct_setting) + any(instructions(x, pparam), instructions(y, pparam)) + } + + .show_plot <- .check_plot_output("show_plot") + .return_plot <- .check_plot_output("return_plot") + .save_plot <- .check_plot_output("save_plot") + + # turn off lower level updates + options("giotto.update_param" = FALSE) + on.exit({ + options("giotto.update_param" = TRUE) + }, add = TRUE) + + vmsg(.v = verbose, "1. Creating temporary joined object...") + # match features + ufids <- intersect(featIDs(x), featIDs(y)) + if (length(ufids) == 0L) { + stop("labelTransfer harmony: No common features between `x` and `y`", + call. = FALSE) + } + + # get needed subobjects + xdata <- x[[ + "expression", + expression_values, + spat_unit = spat_unit[[1]], + feat_type = feat_type[[1]], + ]] + ydata <- y[[ + "expression", + expression_values, + spat_unit = spat_unit[[2]], + feat_type = feat_type[[2]], + ]] + ymeta <- y[[ + "cell_metadata", + spat_unit = spat_unit[[2]], + feat_type = feat_type[[2]] + ]] + + # harmonize nesting + objName(xdata) <- rep("raw", length(xdata)) + objName(ydata) <- rep("raw", length(ydata)) + ydata <- c(ydata, ymeta) + spatUnit(xdata) <- rep("cell", length(xdata)) + featType(xdata) <- rep("rna", length(xdata)) + spatUnit(ydata) <- rep("cell", length(ydata)) + featType(ydata) <- rep("rna", length(ydata)) + + dummy_sl_x <- data.table::data.table( + cell_ID = colnames(xdata[[1]][]), + sdimx = 0, sdimy = 0 + ) |> + createSpatLocsObj( + name = "raw", spat_unit = "cell", verbose = FALSE + ) + dummy_sl_y <- data.table::data.table( + cell_ID = colnames(ydata[[1]][]), + sdimx = 0, sdimy = 0 + ) |> + createSpatLocsObj( + name = "raw", spat_unit = "cell", verbose = FALSE + ) + dummy_instrs <- instructions(x) + + xdata <- c(xdata, dummy_sl_x) + ydata <- c(ydata, dummy_sl_y) + + # generate temp objects + xj <- setGiotto(giotto(instructions = dummy_instrs, initialize = FALSE), + xdata, verbose = FALSE) + yj <- setGiotto(giotto(instructions = dummy_instrs, initialize = FALSE), + ydata, verbose = FALSE) + + xj <- subsetGiotto(xj, feat_ids = ufids, cell_ids = target_cell_ids) + yj <- subsetGiotto(yj, feat_ids = ufids, cell_ids = source_cell_ids) + + # join on intersected feats and specified cell_IDs + j <- joinGiottoObjects(list(xj, yj), gobject_names = c("x", "y")) + + # cleanup + rm(y, xj, yj, xdata, ydata, ymeta, dummy_sl_x, dummy_sl_y, dummy_instrs) + + vmsg(.v = verbose, "2. Performing simple filter...") + j <- filterGiotto(j, + expression_threshold = 1, + min_det_feats_per_cell = 1, + feat_det_in_min_cells = 1, + verbose = FALSE + ) + + vmsg(.v = verbose, "3. Running normalize...") + normalize_params$verbose <- FALSE + # process + j <- do.call(normalizeGiotto, + args = c(list(gobject = j), normalize_params) + ) + + if (use_hvf) { + vmsg(.v = verbose, "-- Calculating HVF...") + j <- calculateHVF(j) + pca_params$feats_to_use = pca_params$feats_to_use %null% "hvf" + } else { + pca_params <- c(pca_params, list(feats_to_use = NULL)) + } + + vmsg(.v = verbose, "4. Running PCA...") + j <- do.call(runPCA, args = c(list(gobject = j), pca_params)) + + # TODO determine dims to use via cumvar >= 30% + + # harmony + integration_params$name <- "harmony" + integration_params$vars_use = "list_ID" + integration_params$dim_reduction_name = "pca" + integration_params$dimensions_to_use = dimensions_to_use + + vmsg(.v = verbose, "5. Generating shared Harmony embedding space...") + j <- do.call(runGiottoHarmony, + c(list(gobject = j), integration_params) + ) + + # transfer + transfer_args <- list(...) + transfer_args$x <- j + transfer_args$source_cell_ids <- spatIDs(j, subset = list_ID == "y") + transfer_args$dimensions_to_use = dimensions_to_use + transfer_args$reduction = "cells" + transfer_args$reduction_method = "harmony" + transfer_args$reduction_name = "harmony" + + vmsg(.v = verbose, "6. Performing label transfer...") + j <- do.call(labelTransfer, transfer_args) + + res <- pDataDT(j) + res <- res[list_ID == "x"] + res[, cell_ID := gsub("^x-", "", cell_ID)] + tname <- transfer_args$name + res <- res[, .SD, .SDcols = c("cell_ID", tname, paste0(tname, "_prob"))] + + x <- addCellMetadata(x, + new_metadata = res, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + + # plot outputs + if (!any(.save_plot, .return_plot, .show_plot) && plot_join_labels) { + return(x) # return early if plotting not needed + } + + vmsg(.v = verbose, "7. plot_join_labels = TRUE: Running UMAP...") + + j <- runUMAP(j, + dim_reduction_to_use = "harmony", + dim_reduction_name = "harmony", + name = "harmony_umap", + dimensions_to_use = dimensions_to_use + ) + + plotlabs <- unique(res[[tname]]) + default_colors <- getRainbowColors( + n = length(plotlabs), slim = c(0.5, 1), vlim = c(0.3, 1) + ) + names(default_colors) <- plotlabs + + plot_params$gobject <- j + plot_params$dim_reduction_name <- "harmony_umap" + plot_params$color_as_factor <- TRUE + plot_params$point_size <- plot_params$point_size %null% 0.5 + plot_params$point_border_stroke <- plot_params$point_border_stroke %null% 0 + plot_params$cell_color_code <- + plot_params$cell_color_code %null% default_colors + + p1_params <- p2_params <- plot_params + + p1_params$cell_color <- transfer_args$labels + p1_params$select_cells <- spatIDs(j, subset = list_ID == "y") + p1_params$other_point_size <- p1_params$other_point_size %null% 0.3 + p1_params$show_plot <- FALSE + p1_params$return_plot <- TRUE + p1_params$title <- "source labels" + + p2_params$cell_color <- tname + p2_params$show_plot <- FALSE + p2_params$return_plot <- TRUE + p2_params$title <- "final labels" + + plot_list <- list(label_source_plot = do.call(plotUMAP, p1_params), + label_final_plot = do.call(plotUMAP, p2_params)) + + if (.show_plot) { + print(plot_grid(plotlist = plot_list)) + } + if (.save_plot) { + plot_output_handler(x, plot_list[[1]], + save_plot = .save_plot, + default_save_name = "transferLabels_source", + ) + } + if (.return_plot) { + x <- c(list(gobject = x), plot_list) + } + + return(x) +} + + +# ** giotto, giotto #### + #' @rdname labelTransfer #' @export setMethod("labelTransfer", signature(x = "giotto", y = "giotto"), function( x, y, - spat_unit = NULL, - feat_type = NULL, labels, k = 10, name = paste0("trnsfr_", labels), + integration_method = c("none", "harmony"), prob = TRUE, reduction = "cells", reduction_method = "pca", reduction_name = "pca", dimensions_to_use = 1:10, + spat_unit = NULL, + feat_type = NULL, return_gobject = TRUE, ...) { - # NSE vars - temp_name <- cell_ID <- temp_name_prob <- NULL package_check(pkg_name = "FNN", repository = "CRAN") - spat_unit <- set_default_spat_unit(x, spat_unit = spat_unit) - feat_type <- set_default_feat_type(x, - spat_unit = spat_unit, feat_type = feat_type + + integration_method <- match.arg(integration_method, choices = c( + "none", "harmony" + )) + + if (!labels %in% colnames(pDataDT(y))) { + stop("`labels` not found in cell metadata of `y`", call. = FALSE) + } + + # norm su and ft lengths + if (length(spat_unit) == 1L) { + spat_unit <- rep(spat_unit, 2L) + } + if (length(feat_type) == 1L) { + feat_type <- rep(feat_type, 2L) + } + + if (integration_method == "harmony") { + a <- get_args_list(...) + a$integration_method <- NULL + # this function needs error handling or it locks the console + res <- tryCatch({ + do.call(.lab_transfer_harmony, a) + }, error = function(e) { + stop(wrap_txtf("labelTransfer: harmony:\n%s", e$message), + call. = FALSE) + }) + return(res) + } + + # NSE vars + temp_name <- cell_ID <- temp_name_prob <- NULL + + su1 <- set_default_spat_unit(x, spat_unit = spat_unit[[1]]) + su2 <- set_default_spat_unit(y, spat_unit = spat_unit[[2]]) + ft1 <- set_default_feat_type( + x, spat_unit = su1, feat_type = feat_type[[1]] + ) + ft2 <- set_default_feat_type( + y, spat_unit = su2, feat_type = feat_type[[2]] ) # get data cx_src <- getCellMetadata(y, - spat_unit = spat_unit, - feat_type = feat_type, + spat_unit = su2, + feat_type = ft2, output = "data.table" ) cx_tgt <- getCellMetadata(x, - spat_unit = spat_unit, - feat_type = feat_type, + spat_unit = su1, + feat_type = ft1, output = "data.table" ) dim_coord <- getDimReduction(x, - spat_unit = spat_unit, - feat_type = feat_type, + spat_unit = su1, + feat_type = ft1, reduction = reduction, reduction_method = reduction_method, name = reduction_name, @@ -3479,8 +3787,8 @@ setMethod("labelTransfer", signature(x = "giotto", y = "giotto"), function( if (return_gobject) { x <- addCellMetadata(x, - spat_unit = spat_unit, - feat_type = feat_type, + spat_unit = su1, + feat_type = ft1, new_metadata = cx_tgt, by_column = TRUE, column_cell_ID = "cell_ID" @@ -3491,6 +3799,9 @@ setMethod("labelTransfer", signature(x = "giotto", y = "giotto"), function( } }) + +# ** giotto, missing #### + #' @rdname labelTransfer #' @export setMethod("labelTransfer", signature(x = "giotto", y = "missing"), function( diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 9dc52cfc9..506410479 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -238,6 +238,7 @@ setMethod( dropcols = c(), qv_threshold = obj@qv, cores = determine_cores(), + output = c("giottoPoints", "data.table"), verbose = NULL) { .xenium_transcript( path = path, @@ -247,6 +248,7 @@ setMethod( dropcols = dropcols, qv_threshold = qv_threshold, cores = cores, + output = output, verbose = verbose ) } @@ -690,6 +692,7 @@ importXenium <- function(xenium_dir = NULL, qv_threshold = 20) { dropcols = c(), qv_threshold = 20, cores = determine_cores(), + output = c("giottoPoints", "data.table"), verbose = NULL) { if (missing(path)) { stop(wrap_txt( @@ -704,6 +707,8 @@ importXenium <- function(xenium_dir = NULL, qv_threshold = 20) { vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) vmsg(.v = verbose, .is_debug = TRUE, path) + output <- match.arg(output, choices = c("giottoPoints", "data.table")) + # read in as data.table a <- list( path = path, @@ -726,6 +731,8 @@ importXenium <- function(xenium_dir = NULL, qv_threshold = 20) { y <- NULL # NSE var if (flip_vertical) tx[, y := -y] + if (output == "data.table") return(tx) + # create gpoints gpointslist <- createGiottoPoints( x = tx, @@ -734,7 +741,8 @@ importXenium <- function(xenium_dir = NULL, qv_threshold = 20) { verbose = FALSE ) - if (inherits(gpointslist, "list")) { + # enforce list + if (!inherits(gpointslist, "list")) { gpointslist <- list(gpointslist) } diff --git a/R/dimension_reduction.R b/R/dimension_reduction.R index 8bd3bccaa..3b26e77b1 100644 --- a/R/dimension_reduction.R +++ b/R/dimension_reduction.R @@ -1928,24 +1928,9 @@ jackstrawPlot <- function( } n <- ncol(dat) m <- nrow(dat) - ndf <- min(m, n - 1, ncp) # this is a limitation of svd singular values - sum_of_squared_singular_vals <- sum(dat^2) - - # pick SVD fun based on whether partial or full is appropriate - # These biocsingular functions should not scale or center - svd_fun <- if (ndf >= 0.5 * m || ndf >= 100) { - BiocSingular::runExactSVD - } else { - BiocSingular::runIrlbaSVD - } # partial SVDs - - .calc_svd_var_explained <- function(x, k) { - res <- svd_fun(x, k = k) - singular_val_square <- res$d[1:k]^2 - return(singular_val_square / sum_of_squared_singular_vals) - } - - dstat <- .calc_svd_var_explained(dat, k = ndf) + ndf <- min(m, n - 1, ncp) # this is also calculated in .varexp + + dstat <- .varexp(dat, k = ncp) cum_var_explained <- cumsum(dstat) # randomize and compare @@ -1961,7 +1946,7 @@ jackstrawPlot <- function( for (i in seq_len(iter)) { pb() dat0 <- t(apply(dat, 1, sample)) - dstat0[i, ] <- .calc_svd_var_explained(dat0, k = ndf) + dstat0[i, ] <- .varexp(dat0, k = ncp) } }) @@ -1977,7 +1962,39 @@ jackstrawPlot <- function( return(list(r = r, p = p, cum_var_explained = cum_var_explained)) } +# calculate SVD variance explained, with support for partial SVDs +.varexp <- function(dat, k = 20) { + if (missing(dat)) { + stop("`dat` is required!") + } + n <- ncol(dat) + m <- nrow(dat) + ndf <- min(m, n - 1, k) # this is a limitation of svd singular values + sum_of_squared_singular_vals <- sum(dat^2) + + # pick SVD fun based on whether partial or full is appropriate + # These biocsingular functions should not scale or center + svd_fun <- if (ndf >= 0.5 * m || ndf >= 100) { + BiocSingular::runExactSVD + } else { + BiocSingular::runIrlbaSVD + } # partial SVDs + + res <- svd_fun(dat, k) + singular_val_square <- res$d[1:k]^2 + perc <- singular_val_square / sum_of_squared_singular_vals + return(perc) +} +# cumulative variance explained +.cumvar <- function(dat, k = 20, last = TRUE) { + a <- get_args_list(keep = c("dat", "k")) + res <- cumsum(do.call(.varexp, a)) + if (last) { + res <- tail(res, 1L) + } + return(res) +} #' @title signPCA diff --git a/R/normalize.R b/R/normalize.R index 510788d8f..47ca2eca5 100644 --- a/R/normalize.R +++ b/R/normalize.R @@ -425,8 +425,10 @@ normalizeGiotto <- function(gobject, ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- setGiotto(gobject, norm_expr, initialize = FALSE) - gobject <- setGiotto(gobject, norm_scaled_expr, initialize = FALSE) + gobject <- setGiotto( + gobject, norm_expr, verbose = verbose, initialize = FALSE) + gobject <- setGiotto( + gobject, norm_scaled_expr, verbose = verbose, initialize = FALSE) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 6. return Giotto object diff --git a/R/python_scrublet.R b/R/python_scrublet.R index 0f422ceae..2740134f0 100644 --- a/R/python_scrublet.R +++ b/R/python_scrublet.R @@ -73,11 +73,9 @@ doScrubletDetect <- function( # set seed if (!is.null(seed)) { - seed_number <- as.numeric(seed) - reticulate::py_set_seed( - seed = seed_number, - disable_hash_randomization = TRUE - ) + seed_number <- as.integer(seed) + } else { + seed_number <- random_seed() } # Set feat_type and spat_unit @@ -116,7 +114,8 @@ doScrubletDetect <- function( min_counts = min_counts, min_cells = min_cells, min_gene_variability_pctl = min_gene_variability_pctl, - n_prin_comps = n_prin_comps + n_prin_comps = n_prin_comps, + seed_number = seed_number ) scrublet_out <- data.table::data.table( diff --git a/R/spatial_enrichment.R b/R/spatial_enrichment.R index 8864acc02..ec2852a73 100644 --- a/R/spatial_enrichment.R +++ b/R/spatial_enrichment.R @@ -2279,7 +2279,7 @@ enrich_deconvolution <- function( ct_exp, cutoff) { ##### generate enrich 0/1 matrix based on expression matrix - ct_exp <- ct_exp[rowSums(ct_exp) > 0, ] + ct_exp <- ct_exp[rowSums(ct_exp) > 0, , drop = FALSE] enrich_matrix <- matrix(0, nrow = dim(ct_exp)[1], ncol = dim(ct_exp)[2]) rowmax_col <- Rfast::rowMaxs(ct_exp) for (i in seq_along(rowmax_col)) { @@ -2321,9 +2321,9 @@ enrich_deconvolution <- function( ct_gene <- c(ct_gene, sig_gene_j) } uniq_ct_gene <- intersect(rownames(expr), unique(ct_gene)) - select_sig_exp <- ct_exp[uniq_ct_gene, ct] + select_sig_exp <- ct_exp[uniq_ct_gene, ct, drop = FALSE] cluster_i_cell <- which(cluster_info == cluster_sort[i]) - cluster_cell_exp <- expr[uniq_ct_gene, cluster_i_cell] + cluster_cell_exp <- expr[uniq_ct_gene, cluster_i_cell, drop = FALSE] cluster_i_dwls <- optimize_deconvolute_dwls( cluster_cell_exp, select_sig_exp @@ -2383,9 +2383,9 @@ spot_deconvolution <- function( ct_gene <- c(ct_gene, sig_gene_j) } uniq_ct_gene <- intersect(rownames(expr), unique(ct_gene)) - select_sig_exp <- ct_exp[uniq_ct_gene, ct_i] + select_sig_exp <- ct_exp[uniq_ct_gene, ct_i, drop = FALSE] cluster_i_cell <- which(cluster_info == cluster_sort[i]) - cluster_cell_exp <- expr[uniq_ct_gene, cluster_i_cell] + cluster_cell_exp <- expr[uniq_ct_gene, cluster_i_cell, drop = FALSE] ###### calculate ###### overlap signature with spatial genes all_exp <- Matrix::rowMeans(cluster_cell_exp) diff --git a/R/suite_reexports.R b/R/suite_reexports.R index 905811ab7..833d0b6c6 100644 --- a/R/suite_reexports.R +++ b/R/suite_reexports.R @@ -471,6 +471,8 @@ GiottoClass::writeGiottoLargeImage #' @export GiottoVisuals::addGiottoImageToSpatPlot #' @export +GiottoVisuals::dotPlot +#' @export GiottoVisuals::dimCellPlot #' @export GiottoVisuals::dimCellPlot2D diff --git a/inst/python/python_scrublet.py b/inst/python/python_scrublet.py index 4b856b53d..faedcc0e5 100644 --- a/inst/python/python_scrublet.py +++ b/inst/python/python_scrublet.py @@ -1,13 +1,14 @@ import scrublet as scr -def python_scrublet(counts_matrix, expected_doublet_rate, min_counts, min_cells, min_gene_variability_pctl, n_prin_comps): +def python_scrublet(counts_matrix, expected_doublet_rate, min_counts, min_cells, min_gene_variability_pctl, n_prin_comps, seed_number=1234): min_counts = int(min_counts) min_cells = int(min_cells) min_gene_variability_pctl = int(min_gene_variability_pctl) n_prin_comps = int(n_prin_comps) + random_state = int(seed_number) - scrub = scr.Scrublet(counts_matrix=counts_matrix, expected_doublet_rate=expected_doublet_rate) + scrub = scr.Scrublet(counts_matrix=counts_matrix, expected_doublet_rate=expected_doublet_rate, random_state=random_state) doublet_scores, predicted_doublets = scrub.scrub_doublets(min_counts=min_counts,min_cells=min_cells, min_gene_variability_pctl=min_gene_variability_pctl, n_prin_comps=n_prin_comps) return_list = [] diff --git a/man/labelTransfer.Rd b/man/labelTransfer.Rd index dbf876a0b..31da8a1eb 100644 --- a/man/labelTransfer.Rd +++ b/man/labelTransfer.Rd @@ -10,16 +10,17 @@ voting} \S4method{labelTransfer}{giotto,giotto}( x, y, - spat_unit = NULL, - feat_type = NULL, labels, k = 10, name = paste0("trnsfr_", labels), + integration_method = c("none", "harmony"), prob = TRUE, reduction = "cells", reduction_method = "pca", reduction_name = "pca", dimensions_to_use = 1:10, + spat_unit = NULL, + feat_type = NULL, return_gobject = TRUE, ... ) @@ -53,6 +54,10 @@ voting} \item{name}{metadata column in target to apply the full set of labels to} +\item{integration_method}{character. Integration method to use when +transferring labels. Options are "none" (default) and "harmony". See section +below for more info and params.} + \item{prob}{output knn probabilities together with label predictions} \item{reduction}{reduction on cells or features (default = "cells")} @@ -64,6 +69,12 @@ voting} \item{dimensions_to_use}{dimensions to use in shared reduction space (default = 1:10)} +\item{spat_unit}{spatial unit. A character vector of 2 can also be passed +for x (1) and y (2). Setting defaults with \code{activeSpatUnit()} may be easier} + +\item{feat_type}{feature type. A character vector of 2 can also be passed +for x (1) and y (2). Setting defaults with \code{activeFeatType()} may be easier} + \item{...}{ Arguments passed on to \code{\link[FNN:knn]{FNN::knn}} \describe{ @@ -76,7 +87,11 @@ voting} IDs from \code{source_cell_ids} are always included as well.} } \value{ -object \code{x} with new transferred labels added to metadata +object \code{x} with new transferred labels added to metadata. If +running on \code{x} and \code{y} objects, \code{integration_method = "harmony"}, +\code{plot_join_labels = TRUE}, and \code{return_plot = TRUE} is set, output will +be instead a named list of \code{gobject} (updated \code{x}), and \code{label_source_plot} +and \code{label_target_plot} \code{ggplot2} objects } \description{ When two sets of data share an embedding space, transfer the labels from @@ -101,6 +116,36 @@ labels to the remaining cells in the target Giotto object. It can also be used to transfer labels from one set of annotated data to another dataset based on expression similarity after joining and integrating. } +\section{integration_method}{ +When running \code{labelTranfer()} on two \code{giotto} objects, an integration +pipeline can also be run to align the two datasets together before the +transfer. \code{integration_method = "harmony"} will make a temporary joined +object on shared features, filter to remove 0 values, run PCA, then harmony +integration, before performing the label transfer from \code{y} to \code{x} on the +integrated harmony embedding space. Additional params that can be used with +this method are:\cr +\itemize{ +\item \code{source_cell_ids} - character. subset of \code{y} cells to use +\item \code{target_cell_ids} - character. subset of \code{x} cells to use +\item \code{expression_values} - character. expression values in \code{x} and \code{y} to use +to generate combined space. Default = \code{"raw"} +\item \code{use_hvf} - logical. whether to calculate highly variable features to use +for PCA calculation. Default = \code{TRUE}, but setting \code{FALSE} is recommended if +any of \code{x} or \code{y} has roughly 1000 features or fewer +\item \code{plot_join_labels} - logical. Whether to plot source labels and final +labels in the joine object UMAP. +\item \code{normalize_params} - named list. Additional params to pass to +\code{normalizeGiotto()} if desired. +\item \code{pca_params} - named list. Additional params to pass to \code{runPCA()} if +desired. +\item \code{integration_params} - named list. Additional params to pass to +\code{runGiottoHarmony()} if desired. +\item \code{plot_params} - named list. Additional params to pass to \code{plotUMAP()} if +desired. Only relevant when \code{plot_join_labels = TRUE} +\item \code{verbose} - verbosity +} +} + \examples{ g <- GiottoData::loadGiottoMini("visium") id_subset <- sample(spatIDs(g), 300) diff --git a/man/reexports.Rd b/man/reexports.Rd index f9575eb02..15e96008b 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -223,6 +223,7 @@ \alias{updateGiottoPolygonObject} \alias{writeGiottoLargeImage} \alias{addGiottoImageToSpatPlot} +\alias{dotPlot} \alias{dimCellPlot} \alias{dimCellPlot2D} \alias{dimFeatPlot2D} @@ -301,6 +302,6 @@ below to see their documentation. \item{GiottoUtils}{\code{\link[GiottoUtils:pipe]{\%>\%}}, \code{\link[GiottoUtils]{getDistinctColors}}, \code{\link[GiottoUtils]{getRainbowColors}}} - \item{GiottoVisuals}{\code{\link[GiottoVisuals]{addGiottoImageToSpatPlot}}, \code{\link[GiottoVisuals]{dimCellPlot}}, \code{\link[GiottoVisuals:dimCellPlot]{dimCellPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot3D}}, \code{\link[GiottoVisuals:dimFeatPlot3D]{dimGenePlot3D}}, \code{\link[GiottoVisuals]{dimPlot}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot2D}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot3D}}, \code{\link[GiottoVisuals]{getColors}}, \code{\link[GiottoVisuals]{giottoSankeyPlan}}, \code{\link[GiottoVisuals]{plotHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataCellsHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataHeatmap}}, \code{\link[GiottoVisuals]{plotPCA}}, \code{\link[GiottoVisuals]{plotPCA_2D}}, \code{\link[GiottoVisuals]{plotPCA_3D}}, \code{\link[GiottoVisuals]{plotStatDelaunayNetwork}}, \code{\link[GiottoVisuals]{plotTSNE}}, \code{\link[GiottoVisuals]{plotTSNE_2D}}, \code{\link[GiottoVisuals]{plotTSNE_3D}}, \code{\link[GiottoVisuals]{plotUMAP}}, \code{\link[GiottoVisuals]{plotUMAP_2D}}, \code{\link[GiottoVisuals]{plotUMAP_3D}}, \code{\link[GiottoVisuals]{sankeyLabel}}, \code{\link[GiottoVisuals:sankeyLabel]{sankeyLabel<-}}, \code{\link[GiottoVisuals]{sankeyPlot}}, \code{\link[GiottoVisuals]{sankeyRelate}}, \code{\link[GiottoVisuals:sankeyRelate]{sankeyRelate<-}}, \code{\link[GiottoVisuals]{sankeySet}}, \code{\link[GiottoVisuals]{sankeySetAddresses}}, \code{\link[GiottoVisuals]{showClusterDendrogram}}, \code{\link[GiottoVisuals]{showClusterHeatmap}}, \code{\link[GiottoVisuals]{showColorInstructions}}, \code{\link[GiottoVisuals]{showSaveParameters}}, \code{\link[GiottoVisuals]{spatCellPlot}}, \code{\link[GiottoVisuals:spatCellPlot]{spatCellPlot2D}}, \code{\link[GiottoVisuals]{spatDeconvPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot3D}}, \code{\link[GiottoVisuals:spatDimFeatPlot3D]{spatDimGenePlot3D}}, \code{\link[GiottoVisuals]{spatDimPlot}}, \code{\link[GiottoVisuals:spatDimPlot]{spatDimPlot2D}}, \code{\link[GiottoVisuals]{spatDimPlot3D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D_single}}, \code{\link[GiottoVisuals]{spatFeatPlot3D}}, \code{\link[GiottoVisuals:spatFeatPlot3D]{spatGenePlot3D}}, \code{\link[GiottoVisuals]{spatInSituPlotDensity}}, \code{\link[GiottoVisuals]{spatInSituPlotHex}}, \code{\link[GiottoVisuals]{spatInSituPlotPoints}}, \code{\link[GiottoVisuals]{spatNetwDistributions}}, \code{\link[GiottoVisuals]{spatNetwDistributionsDistance}}, \code{\link[GiottoVisuals]{spatNetwDistributionsKneighbors}}, \code{\link[GiottoVisuals]{spatPlot}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot2D}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot3D}}, \code{\link[GiottoVisuals]{subsetSankeySet}}, \code{\link[GiottoVisuals]{violinPlot}}} + \item{GiottoVisuals}{\code{\link[GiottoVisuals]{addGiottoImageToSpatPlot}}, \code{\link[GiottoVisuals]{dimCellPlot}}, \code{\link[GiottoVisuals:dimCellPlot]{dimCellPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot3D}}, \code{\link[GiottoVisuals:dimFeatPlot3D]{dimGenePlot3D}}, \code{\link[GiottoVisuals]{dimPlot}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot2D}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot3D}}, \code{\link[GiottoVisuals]{dotPlot}}, \code{\link[GiottoVisuals]{getColors}}, \code{\link[GiottoVisuals]{giottoSankeyPlan}}, \code{\link[GiottoVisuals]{plotHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataCellsHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataHeatmap}}, \code{\link[GiottoVisuals]{plotPCA}}, \code{\link[GiottoVisuals]{plotPCA_2D}}, \code{\link[GiottoVisuals]{plotPCA_3D}}, \code{\link[GiottoVisuals]{plotStatDelaunayNetwork}}, \code{\link[GiottoVisuals]{plotTSNE}}, \code{\link[GiottoVisuals]{plotTSNE_2D}}, \code{\link[GiottoVisuals]{plotTSNE_3D}}, \code{\link[GiottoVisuals]{plotUMAP}}, \code{\link[GiottoVisuals]{plotUMAP_2D}}, \code{\link[GiottoVisuals]{plotUMAP_3D}}, \code{\link[GiottoVisuals]{sankeyLabel}}, \code{\link[GiottoVisuals:sankeyLabel]{sankeyLabel<-}}, \code{\link[GiottoVisuals]{sankeyPlot}}, \code{\link[GiottoVisuals]{sankeyRelate}}, \code{\link[GiottoVisuals:sankeyRelate]{sankeyRelate<-}}, \code{\link[GiottoVisuals]{sankeySet}}, \code{\link[GiottoVisuals]{sankeySetAddresses}}, \code{\link[GiottoVisuals]{showClusterDendrogram}}, \code{\link[GiottoVisuals]{showClusterHeatmap}}, \code{\link[GiottoVisuals]{showColorInstructions}}, \code{\link[GiottoVisuals]{showSaveParameters}}, \code{\link[GiottoVisuals]{spatCellPlot}}, \code{\link[GiottoVisuals:spatCellPlot]{spatCellPlot2D}}, \code{\link[GiottoVisuals]{spatDeconvPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot3D}}, \code{\link[GiottoVisuals:spatDimFeatPlot3D]{spatDimGenePlot3D}}, \code{\link[GiottoVisuals]{spatDimPlot}}, \code{\link[GiottoVisuals:spatDimPlot]{spatDimPlot2D}}, \code{\link[GiottoVisuals]{spatDimPlot3D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D_single}}, \code{\link[GiottoVisuals]{spatFeatPlot3D}}, \code{\link[GiottoVisuals:spatFeatPlot3D]{spatGenePlot3D}}, \code{\link[GiottoVisuals]{spatInSituPlotDensity}}, \code{\link[GiottoVisuals]{spatInSituPlotHex}}, \code{\link[GiottoVisuals]{spatInSituPlotPoints}}, \code{\link[GiottoVisuals]{spatNetwDistributions}}, \code{\link[GiottoVisuals]{spatNetwDistributionsDistance}}, \code{\link[GiottoVisuals]{spatNetwDistributionsKneighbors}}, \code{\link[GiottoVisuals]{spatPlot}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot2D}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot3D}}, \code{\link[GiottoVisuals]{subsetSankeySet}}, \code{\link[GiottoVisuals]{violinPlot}}} }}