From ec0e4024b621a36a8d6ec1be9c4f4a7f7021e3f9 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Thu, 27 Feb 2025 20:12:44 -0500 Subject: [PATCH 1/2] WIP - all done except limma --- NAMESPACE | 4 + R/normalize.R | 843 ++++++++++++++++++++++++++++++++++++++++++- R/package_imports.R | 1 + R/zzz.R | 33 ++ man/norm_default.Rd | 45 +++ man/norm_l2.Rd | 46 +++ man/norm_library.Rd | 45 +++ man/norm_log.Rd | 44 +++ man/norm_osmfish.Rd | 54 +++ man/norm_pearson.Rd | 68 ++++ man/norm_quantile.Rd | 59 +++ man/norm_tfidf.Rd | 52 +++ man/processData.Rd | 55 +++ man/process_param.Rd | 59 +++ man/scale_default.Rd | 30 ++ man/scale_zscore.Rd | 40 ++ 16 files changed, 1474 insertions(+), 4 deletions(-) create mode 100644 man/norm_default.Rd create mode 100644 man/norm_l2.Rd create mode 100644 man/norm_library.Rd create mode 100644 man/norm_log.Rd create mode 100644 man/norm_osmfish.Rd create mode 100644 man/norm_pearson.Rd create mode 100644 man/norm_quantile.Rd create mode 100644 man/norm_tfidf.Rd create mode 100644 man/processData.Rd create mode 100644 man/process_param.Rd create mode 100644 man/scale_default.Rd create mode 100644 man/scale_zscore.Rd diff --git a/NAMESPACE b/NAMESPACE index cf94c401c..508e29ff6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,7 @@ export(addSpatialCentroidLocationsLayer) export(addStatistics) export(addVisiumPolygons) export(adjustGiottoMatrix) +export(adjustParam) export(aggregateStacks) export(aggregateStacksExpression) export(aggregateStacksLocations) @@ -286,6 +287,7 @@ export(makeSignMatrixDWLSfromMatrix) export(makeSignMatrixPAGE) export(makeSignMatrixRank) export(mergeClusters) +export(normParam) export(normalizeGiotto) export(objHistory) export(objName) @@ -394,6 +396,7 @@ export(sankeyRelate) export(sankeySet) export(sankeySetAddresses) export(saveGiotto) +export(scaleParam) export(screePlot) export(selectPatternGenes) export(setCellMetadata) @@ -523,6 +526,7 @@ import(ggplot2) import(methods) import(stats, except = density) import(utils) +importClassesFrom(Matrix,Matrix) importClassesFrom(data.table,data.table) importFrom(GiottoClass,"activeFeatType<-") importFrom(GiottoClass,"activeSpatUnit<-") diff --git a/R/normalize.R b/R/normalize.R index 47ca2eca5..c6085b504 100644 --- a/R/normalize.R +++ b/R/normalize.R @@ -1,3 +1,810 @@ +# Documentation #### +#' @name processData +#' @title Composable Data Processing +#' @description +#' Perform data transformations, or set up chains of transformations and +#' operations to be applied to matrix type data. `processData()` is a generic +#' for which methods can be defined off both `x` (the data to transform), +#' and `param` (the transform operation). +#' @param x data to transform +#' @param param S4 parameter class defining the transform operation and +#' params affecting it. +#' @param name character. [Object name][GiottoClass::giotto_schema] to assign +#' to the output. +#' @param \dots additional params to pass +#' @examples +#' m <- matrix(c(0, 0, 3, 2, 0, 5, 4, 0, 0, 1, 12, 0), nrow = 3) +#' +#' # single operation +#' lib_norm <- normParam("library") +#' lib_norm$scalefactor <- 5000 # alter a default param of library norm +#' processData(m, lib_norm) +#' +#' # chained operations +#' log_norm <- normParam("log") +#' zscore_cols <- scaleParam("zscore") +#' zscore_rows <- scaleParam("zscore", MARGIN = 1) +#' # this is essentially the same as the default giotto normalization +#' # only difference is the library norm scalefactor change. 
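+#' # steps in the list are applied in order (left to right), so the raw
+#' # matrix is library normalized, then log normalized, then z-scored
+#' # along columns and finally along rows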
+#' processData(m, list(lib_norm, log_norm, zscore_cols, zscore_rows))
+#' @seealso [process_param] for processing operations that can be performed
+#' through `processData()`
+#' @md
+NULL
+
+#' @name process_param
+#' @title Data Processing Parameter Class Factories
+#' @description Data processing operations in Giotto Suite can be divided into
+#' normalization, scaling, and adjustments
+#' @param method character. Name of method to use. See details.
+#' @param \dots (optional) Additional named parameters relevant to the param
+#' class.
+#' @section normParam methods:
+#'
+#' * [`"default"`][norm_default] - default Giotto normalization steps
+#' (library + log norms)
+#' * [`"library"`][norm_library] - library normalization
+#' * [`"log"`][norm_log] - log normalization
+#' * [`"osmfish"`][norm_osmfish] - osmfish normalization method
+#' * [`"pearson"`][norm_pearson] - Lause/Kobak 2020 pearson residuals
+#' normalization
+#' * [`"quantile"`][norm_quantile] - quantile normalization
+#' * [`"tf-idf"`][norm_tfidf] - Term Frequency-Inverse Document Frequency
+#' * [`"l2"`][norm_l2] - L2 normalization (also known as Euclidean
+#' normalization)
+#'
+#' @section scaleParam methods:
+#'
+#' * [`"default"`][scale_default] - default Giotto scaling steps (scale along
+#' features then cells)
+#' * [`"zscore"`][scale_zscore] - essentially the same as `base::scale()`, but
+#' with a `MARGIN` param allowing scaling along either cols or rows
+#'
+#' @section adjustParam methods:
+#'
+#' * `"limma"` - limma batch correction
+#' @md
+NULL
+
+#' @name norm_default
+#' @title Default Giotto Normalization
+#' @description
+#' Expression matrix normalization method.
+#'
+#' Steps:
+#'
+#' 1. [Total library size][norm_library] normalization and scaling by
+#' a custom scale-factor.
+#' 2. [Log][norm_log] transformation of data.
+#'
+#' @section params:
+#'
+#' \tabular{ll}{
+#' `library_size_norm` \tab logical (default = `TRUE`). Whether to perform
+#' library size normalization \cr
+#' `scalefactor` \tab numeric (default = 6000). Scalefactor to use after
+#' library size normalization. (skipped if `library_size_norm = FALSE`) \cr
+#' `log_norm` \tab logical (default = `TRUE`). Whether to transform values to
+#' log-scale. \cr
+#' `log_offset` \tab numeric (default = 1). If `log_norm = TRUE`, offset
+#' value to add to expression values to avoid `log(0)` \cr
+#' `logbase` \tab numeric (default = 2). If `log_norm = TRUE`, log base to
+#' use to log normalize expression values
+#' }
+#' @family normalization parameters
+#' @seealso [process_param]
+#' @md
+NULL
+
+#' @name norm_library
+#' @title Library Size Normalization
+#' @description
+#' Normalize expression matrix for total library size and then scale by
+#' a custom scalefactor.
+#'
+#' This method does not work well when any cells/samples
+#' have a library size of 0, so filtering prior to this is recommended.
+#'
+#' \deqn{\LARGE
+#' x'_{i,j} = \frac{x_{i,j}}{\sum_{i} x_{i,j}} \times k
+#' }
+#' Where:
+#'
+#' * (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j}
+#' * (\eqn{x'_{i,j}}) is the library normalized and scaled expression value for
+#' feature \eqn{i} in sample \eqn{j}
+#' * (k) is a scalefactor applied after normalization
+#'
+#' @section params:
+#'
+#' \tabular{ll}{
+#' `scalefactor` \tab numeric (default = 6000). Scalefactor to use after
+#' library size normalization. Expressed as ***k*** in the above equation
+#' }
+#' @md
+#' @family normalization parameters
+#' @seealso [process_param]
+NULL
+
+#' @name norm_log
+#' @title Log Normalization
+#' @description
+#' Apply a log normalization
+#'
+#' \deqn{\LARGE
+#' x'_{i,j} = \frac{\log(x_{i,j} + b)}{\log(a)}
+#' }
+#' Where:
+#'
+#' * (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j}
+#' * (\eqn{x'_{i,j}}) is the log normalized expression value for feature
+#' \eqn{i} in sample \eqn{j}
+#' * (\eqn{a}) is the log base
+#' * (\eqn{b}) is an offset value
+#'
+#' @section params:
+#'
+#' \tabular{ll}{
+#' `base` \tab numeric (default = 2) log base to use. Expressed as \eqn{a} in
+#' the above equation. \cr
+#' `offset` \tab numeric (default = 1). Offset to add to expression values to
+#' avoid \eqn{\log(0)}. Expressed as \eqn{b} in the above equation.
+#' }
+#' @md
+#' @family normalization parameters
+#' @seealso [process_param]
+NULL
+
+#' @name norm_osmfish
+#' @title osmFISH Normalization
+#' @description
+#' Normalization method as provided by the osmFISH paper
+#'
+#' Steps:
+#'
+#' 1. First normalize genes, for each gene divide the counts by the total gene
+#' count and multiply by the total number of genes.
+#' 2. Next normalize cells, for each cell divide the normalized gene counts by
+#' the total counts per cell and multiply by the total number of cells.
+#'
+#' \deqn{\LARGE
+#' x'_{i,j} = \frac{x_{i,j}}{\sum_j x_{i,j}} \times n_{\text{features}}
+#' }
+#'
+#' \deqn{\LARGE
+#' x''_{i,j} = \frac{x'_{i,j}}{\sum_i x'_{i,j}} \times n_{\text{samples}}
+#' }
+#'
+#' Where:
+#'
+#' * (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j}
+#' * (\eqn{x'_{i,j}}) is the feature normalized expression value
+#' * (\eqn{x''_{i,j}}) is the final normalized expression value after both
+#' feature and cell normalization
+#' * (\eqn{n_{\text{samples}}}) is the total number of cells
+#' (columns in matrix)
+#' * (\eqn{n_{\text{features}}}) is the total number of features
+#' (rows in matrix)
+#'
+#' @section params:
+#' None
+#' @md
+#' @family normalization parameters
+#' @seealso [process_param]
+NULL
+
+#' @name norm_pearson
+#' @title Lause/Kobak Pearson Residuals Normalization
+#' @description
+#' Calculate Pearson residuals with a dispersion adjustment, to identify cells
+#' that deviate significantly from what would be expected under independence.
+#' The normalization divides the difference between observed and expected
+#' counts by its standard deviation, which is adjusted by the dispersion
+#' parameter θ.
+#'
+#' This normalization is designed for detection of highly variable features,
+#' dimension reduction, and clustering.
+#'
+#' \deqn{\LARGE
+#' z_{i,j} = \frac{x_{i,j} - \mu_{i,j}}{\sqrt{\mu_{i,j} + \mu_{i,j}^2 / \theta}}
+#' }
+#'
+#' \deqn{\LARGE
+#' \mu_{i,j} = \frac{r_i \cdot c_j}{N}
+#' }
+#'
+#' Where:
+#' * (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j}
+#' * (\eqn{\mu_{i,j}}) is the expected value under the model
+#' * (\eqn{r_i}) is \eqn{\sum_j x_{i,j}}
+#' * (\eqn{c_j}) is \eqn{\sum_i x_{i,j}}
+#' * (\eqn{N}) is \eqn{\sum_{i,j} x_{i,j}}
+#' * (\eqn{\theta}) is a dispersion parameter
+#' * (\eqn{z_{i,j}}) is the Pearson residual clipped to the range
+#' \eqn{[-\sqrt{n}, \sqrt{n}]} where \eqn{n} is the number of columns. This is
+#' done to prevent extreme values from dominating the analysis.
+#'
+#' # Note
+#' Scaling is not recommended after this normalization since it is already
+#' transforming the data to z-score-like values with a dispersion adjustment.
+#' It is also not recommended to use this with DGE analysis.
+#'
+#' @section params:
+#'
+#' \tabular{ll}{
+#' `theta` \tab numeric (default = 100). Dispersion parameter, expressed as
+#' \eqn{\theta} in the above formula
+#' }
+#'
+#' @references Lause, J., Berens, P. & Kobak, D. Analytic Pearson residuals for
+#' normalization of single-cell RNA-seq UMI data. Genome Biol 22, 258 (2021).
+#' https://doi.org/10.1186/s13059-021-02451-7
+#' @md
+#' @family normalization parameters
+#' @seealso [process_param]
+NULL
+
+#' @name norm_quantile
+#' @title Quantile Normalization
+#' @description
+#' Quantile normalization makes the statistical distribution of values in each
+#' column identical by replacing the original values with the mean of the
+#' values at the same rank across all columns. This removes technical variation
+#' while preserving relative differences between features.
+#'
+#' Steps:
+#' 1. Rank the values within each column (average taken in case of ties)
+#' 2. Calculate the mean of values at the same rank across all columns
+#' 3. Replace each value with the mean value corresponding to its rank
+#'
+#' \deqn{\LARGE
+#' q_{i,j} = \bar{x}_{rank(i,j)}
+#' }
+#'
+#' Where:
+#' * (\eqn{rank(i,j)}) is the rank of feature \eqn{i} within column \eqn{j}
+#' * (\eqn{\bar{x}_{r}}) where \eqn{r = rank(i,j)} is the mean of values with
+#' rank \eqn{r} across all columns
+#' * (\eqn{q_{i,j}}) is the quantile-normalized value
+#'
+#' # Note
+#' Library normalization and log normalization are recommended prior to this
+#' normalization.
+#'
+#' @section params:
+#' None
+#'
+#' @references Bolstad, B.M., Irizarry, R.A., Astrand, M. et al. A comparison of
+#' normalization methods for high density oligonucleotide array data based on
+#' variance and bias. Bioinformatics 19, 185–193 (2003).
+#' https://doi.org/10.1093/bioinformatics/19.2.185
+#' @md
+#' @family normalization parameters
+#' @seealso [process_param]
+NULL
+
+#' @name norm_tfidf
+#' @title TF-IDF Normalization
+#' @description
+#' TF-IDF (Term Frequency-Inverse Document Frequency) normalization is borrowed
+#' from natural language processing to identify features that are highly expressed
+#' in specific samples but not widely expressed across the entire dataset.
+#'
+#' \deqn{\LARGE
+#' TF_{i,j} = \frac{x_{i,j}}{\sum_{i} x_{i,j}}
+#' }
+#'
+#' \deqn{\LARGE
+#' IDF_{i} = \log(1 + \frac{n_{samples}}{1 + n_{samples \: where \: feature \: i > 0}})
+#' }
+#'
+#' \deqn{\LARGE
+#' TFIDF_{i,j} = TF_{i,j} \times IDF_{i}
+#' }
+#'
+#' Where:
+#' * (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j}
+#' * (\eqn{TF_{i,j}}) is the term frequency of feature \eqn{i} in sample \eqn{j}
+#' * (\eqn{IDF_{i}}) is the inverse document frequency of feature \eqn{i}
+#' * (\eqn{TFIDF_{i,j}}) is the final TF-IDF normalized value
+#'
+#' # Note
+#' [L2][norm_l2] normalization is commonly performed after TF-IDF normalization
+#'
+#' @section params:
+#' None
+#' @md
+#' @family normalization parameters
+#' @seealso [process_param]
+NULL
+
+#' @name norm_l2
+#' @title L2 Normalization
+#' @description
+#' L2 normalization (also known as Euclidean normalization) scales each column
+#' (sample) in the expression matrix to have unit Euclidean length. This
+#' process makes samples with different sequencing depths more comparable and
+#' improves the performance of distance-based analyses.
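+#' For example, a column containing `c(3, 4)` has Euclidean length 5 and
+#' is rescaled to `c(0.6, 0.8)`.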
+#' +#' \deqn{\LARGE +#' x'_{i,j} = \frac{x_{i,j}}{\sqrt{\sum_{i} x_{i,j}^2}} +#' } +#' +#' Where: +#' * (\eqn{x_{i,j}}) is the expression value for feature \eqn{i} in sample \eqn{j} +#' * (\eqn{x'_{i,j}}) is the L2-normalized expression value +#' +#' @section Note: +#' L2 normalization can be applied to raw data, but is most commonly used after +#' other normalization methods such as TF-IDF or log normalization to standardize +#' sample-to-sample comparisons. +#' +#' @section params: +#' None +#' +#' @family normalization parameters +#' @seealso [process_param] +#' @md +NULL + +#' @name scale_default +#' @title Default Giotto Scaling +#' @description +#' 2 step [z-scoring][scale_zscore] along features and samples +#' @section params: +#' +#' \tabular{ll}{ +#' `scale_feats` \tab logical (default = `TRUE`) Whether to scale across +#' features \cr +#' `scale_cells` \tab logical (default = `TRUE`) Whether to scale across +#' cells/samples \cr +#' `scale_order` \tab character. One of either `"first_feats"` or +#' `"first_cells"`. When both `scale_feats` and `scale_cells` are `TRUE`, +#' determines the order in which the 2 scaling operations are performed. \cr +#' `verbose` \tab logical (default = `TRUE`) Whether to be verbose +#' } +#' +#' @md +#' @family scaling parameters +#' @seealso [process_param] +NULL + +#' @name scale_zscore +#' @title Z Score Scaling +#' @description +#' Wrapper around `base::scale()` to make it compatible with the +#' [processData()] framework. Additionally provides a `MARGIN` param. +#' +#' \deqn{\LARGE +#' z_{i,j} = \frac{x_{i,j} - \mu_i}{\sigma_i} +#' } +#' +#' Where: +#' * \eqn{x_{i,j}} is the original value for feature \eqn{i} in sample \eqn{j} +#' * \eqn{\mu_i} is the mean of feature \eqn{i} across all samples +#' * \eqn{\sigma_i} is the standard deviation of feature \eqn{i} across all +#' samples +#' * \eqn{z_{i,j}} is the resulting scaled value +#' +#' @section params: +#' +#' \tabular{ll}{ +#' `scale` \tab logical (default = `TRUE`) Whether to scale values \cr +#' `center` \tab logical (default = `TRUE`) Whether to center values\cr +#' `MARGIN` \tab numeric. Either 1 (rows) or 2 (cols). Direction along which +#' to perform the operation. +#' } +#' @md +#' @family scaling parameters +#' @seealso [process_param] +NULL + + + + + +# VIRTUAL classes #### +setClass("normParam", contains = c("VIRTUAL", "processParam")) +setClass("scaleParam", contains = c("VIRTUAL", "processParam")) +setClass("adjustParam", contains = c("VIRTUAL", "processParam")) + +# access #### +.DollarNames.scaleParam <- function(x, pattern) { + names(x@param) +} +.DollarNames.normParam <- function(x, pattern) { + names(x@param) +} +.DollarNames.adjustParam <- function(x, pattern) { + names(x@param) +} + +# extending method classes #### +setClass("defaultNormParam", contains = "normParam") +setClass("libraryNormParam", contains = "normParam") +setClass("logNormParam", contains = "normParam") +setClass("osmFISHNormParam", contains = "normParam") +setClass("pearsonResidNormParam", contains = "normParam") +setClass("quantileNormParam", contains = "normParam") +setClass("tfidfNormParam", contains = "normParam") +setClass("l2NormParam", contains = "normParam") + +setClass("defaultScaleParam", contains = "scaleParam") +setClass("zscoreScaleParam", contains = "scaleParam") + +setClass("limmaAdjustParam", contains = "adjustParam") + +# allMatrix signature #### +setClassUnion("allMatrix", members = c("matrix", "Matrix")) + + +# params setup #### +.norm_param_lib <- function(...) 
{
    p <- new("libraryNormParam", param = list(...))
    p$scalefactor <- p$scalefactor %null% 6e3
    p
}
.norm_param_log <- function(...) {
    p <- new("logNormParam", param = list(...))
    p$base <- p$base %null% 2
    p$offset <- p$offset %null% 1
    p
}
.norm_param_osmfish <- function(...) {
    new("osmFISHNormParam", param = list(...))
}
.norm_param_pears_resid <- function(...) {
    p <- new("pearsonResidNormParam", param = list(...))
    p$theta <- p$theta %null% 100
    p
}
.norm_param_quantile <- function(...) {
    new("quantileNormParam", param = list(...))
}
.norm_param_default <- function(...) {
    p <- new("defaultNormParam", param = list(...))
    p$library_size_norm <- p$library_size_norm %null% TRUE
    p$scalefactor <- p$scalefactor %null% 6e3
    p$log_norm <- p$log_norm %null% TRUE
    p$log_offset <- p$log_offset %null% 1
    p$logbase <- p$logbase %null% 2
    p
}
.norm_param_tfidf <- function(...) {
    new("tfidfNormParam", param = list(...))
}
.norm_param_l2 <- function(...) {
    new("l2NormParam", param = list(...))
}

.scale_param_zscore <- function(...) {
    p <- new("zscoreScaleParam", param = list(...))
    p$scale <- p$scale %null% TRUE
    p$center <- p$center %null% TRUE
    p$MARGIN <- p$MARGIN %null% 2
    p
}
.scale_param_default <- function(...) {
    p <- new("defaultScaleParam", param = list(...))
    p$scale_feats <- p$scale_feats %null% TRUE
    p$scale_cells <- p$scale_cells %null% TRUE
    p$scale_order <- p$scale_order %null% c("first_feats", "first_cells")
    p$verbose <- p$verbose %null% TRUE
    p
}


.adjust_param_limma <- function(...) {
    p <- new("limmaAdjustParam", param = list(...))
    # register optional params as NULL defaults without clobbering any
    # user-provided values
    if (is.null(p@param$batch_columns)) {
        p@param <- c(p@param, list(batch_columns = NULL))
    }
    if (is.null(p@param$covariate_columns)) {
        p@param <- c(p@param, list(covariate_columns = NULL))
    }
    p
}

# param factories ####

#' @rdname process_param
#' @export
normParam <- function(method = "default", ...) {
    method <- match.arg(tolower(method),
        c("default", "library", "log", "osmfish", "pearson", "quantile",
            "tf-idf", "l2")
    )
    switch(method,
        "default" = .norm_param_default(...),
        "library" = .norm_param_lib(...),
        "log" = .norm_param_log(...),
        "osmfish" = .norm_param_osmfish(...),
        "pearson" = .norm_param_pears_resid(...),
        "quantile" = .norm_param_quantile(...),
        "tf-idf" = .norm_param_tfidf(...),
        "l2" = .norm_param_l2(...)
    )
}

#' @rdname process_param
#' @export
scaleParam <- function(method = "default", ...) {
    method <- match.arg(tolower(method),
        c("default", "zscore")
    )
    switch(method,
        "default" = .scale_param_default(...),
        "zscore" = .scale_param_zscore(...)
    )
}

#' @rdname process_param
#' @export
adjustParam <- function(method = "limma", ...) {
    method <- match.arg(tolower(method),
        c("limma")
    )
    switch(method,
        "limma" = .adjust_param_limma(...)
    )
}



# methods ####

# * ANY ####

setMethod("processData",
signature(x = "ANY", param = "ANY"), function(x, param) {
    stop(wrap_txtf("param of class '%s' is not recognized for use with '%s'",
        class(param), class(x)),
    call.
= FALSE) +}) + +setMethod("processData", + signature(x = "ANY", param = "adjustParam"), function(x, param) { + " " + }) + +# * exprObj #### + +#' @rdname processData +setMethod("processData", + signature(x = "exprObj", param = "list"), + function(x, param, name = "scaled") { + x[] <- processData(x[], param) + objName(x) <- name + return(x) + } +) + +#' @rdname processData +setMethod("processData", + signature(x = "exprObj", param = "normParam"), + function(x, param, name = "normalized") { + x[] <- processData(x[], param) + objName(x) <- name + return(x) + } +) + +# specialized handling for osmfish +setMethod("processData", + signature(x = "exprObj", param = "osmFISHNormParam"), + function(x, param, name = "custom") { + if (!featType(x) %in% c("rna", "RNA")) { + warning("Caution: osmFISH normalization was developed for RNA in situ data", + call. = FALSE) + } + x[] <- processData(x[], param) + objName(x) <- name + return(x) + } +) + +# specialized handling for pearson residual +setMethod("processData", + signature(x = "exprObj", param = "pearsonResidNormParam"), + function(x, param, name = "scaled") { + if (!featType(x) %in% c("rna", "RNA")) { + warning("Caution: pearson residual normalization was developed for RNA count normalization", + call. = FALSE) + } + x[] <- processData(x[], param) + objName(x) <- name + return(x) + } +) + +#' @rdname processData +setMethod("processData", + signature(x = "exprObj", param = "scaleParam"), + function(x, param, name = "scaled") { + x[] <- processData(x[], param) + objName(x) <- name + return(x) + } +) + + +# * matrix #### + +# ** param list #### + +#' @rdname processData +setMethod("processData", + signature(x = "allMatrix", param = "list"), + function(x, param) { + for (p in param) { + x <- processData(x, p) + } + return(x) + } +) + +# ** norm ------------------ #### +# *** library norm #### +setMethod("processData", + signature(x = "allMatrix", param = "libraryNormParam"), + function(x, param) { + .lib_norm_giotto(mymatrix = x, scalefactor = param$scalefactor) + } +) +# *** log norm #### +setMethod("processData", + signature(x = "allMatrix", param = "logNormParam"), + function(x, param) { + log(x + param$offset) / log(param$base) + } +) +setMethod("processData", + signature(x = "Matrix", param = "logNormParam"), + function(x, param) { + x@x <- log(x@x + param$offset) / log(param$base) + x + } +) +# *** osmFISH norm #### +setMethod("processData", + signature(x = "allMatrix", param = "osmFISHNormParam"), + function(x, param) { + # 1. normalize raw expr per gene with scale-factor equal to number of genes + norm_feats <- (x / rowSums_flex(x)) * nrow(x) + # 2. 
normalize per cells with scale-factor equal to number of cells
        t_flex((t_flex(norm_feats) / colSums_flex(norm_feats)) * ncol(x))
    }
)
# *** pearson norm ####
setMethod("processData",
    signature(x = "allMatrix", param = "pearsonResidNormParam"),
    function(x, param) {
        .pears_resid_citation(verbose = param$verbose)
        .csums <- .csum_nodrop.Matrix
        .rsums <- .rsum_nodrop.Matrix
        .prnorm(
            x = x,
            theta = param$theta,
            .csums = .csums,
            .rsums = .rsums
        )
    }
)
# *** quantile norm ####
setMethod("processData",
    signature(x = "allMatrix", param = "quantileNormParam"),
    function(x, param) {
        .qnorm(x)
    }
)
# *** tf-idf norm ####
setMethod("processData",
    signature(x = "allMatrix", param = "tfidfNormParam"),
    function(x, param) {
        # compute term frequency (TF) per sample (column)
        tf <- t_flex(t_flex(x) / colSums_flex(x))
        # compute inverse document frequency (IDF)
        idf <- log(1 + ncol(x) / (1 + rowSums_flex(x > 0)))
        # apply TF-IDF
        tf * idf
    }
)
# *** default norm ####
setMethod("processData",
    signature(x = "allMatrix", param = "defaultNormParam"),
    function(x, param) {
        plist <- list()
        # 1. library size normalization
        if (isTRUE(param$library_size_norm)) {
            plist <- c(plist, normParam("library",
                scalefactor = param$scalefactor))
        }
        # 2. log normalize
        if (isTRUE(param$log_norm)) {
            plist <- c(plist, normParam("log",
                base = param$logbase,
                offset = param$log_offset)
            )
        }
        processData(x, plist)
    }
)
# *** L2 norm ####
setMethod("processData",
    signature(x = "allMatrix", param = "l2NormParam"),
    function(x, param) {
        .l2_norm(x)
    }
)

# ** scale ----------------- ####
# *** zscore scale ####
setMethod("processData",
    signature("allMatrix", param = "zscoreScaleParam"),
    function(x, param, ...) {
        if (!param$MARGIN %in% c(1, 2)) {
            stop("processData zscore: 'MARGIN' must be either 1 (rows) or 2 (cols)",
                call. = FALSE)
        }
        if (param$MARGIN == 1) x <- t_flex(x)
        x <- standardise_flex(x, center = param$center, scale = param$scale)
        if (param$MARGIN == 1) x <- t_flex(x)
        return(x)
    })
# *** default scale ####
setMethod("processData",
    signature(x = "allMatrix", param = "defaultScaleParam"),
    function(x, param, ...) {
        plist <- list()
        s1 <- scaleParam("zscore", center = TRUE, scale = TRUE, MARGIN = 1)
        s2 <- scaleParam("zscore", center = TRUE, scale = TRUE, MARGIN = 2)
        if (isTRUE(param$scale_feats) && isTRUE(param$scale_cells)) {
            scale_order <- match.arg(param$scale_order,
                choices = c("first_feats", "first_cells")
            )
            if (scale_order == "first_feats") {
                vmsg(.v = param$verbose, "first scale feats and then cells")
                plist <- c(plist, s1, s2)
            } else if (scale_order == "first_cells") {
                vmsg(.v = param$verbose, "first scale cells and then feats")
                plist <- c(plist, s2, s1)
            } else {
                stop("processData defaultScaleParam: scale order must be given",
                    call.
= FALSE) + } + } else if (isTRUE(param$scale_feats)) { + plist <- c(plist, s1) + } else if (isTRUE(param$scale_cells)) { + plist <- c(plist, s2) + } + processData(x, plist) + } +) + + + + + +processExpression <- function(gobject, param, name, + expression_values = "raw", + spat_unit = NULL, + feat_type = NULL, + return_gobject = TRUE) { + ex <- getExpression(gobject, + values = expression_values, + spat_unit = spat_unit, + feat_type = feat_type, + output = "exprObj", + set_defaults = TRUE + ) + res <- processData(ex, param, name = name) + if(!isTRUE(return_gobject)) return(res) + setGiotto(gobject, res) +} + + + + + #' @title normalizeGiotto #' @name normalizeGiotto #' @description fast normalize and/or scale expression values of Giotto object @@ -186,8 +993,35 @@ normalizeGiotto <- function(gobject, + + + + + + + + + + + + # internals #### +.l2_norm <- function(x) { + # Calculate column norms (Euclidean length of each column) + col_norms <- sqrt(colSums_flex(x^2)) + # Avoid division by zero + col_norms[col_norms == 0] <- 1 + # Normalize each column + t_flex(t_flex(x) / col_norms) +} + +.pears_resid_citation <- function(verbose = NULL) { + vmsg(.v = verbose, "using 'Lause/Kobak' method to normalize count matrix. + If used in published research, please cite: + Jan Lause, Philipp Berens, Dmitry Kobak (2020). + 'Analytic Pearson residuals for normalization of single-cell RNA-seq UMI data'") +} #' @title Normalize expression matrix for library size #' @param mymatrix matrix object @@ -200,10 +1034,11 @@ normalizeGiotto <- function(gobject, if (0 %in% libsizes) { warning(wrap_txt("Total library size or counts for individual spat - units are 0. - This will likely result in normalization problems. - filter (filterGiotto) or impute (imputeGiotto) spatial - units.")) + units are 0. + This will likely result in normalization problems. 
+            filter (filterGiotto) or impute (imputeGiotto) spatial
+            units.")
+        )
     }
 
     norm_expr <- t_flex(t_flex(mymatrix) / libsizes) * scalefactor
 
diff --git a/R/package_imports.R b/R/package_imports.R
index c9f96dd1e..912486b99 100644
--- a/R/package_imports.R
+++ b/R/package_imports.R
@@ -11,4 +11,5 @@
 #' @importFrom data.table frank
 #' @importFrom data.table fread
 #' @importFrom data.table merge.data.table
+#' @importClassesFrom Matrix Matrix
 NULL
diff --git a/R/zzz.R b/R/zzz.R
index 3c024dc1c..69af3ffdf 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -43,3 +43,36 @@
     # ----------- #
     init_option("giotto.verbose", TRUE)
 }
+
+.onLoad <- function(libname, pkgname) {
+    # extensible classunions --------------------------------------------#
+    all_matrix <- c("matrix", "Matrix")
+    update_matrix_sig <- FALSE
+    if (requireNamespace("DelayedArray", quietly = TRUE)) {
+        getClass("DelayedArray")
+        all_matrix <- c(all_matrix, "DelayedArray")
+        update_matrix_sig <- TRUE
+    }
+    if (requireNamespace("dbMatrix", quietly = TRUE)) {
+        getClass("dbMatrix")
+        all_matrix <- c(all_matrix, "dbMatrix")
+        update_matrix_sig <- TRUE
+    }
+
+    if (isTRUE(update_matrix_sig)) {
+        setClassUnion("allMatrix", members = all_matrix)
+    }
+    # methods extensions ------------------------------------------------#
+
+    if (requireNamespace("dbMatrix", quietly = TRUE)) {
+        setMethod("processData",
+            signature(x = "dbMatrix", param = "logNormParam"),
+            function(x, param) {
+                # workaround for lack of an @x slot: apply the offset and
+                # the log transform through dplyr on the backing table
+                x[] <- dplyr::mutate(x[], x = x + param$offset)
+                x[] <- dplyr::mutate(x[], x = log(x) / log(param$base))
+                x
+            }
+        )
+    }
+}
diff --git a/man/norm_default.Rd b/man/norm_default.Rd
new file mode 100644
index 000000000..79516df50
--- /dev/null
+++ b/man/norm_default.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/normalize.R
+\name{norm_default}
+\alias{norm_default}
+\title{Default Giotto Normalization}
+\description{
+Expression matrix normalization method.
+
+Steps:
+\enumerate{
+\item \link[=norm_library]{Total library size} normalization and scaling by
+a custom scale-factor.
+\item \link[=norm_log]{Log} transformation of data.
+}
+}
+\section{params}{
+
+
+\tabular{ll}{
+\code{library_size_norm} \tab logical (default = \code{TRUE}). Whether to perform
+library size normalization \cr
+\code{scalefactor} \tab numeric (default = 6000). Scalefactor to use after
+library size normalization. (skipped if \code{library_size_norm = FALSE}) \cr
+\code{log_norm} \tab logical (default = \code{TRUE}). Whether to transform values to
+log-scale. \cr
+\code{log_offset} \tab numeric (default = 1). If \code{log_norm = TRUE}, offset
+value to add to expression values to avoid \code{log(0)} \cr
+\code{logbase} \tab numeric (default = 2).
If \code{log_norm = TRUE}, log base to +use to log normalize expression values +} +} + +\seealso{ +\link{process_param} + +Other normalization parameters: +\code{\link{norm_l2}}, +\code{\link{norm_library}}, +\code{\link{norm_log}}, +\code{\link{norm_osmfish}}, +\code{\link{norm_pearson}}, +\code{\link{norm_quantile}}, +\code{\link{norm_tfidf}} +} +\concept{normalization parameters} diff --git a/man/norm_l2.Rd b/man/norm_l2.Rd new file mode 100644 index 000000000..43e7e28af --- /dev/null +++ b/man/norm_l2.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/normalize.R +\name{norm_l2} +\alias{norm_l2} +\title{L2 Normalization} +\description{ +L2 normalization (also known as Euclidean normalization) scales each column +(sample) in the expression matrix to have unit Euclidean length. This +process makes samples with different sequencing depths more comparable and +improves the performance of distance-based analyses. + +\deqn{\LARGE +x'_{i,j} = \frac{x_{i,j}}{\sqrt{\sum_{i} x_{i,j}^2}} +} + +Where: +\itemize{ +\item (\eqn{x_{i,j}}) is the expression value for feature \eqn{i} in sample \eqn{j} +\item (\eqn{x'_{i,j}}) is the L2-normalized expression value +} +} +\section{Note}{ + +L2 normalization can be applied to raw data, but is most commonly used after +other normalization methods such as TF-IDF or log normalization to standardize +sample-to-sample comparisons. +} + +\section{params}{ + +None +} + +\seealso{ +\link{process_param} + +Other normalization parameters: +\code{\link{norm_default}}, +\code{\link{norm_library}}, +\code{\link{norm_log}}, +\code{\link{norm_osmfish}}, +\code{\link{norm_pearson}}, +\code{\link{norm_quantile}}, +\code{\link{norm_tfidf}} +} +\concept{normalization parameters} diff --git a/man/norm_library.Rd b/man/norm_library.Rd new file mode 100644 index 000000000..47d3f0a8b --- /dev/null +++ b/man/norm_library.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/normalize.R +\name{norm_library} +\alias{norm_library} +\title{Library Size Normalization} +\description{ +Normalize expression matrix for total library size and then scale by +a custom scalefactor. + +This method does not work well when any cells/samples +have a library size of 0, so filtering prior to this is recommended. + +\deqn{\LARGE +x'_{i,j} = \frac{x_{i,j}}{\sum_{i} x_{i,j}} \times k +} +Where: +\itemize{ +\item (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j} +\item (\eqn{x'_{i,j}}) is the library normalized and scaled expression value for +feature \eqn{i} in sample \eqn{j} +\item (k) is a scalefactor applied after normalization +} +} +\section{params}{ + + +\tabular{ll}{ +\code{scalefactor} \tab numeric (default = 6000). Scalefactor to use after +library size normalization. 
Expressed as \emph{\strong{k}} in the above equation +} +} + +\seealso{ +\link{process_param} + +Other normalization parameters: +\code{\link{norm_default}}, +\code{\link{norm_l2}}, +\code{\link{norm_log}}, +\code{\link{norm_osmfish}}, +\code{\link{norm_pearson}}, +\code{\link{norm_quantile}}, +\code{\link{norm_tfidf}} +} +\concept{normalization parameters} diff --git a/man/norm_log.Rd b/man/norm_log.Rd new file mode 100644 index 000000000..e4b436fb5 --- /dev/null +++ b/man/norm_log.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/normalize.R +\name{norm_log} +\alias{norm_log} +\title{Log Normalization} +\description{ +Apply a log normalization + +\deqn{\LARGE +x'_{i,j} = \frac{\log(x_{i,j} + b)}{\log(a)} +} +Where: +\itemize{ +\item (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j} +\item (\eqn{x'_{i,j}}) is the log normalized expression value for feature +\eqn{i} in sample \eqn{j} +\item (\eqn{a}) is the log base +\item (\eqn{b}) is an offset value +} +} +\section{params}{ + + +\tabular{ll}{ +\code{base} \tab numeric (default = 2) log base to use. Expressed as \eqn{a} in +the above equation. \cr +\code{offset} \tab numeric (default = 1). Offset to add to expression values to +avoid \eqn{\log(0)}. Expressed as \eqn{b} in the above equation. +} +} + +\seealso{ +\link{process_param} + +Other normalization parameters: +\code{\link{norm_default}}, +\code{\link{norm_l2}}, +\code{\link{norm_library}}, +\code{\link{norm_osmfish}}, +\code{\link{norm_pearson}}, +\code{\link{norm_quantile}}, +\code{\link{norm_tfidf}} +} +\concept{normalization parameters} diff --git a/man/norm_osmfish.Rd b/man/norm_osmfish.Rd new file mode 100644 index 000000000..c5abb3989 --- /dev/null +++ b/man/norm_osmfish.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/normalize.R +\name{norm_osmfish} +\alias{norm_osmfish} +\title{osmFISH Normalization} +\description{ +Normalization method as provided by the osmFISH paper + +Steps: +\enumerate{ +\item First normalize genes, for each gene divide the counts by the total gene +count and multiply by the total number of genes. +\item Next normalize cells, for each cell divide the normalized gene counts by +the total counts per cell and multiply by the total number of cells. 
+}
+
+\deqn{\LARGE
+x'_{i,j} = \frac{x_{i,j}}{\sum_j x_{i,j}} \times n_{\text{features}}
+}
+
+\deqn{\LARGE
+x''_{i,j} = \frac{x'_{i,j}}{\sum_i x'_{i,j}} \times n_{\text{samples}}
+}
+
+Where:
+\itemize{
+\item (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j}
+\item (\eqn{x'_{i,j}}) is the feature normalized expression value
+\item (\eqn{x''_{i,j}}) is the final normalized expression value after both
+feature and cell normalization
+\item (\eqn{n_{\text{samples}}}) is the total number of cells
+(columns in matrix)
+\item (\eqn{n_{\text{features}}}) is the total number of features
+(rows in matrix)
+}
+}
+\section{params}{
+
+None
+}
+
+\seealso{
+\link{process_param}
+
+Other normalization parameters:
+\code{\link{norm_default}},
+\code{\link{norm_l2}},
+\code{\link{norm_library}},
+\code{\link{norm_log}},
+\code{\link{norm_pearson}},
+\code{\link{norm_quantile}},
+\code{\link{norm_tfidf}}
+}
+\concept{normalization parameters}
diff --git a/man/norm_pearson.Rd b/man/norm_pearson.Rd
new file mode 100644
index 000000000..204d2cf60
--- /dev/null
+++ b/man/norm_pearson.Rd
@@ -0,0 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/normalize.R
+\name{norm_pearson}
+\alias{norm_pearson}
+\title{Lause/Kobak Pearson Residuals Normalization}
+\description{
+Calculate Pearson residuals with a dispersion adjustment, to identify cells
+that deviate significantly from what would be expected under independence.
+The normalization divides the difference between observed and expected
+counts by its standard deviation, which is adjusted by the dispersion
+parameter θ.
+
+This normalization is designed for detection of highly variable features,
+dimension reduction, and clustering.
+
+\deqn{\LARGE
+z_{i,j} = \frac{x_{i,j} - \mu_{i,j}}{\sqrt{\mu_{i,j} + \mu_{i,j}^2 / \theta}}
+}
+
+\deqn{\LARGE
+\mu_{i,j} = \frac{r_i \cdot c_j}{N}
+}
+
+Where:
+\itemize{
+\item (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j}
+\item (\eqn{\mu_{i,j}}) is the expected value under the model
+\item (\eqn{r_i}) is \eqn{\sum_j x_{i,j}}
+\item (\eqn{c_j}) is \eqn{\sum_i x_{i,j}}
+\item (\eqn{N}) is \eqn{\sum_{i,j} x_{i,j}}
+\item (\eqn{\theta}) is a dispersion parameter
+\item (\eqn{z_{i,j}}) is the Pearson residual clipped to the range
+\eqn{[-\sqrt{n}, \sqrt{n}]} where \eqn{n} is the number of columns. This is
+done to prevent extreme values from dominating the analysis.
+}
+}
+\section{Note}{
+Scaling is not recommended after this normalization since it is already
+transforming the data to z-score-like values with a dispersion adjustment.
+It is also not recommended to use this with DGE analysis.
+}
+
+\section{params}{
+
+
+\tabular{ll}{
+\code{theta} \tab numeric (default = 100). Dispersion parameter, expressed as
+\eqn{\theta} in the above formula
+}
+}
+
+\references{
+Lause, J., Berens, P. & Kobak, D. Analytic Pearson residuals for
+normalization of single-cell RNA-seq UMI data. Genome Biol 22, 258 (2021).
+https://doi.org/10.1186/s13059-021-02451-7
+}
+\seealso{
+\link{process_param}
+
+Other normalization parameters:
+\code{\link{norm_default}},
+\code{\link{norm_l2}},
+\code{\link{norm_library}},
+\code{\link{norm_log}},
+\code{\link{norm_osmfish}},
+\code{\link{norm_quantile}},
+\code{\link{norm_tfidf}}
+}
+\concept{normalization parameters}
diff --git a/man/norm_quantile.Rd b/man/norm_quantile.Rd
new file mode 100644
index 000000000..0b07320e6
--- /dev/null
+++ b/man/norm_quantile.Rd
@@ -0,0 +1,59 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/normalize.R
+\name{norm_quantile}
+\alias{norm_quantile}
+\title{Quantile Normalization}
+\description{
+Quantile normalization makes the statistical distribution of values in each
+column identical by replacing the original values with the mean of the
+values at the same rank across all columns. This removes technical variation
+while preserving relative differences between features.
+
+Steps:
+\enumerate{
+\item Rank the values within each column (average taken in case of ties)
+\item Calculate the mean of values at the same rank across all columns
+\item Replace each value with the mean value corresponding to its rank
+}
+
+\deqn{\LARGE
+q_{i,j} = \bar{x}_{rank(i,j)}
+}
+
+Where:
+\itemize{
+\item (\eqn{rank(i,j)}) is the rank of feature \eqn{i} within column \eqn{j}
+\item (\eqn{\bar{x}_{r}}) where \eqn{r = rank(i,j)} is the mean of values with
+rank \eqn{r} across all columns
+\item (\eqn{q_{i,j}}) is the quantile-normalized value
+}
+}
+\section{Note}{
+Library normalization and log normalization are recommended prior to this
+normalization.
+}
+
+\section{params}{
+
+None
+}
+
+\references{
+Bolstad, B.M., Irizarry, R.A., Astrand, M. et al. A comparison of
+normalization methods for high density oligonucleotide array data based on
+variance and bias. Bioinformatics 19, 185–193 (2003).
+https://doi.org/10.1093/bioinformatics/19.2.185
+}
+\seealso{
+\link{process_param}
+
+Other normalization parameters:
+\code{\link{norm_default}},
+\code{\link{norm_l2}},
+\code{\link{norm_library}},
+\code{\link{norm_log}},
+\code{\link{norm_osmfish}},
+\code{\link{norm_pearson}},
+\code{\link{norm_tfidf}}
+}
+\concept{normalization parameters}
diff --git a/man/norm_tfidf.Rd b/man/norm_tfidf.Rd
new file mode 100644
index 000000000..2122b4923
--- /dev/null
+++ b/man/norm_tfidf.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/normalize.R
+\name{norm_tfidf}
+\alias{norm_tfidf}
+\title{TF-IDF Normalization}
+\description{
+TF-IDF (Term Frequency-Inverse Document Frequency) normalization is borrowed
+from natural language processing to identify features that are highly expressed
+in specific samples but not widely expressed across the entire dataset.
+ +\deqn{\LARGE +TF_{i,j} = \frac{x_{i,j}}{\sum_{i} x_{i,j}} +} + +\deqn{\LARGE +IDF_{i} = \log(1 + \frac{n_{samples}}{1 + n_{samples \: where \: feature \: i > 0}}) +} + +\deqn{\LARGE +TFIDF_{i,j} = TF_{i,j} \times IDF_{i} +} + +Where: +\itemize{ +\item (\eqn{x_{i,j}}) is the raw count for feature \eqn{i} in sample \eqn{j} +\item (\eqn{TF_{i,j}}) is the term frequency of feature \eqn{i} in sample \eqn{j} +\item (\eqn{IDF_{i}}) is the inverse document frequency of feature \eqn{i} +\item (\eqn{TFIDF_{i,j}}) is the final TF-IDF normalized value +} +} +\section{Note}{ +\link[=norm_l2]{L2} normalization is commonly performed after TF-IDF normalization +} + +\section{params}{ + +None +} + +\seealso{ +\link{process_param} + +Other normalization parameters: +\code{\link{norm_default}}, +\code{\link{norm_l2}}, +\code{\link{norm_library}}, +\code{\link{norm_log}}, +\code{\link{norm_osmfish}}, +\code{\link{norm_pearson}}, +\code{\link{norm_quantile}} +} +\concept{normalization parameters} diff --git a/man/processData.Rd b/man/processData.Rd new file mode 100644 index 000000000..aee07aaeb --- /dev/null +++ b/man/processData.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/normalize.R +\name{processData} +\alias{processData} +\alias{processData,exprObj,list-method} +\alias{processData,exprObj,normParam-method} +\alias{processData,exprObj,scaleParam-method} +\alias{processData,allMatrix,list-method} +\title{Composable Data Processing} +\usage{ +\S4method{processData}{exprObj,list}(x, param, name = "scaled") + +\S4method{processData}{exprObj,normParam}(x, param, name = "normalized") + +\S4method{processData}{exprObj,scaleParam}(x, param, name = "scaled") + +\S4method{processData}{allMatrix,list}(x, param) +} +\arguments{ +\item{x}{data to transform} + +\item{param}{S4 parameter class defining the transform operation and +params affecting it.} + +\item{name}{character. \link[GiottoClass:giotto_schema]{Object name} to assign +to the output.} + +\item{\dots}{additional params to pass} +} +\description{ +Perform data transformations, or set up chains of transformations and +operations to be applied to matrix type data. \code{processData()} is a generic +for which methods can be defined off both \code{x} (the data to transform), +and \code{param} (the transform operation). +} +\examples{ +m <- matrix(c(0, 0, 3, 2, 0, 5, 4, 0, 0, 1, 12, 0), nrow = 3) + +# single operation +lib_norm <- normParam("library") +lib_norm$scalefactor <- 5000 # alter a default param of library norm +processData(m, lib_norm) + +# chained operations +log_norm <- normParam("log") +zscore_cols <- scaleParam("zscore") +zscore_rows <- scaleParam("zscore", MARGIN = 1) +# this is essentially the same as the default giotto normalization +# only difference is the library norm scalefactor change. +processData(m, list(lib_norm, log_norm, zscore_cols, zscore_rows)) +} +\seealso{ +\link{process_param} for processing operations that can be performed +through \code{processData()} +} diff --git a/man/process_param.Rd b/man/process_param.Rd new file mode 100644 index 000000000..cdeaf46fb --- /dev/null +++ b/man/process_param.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/normalize.R +\name{process_param} +\alias{process_param} +\alias{normParam} +\alias{scaleParam} +\alias{adjustParam} +\title{Data Processing Parameter Class Factories} +\usage{ +normParam(method = "default", ...) + +scaleParam(method = "default", ...) 
+
+adjustParam(method = "limma", ...)
+}
+\arguments{
+\item{method}{character. Name of method to use. See details.}
+
+\item{\dots}{(optional) Additional named parameters relevant to the param
+class.}
+}
+\description{
+Data processing operations in Giotto Suite can be divided into
+normalization, scaling, and adjustments
+}
+\section{normParam methods}{
+
+\itemize{
+\item \code{\link[=norm_default]{"default"}} - default Giotto normalization steps
+(library + log norms)
+\item \code{\link[=norm_library]{"library"}} - library normalization
+\item \code{\link[=norm_log]{"log"}} - log normalization
+\item \code{\link[=norm_osmfish]{"osmfish"}} - osmfish normalization method
+\item \code{\link[=norm_pearson]{"pearson"}} - Lause/Kobak 2020 pearson residuals
+normalization
+\item \code{\link[=norm_quantile]{"quantile"}} - quantile normalization
+\item \code{\link[=norm_tfidf]{"tf-idf"}} - Term Frequency-Inverse Document Frequency
+\item \code{\link[=norm_l2]{"l2"}} - L2 normalization (also known as Euclidean
+normalization)
+}
+}

+\section{scaleParam methods}{
+
+\itemize{
+\item \code{\link[=scale_default]{"default"}} - default Giotto scaling steps (scale along
+features then cells)
+\item \code{\link[=scale_zscore]{"zscore"}} - essentially the same as \code{base::scale()}, but
+with a \code{MARGIN} param allowing scaling along either cols or rows
+}
+}

+\section{adjustParam methods}{
+
+\itemize{
+\item \code{"limma"} - limma batch correction
+}
+}

diff --git a/man/scale_default.Rd b/man/scale_default.Rd
new file mode 100644
index 000000000..e262af4ce
--- /dev/null
+++ b/man/scale_default.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/normalize.R
+\name{scale_default}
+\alias{scale_default}
+\title{Default Giotto Scaling}
+\description{
+2 step \link[=scale_zscore]{z-scoring} along features and samples
+}
+\section{params}{
+
+
+\tabular{ll}{
+\code{scale_feats} \tab logical (default = \code{TRUE}) Whether to scale across
+features \cr
+\code{scale_cells} \tab logical (default = \code{TRUE}) Whether to scale across
+cells/samples \cr
+\code{scale_order} \tab character. One of either \code{"first_feats"} or
+\code{"first_cells"}. When both \code{scale_feats} and \code{scale_cells} are \code{TRUE},
+determines the order in which the 2 scaling operations are performed. \cr
+\code{verbose} \tab logical (default = \code{TRUE}) Whether to be verbose
+}
+}

+\seealso{
+\link{process_param}
+
+Other scaling parameters:
+\code{\link{scale_zscore}}
+}
+\concept{scaling parameters}
diff --git a/man/scale_zscore.Rd b/man/scale_zscore.Rd
new file mode 100644
index 000000000..08c740deb
--- /dev/null
+++ b/man/scale_zscore.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/normalize.R
+\name{scale_zscore}
+\alias{scale_zscore}
+\title{Z Score Scaling}
+\description{
+Wrapper around \code{base::scale()} to make it compatible with the
+\code{\link[=processData]{processData()}} framework. Additionally provides a \code{MARGIN} param.
+ +\deqn{\LARGE +z_{i,j} = \frac{x_{i,j} - \mu_i}{\sigma_i} +} + +Where: +\itemize{ +\item \eqn{x_{i,j}} is the original value for feature \eqn{i} in sample \eqn{j} +\item \eqn{\mu_i} is the mean of feature \eqn{i} across all samples +\item \eqn{\sigma_i} is the standard deviation of feature \eqn{i} across all +samples +\item \eqn{z_{i,j}} is the resulting scaled value +} +} +\section{params}{ + + +\tabular{ll}{ +\code{scale} \tab logical (default = \code{TRUE}) Whether to scale values \cr +\code{center} \tab logical (default = \code{TRUE}) Whether to center values\cr +\code{MARGIN} \tab numeric. Either 1 (rows) or 2 (cols). Direction along which +to perform the operation. +} +} + +\seealso{ +\link{process_param} + +Other scaling parameters: +\code{\link{scale_default}} +} +\concept{scaling parameters} From be6c343101946305a3dd52be4502d2017828dd91 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Fri, 28 Feb 2025 17:32:35 -0500 Subject: [PATCH 2/2] feat: limma via `processData()` and docs --- NAMESPACE | 4 + NEWS.md | 4 + R/normalize.R | 377 +++++++++++++++++++++++++++++---------- man/adjust_limma.Rd | 32 ++++ man/processData.Rd | 20 ++- man/processExpression.Rd | 75 ++++++++ man/process_param.Rd | 8 +- 7 files changed, 415 insertions(+), 105 deletions(-) create mode 100644 man/adjust_limma.Rd create mode 100644 man/processExpression.Rd diff --git a/NAMESPACE b/NAMESPACE index 508e29ff6..a0c754800 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,9 @@ S3method(.DollarNames,CosmxReader) S3method(.DollarNames,VisiumHDReader) S3method(.DollarNames,XeniumReader) +S3method(.DollarNames,adjustParam) +S3method(.DollarNames,normParam) +S3method(.DollarNames,scaleParam) export("%>%") export("activeFeatType<-") export("activeSpatUnit<-") @@ -339,6 +342,7 @@ export(polyStamp) export(preprocessImageToMatrix) export(print.combIcfObject) export(print.icfObject) +export(processExpression) export(processGiotto) export(prov) export(rankSpatialCorGroups) diff --git a/NEWS.md b/NEWS.md index ba8231671..7e713b6ed 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,10 @@ * `importCosMx()` now supports vectors of filepaths when provided to `$load_images()` and `$load_polys()` * `importCosMx()` Selected FOVs are now selected in `plot()`. +## New +* `processExpression()` for `giotto` implemented via the `processData()` framework in {GiottoClass} v0.4.7 (see `?processData` and `?process_param`) +* `L2` and `TF-IDF` normalization methods accessible via the `processData()` framework + # Giotto 4.2.0 (2025/01/17) ## Breaking Changes diff --git a/R/normalize.R b/R/normalize.R index c6085b504..9cca5a20a 100644 --- a/R/normalize.R +++ b/R/normalize.R @@ -1,4 +1,48 @@ # Documentation #### + +#' @name processExpression +#' @title Expression Data Processing +#' @description +#' Perform data transformations, or set up chains of transformations and +#' operations to be applied to expression type data in the `giotto` object. +#' @param gobject `giotto` object +#' @inheritParams processData +#' @param expression_values character. Name of matrix to use +#' @param spat_unit character (optional). spatial unit to use +#' @param feat_type character (optional). feature type to use +#' @param return_gobject logical (optional). Whether to return the `gobject`. +#' When FALSE, the `exprObj` is returned instead. +#' @returns A `giotto` object when `return_gobject = TRUE`. 
Otherwise, an
+#' `exprObj`
+#' @seealso [process_param] for processing operations that can be performed
+#'
+#' [processData()] for the lower level generic handling these operations
+#' @examples
+#' g <- GiottoData::loadGiottoMini("visium")
+#' # single operation
+#' processExpression(g, normParam("library"), name = "library")
+#'
+#' # single operation with changed parameter
+#' lib <- normParam("library")
+#' lib$scalefactor <- 1000
+#' processExpression(g, lib, name = "library2")
+#'
+#' # return the exprObj instead
+#' processExpression(g, lib, name = "library2", return_gobject = FALSE)
+#'
+#' # chained operation (this is the Giotto standard normalization)
+#' processExpression(g,
+#'     list(
+#'         normParam("library"),
+#'         normParam("log"),
+#'         scaleParam("zscore", MARGIN = 2),
+#'         scaleParam("zscore", MARGIN = 1)
+#'     ),
+#'     name = "scaled2"
+#' )
+#' @md
+NULL

 #' @name processData
 #' @title Composable Data Processing
 #' @description
 #' Perform data transformations, or set up chains of transformations and
 #' operations to be applied to matrix type data. `processData()` is a generic
 #' for which methods can be defined off both `x` (the data to transform),
 #' and `param` (the transform operation).
 #' @param x data to transform
 #' @param param S4 parameter class defining the transform operation and
-#' params affecting it.
+#' params affecting it. Can also be a list of several of these objects, acting
+#' as a pipeline.
 #' @param name character. [Object name][GiottoClass::giotto_schema] to assign
 #' to the output.
 #' @param \dots additional params to pass
 #' processData(m, list(lib_norm, log_norm, zscore_cols, zscore_rows))
 #' @seealso [process_param] for processing operations that can be performed
 #' through `processData()`
+#' @seealso [processExpression()] for the way to use this framework with the
+#' `giotto` object
+#' @returns The same class as `x`
 #' @md
 NULL

 #' @section adjustParam methods:
 #'
-#' * `"limma"` - limma batch correction
+#' * [`"limma"`][adjust_limma] - limma batch correction
+#' @seealso [processData()] for the generic used to apply these params
+#' @seealso [processExpression()] for the way to use this framework with the
+#' `giotto` object
 #' @md
 NULL

 #' @seealso [process_param]
 NULL

+#' @name adjust_limma
+#' @title Limma Batch Correction
+#' @description
+#' Batch effect removal via [limma::removeBatchEffect()]
+#'
+#' @section params:
+#'
+#' \tabular{ll}{
+#' `batch_columns` \tab [svkey][GiottoClass::svkey()] (optional) Up to two
+#' columns of information from a Giotto object indicating batches whose
+#' effects should be removed. \cr
+#' `covariate_columns` \tab [svkey][GiottoClass::svkey()] (optional) Columns
+#' of information from a Giotto object indicating covariates
+#' to regress out.
+#' } +#' @examples +#' limma <- adjustParam("limma") +#' limma$covariate_columns <- svkey(feats = c("nr_feats", "total_expr")) +#' +#' g <- GiottoData::loadGiottoMini("visium") +#' processExpression(g, limma, name = "limma") +#' @family adjustment parameters +#' @seealso [process_param] +#' @md +NULL # VIRTUAL classes #### @@ -408,12 +482,15 @@ setClass("scaleParam", contains = c("VIRTUAL", "processParam")) setClass("adjustParam", contains = c("VIRTUAL", "processParam")) # access #### +#' @export .DollarNames.scaleParam <- function(x, pattern) { names(x@param) } +#' @export .DollarNames.normParam <- function(x, pattern) { names(x@param) } +#' @export .DollarNames.adjustParam <- function(x, pattern) { names(x@param) } @@ -437,72 +514,7 @@ setClass("limmaAdjustParam", contains = "adjustParam") setClassUnion("allMatrix", members = c("matrix", "Matrix")) -# params setup #### -.norm_param_lib <- function(...) { - p <- new("libraryNormParam", param = list(...)) - p$scalefactor <- p$scalefactor %null% 6e3 - p -} -.norm_param_log <- function(...) { - p <- new("logNormParam", param = list(...)) - p$base <- p$base %null% 2 - p$offset <- p$offset %null% 1 - p -} -.norm_param_osmfish <- function(...) { - new("osmFISHNormParam", param = list(...)) -} -.norm_param_pears_resid <- function(...) { - p <- new("pearsonResidNormParam", param = list(...)) - p$theta <- p$theta %null% 100 - p -} -.norm_param_quantile <- function(...) { - new("quantileNormParam", param = list(...)) -} -.norm_param_default <- function(...) { - p <- new("defaultNormParam", param = list(...)) - p$library_size_norm <- p$library_size_norm %null% TRUE - p$scalefactor <- p$scalefactor %null% 6e3 - p$log_norm <- p$log_norm %null% TRUE - p$log_offset <- p$log_offset %null% 1 - p$logbase <- p$logbase %null% 2 - p -} -.norm_param_tfidf <- function(...) { - new("tfidfNormParam", param = list(...)) -} -.norm_param_l2 <- function(...) { - new("l2NormParam", param = list(...)) -} - -.scale_param_zscore <- function(...) { - p <- new("zscoreScaleParam", param = list(...)) - p$scale <- p$scale %null% TRUE - p$center <- p$center %null% TRUE - p$MARGIN <- p$MARGIN %null% 2 - p -} -.scale_param_default <- function(...) { - p <- new("defaultScaleParam", param = list(...)) - p$scale_feats <- p$scale_feats %null% TRUE - p$scale_cells <- p$scale_cells %null% TRUE - p$scale_order <- p$scale_order %null% c("first_feats", "first_cells") - p$verbose <- p$verbose %null% TRUE - p -} - -.adjust_param_limma <- function(...) { - p <- new("limmaAdjustParam", param = list(...)) - p@param <- if (is.null(p@param$batch_columns)) { - c(p@param, list(batch_columns = NULL)) - } - p@param <- if (is.null(p@param$covariate_columns)) { - c(p@param, list(covariate_columns = NULL)) - } - p -} # param factories #### @@ -555,24 +567,19 @@ adjustParam <- function(method = "limma", ...) { # * ANY #### setMethod("processData", -signature(x = "ANY", param = "ANY"), function(x, param) { +signature(x = "ANY", param = "ANY"), function(x, param, ...) { stop(wrap_txtf("param of class '%s' is not recognized for use with '%s'", class(param), class(x)), call. = FALSE) }) -setMethod("processData", - signature(x = "ANY", param = "adjustParam"), function(x, param) { - " " - }) - # * exprObj #### #' @rdname processData setMethod("processData", signature(x = "exprObj", param = "list"), - function(x, param, name = "scaled") { - x[] <- processData(x[], param) + function(x, param, name = "scaled", ...) { + x[] <- processData(x[], param, ...) 
objName(x) <- name return(x) } @@ -581,8 +588,18 @@ setMethod("processData", #' @rdname processData setMethod("processData", signature(x = "exprObj", param = "normParam"), - function(x, param, name = "normalized") { - x[] <- processData(x[], param) + function(x, param, name = "normalized", ...) { + x[] <- processData(x[], param, ...) + objName(x) <- name + return(x) + } +) + +#' @rdname processData +setMethod("processData", + signature(x = "exprObj", param = "adjustParam"), + function(x, param, name = "custom", ...) { + x[] <- processData(x[], param, ...) objName(x) <- name return(x) } @@ -591,12 +608,12 @@ setMethod("processData", # specialized handling for osmfish setMethod("processData", signature(x = "exprObj", param = "osmFISHNormParam"), - function(x, param, name = "custom") { + function(x, param, name = "custom", ...) { if (!featType(x) %in% c("rna", "RNA")) { warning("Caution: osmFISH normalization was developed for RNA in situ data", call. = FALSE) } - x[] <- processData(x[], param) + x[] <- processData(x[], param, ...) objName(x) <- name return(x) } @@ -605,12 +622,12 @@ setMethod("processData", # specialized handling for pearson residual setMethod("processData", signature(x = "exprObj", param = "pearsonResidNormParam"), - function(x, param, name = "scaled") { + function(x, param, name = "scaled", ...) { if (!featType(x) %in% c("rna", "RNA")) { warning("Caution: pearson residual normalization was developed for RNA count normalization", call. = FALSE) } - x[] <- processData(x[], param) + x[] <- processData(x[], param, ...) objName(x) <- name return(x) } @@ -619,8 +636,8 @@ setMethod("processData", #' @rdname processData setMethod("processData", signature(x = "exprObj", param = "scaleParam"), - function(x, param, name = "scaled") { - x[] <- processData(x[], param) + function(x, param, name = "scaled", ...) { + x[] <- processData(x[], param, ...) objName(x) <- name return(x) } @@ -634,9 +651,9 @@ setMethod("processData", #' @rdname processData setMethod("processData", signature(x = "allMatrix", param = "list"), - function(x, param) { + function(x, param, ...) { for (p in param) { - x <- processData(x, p) + x <- processData(x, p, ...) } return(x) } @@ -646,20 +663,20 @@ setMethod("processData", # *** library norm #### setMethod("processData", signature(x = "allMatrix", param = "libraryNormParam"), - function(x, param) { + function(x, param, ...) { .lib_norm_giotto(mymatrix = x, scalefactor = param$scalefactor) } ) # *** log norm #### setMethod("processData", signature(x = "allMatrix", param = "logNormParam"), - function(x, param) { + function(x, param, ...) { log(x + param$offset) / log(param$base) } ) setMethod("processData", signature(x = "Matrix", param = "logNormParam"), - function(x, param) { + function(x, param, ...) { x@x <- log(x@x + param$offset) / log(param$base) x } @@ -667,7 +684,7 @@ setMethod("processData", # *** osmFISH norm #### setMethod("processData", signature(x = "allMatrix", param = "osmFISHNormParam"), - function(x, param) { + function(x, param, ...) { # 1. normalize raw expr per gene with scale-factor equal to number of genes norm_feats <- (x / rowSums_flex(x)) * nrow(x) # 2. normalize per cells with scale-factor equal to number of cells @@ -677,7 +694,7 @@ setMethod("processData", # *** pearson norm #### setMethod("processData", signature(x = "allMatrix", param = "pearsonResidNormParam"), - function(x, param) { + function(x, param, ...) 
{
         .pears_resid_citation(verbose = param$verbose)
         .csums <- .csum_nodrop.Matrix
         .rsums <- .rsum_nodrop.Matrix
@@ -692,14 +709,14 @@ setMethod("processData",
 # *** quantile norm ####
 setMethod("processData",
     signature(x = "allMatrix", param = "quantileNormParam"),
-    function(x, param) {
+    function(x, param, ...) {
         .qnorm(x)
     }
 )
 # *** tf-idf norm ####
 setMethod("processData",
     signature(x = "allMatrix", param = "tfidfNormParam"),
-    function(x, param) {
+    function(x, param, ...) {
         # compute term frequency (TF)
         tf <- x / rowSums_flex(x)
         # compute inverse document frequency (IDF)
@@ -711,7 +728,7 @@ setMethod("processData",
 # *** default norm ####
 setMethod("processData",
     signature(x = "allMatrix", param = "defaultNormParam"),
-    function(x, param) {
+    function(x, param, ...) {
         plist <- list()
         # 1. library size normalization
         if (isTRUE(param$library_size_norm)) {
@@ -725,13 +742,13 @@ setMethod("processData",
                 log_offset = param$log_offset)
             )
         }
-        processData(x, plist)
+        processData(x, plist, ...)
     }
 )
 # *** L2 norm ####
 setMethod("processData",
     signature(x = "allMatrix", param = "l2NormParam"),
-    function(x, param) {
+    function(x, param, ...) {
         .l2_norm(x)
     }
 )
@@ -781,14 +798,66 @@ setMethod("processData",
 )
+# ** adjust ####
+# *** limma ####
+setMethod("processData",
+    signature(x = "allMatrix", param = "limmaAdjustParam"),
+    function(x, param, context = NULL, ...) {
+        package_check("limma")
+        if (is.null(context)) {
+            c(
+                "limma adjustment: `context` arg should be a gobject",
+                "containing the columns to use for batch and/or covariate",
+                "information."
+            ) %>%
+                wrap_txt(errWidth = TRUE) %>%
+                stop(call. = FALSE)
+        }
+        batches <- param$batch_columns
+        covariates <- param$covariate_columns
+        if (is.null(batches) && is.null(covariates)) {
+            "limma adjustment: At least one of `batch_columns` or
+            `covariate_columns` must be provided." %>%
+                wrap_txt() %>%
+                stop(call. = FALSE)
+        }
+        sample_order <- colnames(x)
+        limma_args <- list(x = x, ...)
+        # batches
+        if (!is.null(batches)) {
+            b_dt <- .get_svkey(batches, context, sample_order = sample_order)
+            if (ncol(b_dt) > 2) {
+                "a max of 2 columns is allowed for 'batch_columns'" %>%
+                    stop(call. = FALSE)
+            } else {
+                limma_args$batch <- b_dt[[1]]
+                if (ncol(b_dt) == 2) {
+                    limma_args$batch2 <- b_dt[[2]]
+                }
+            }
+        }
+        # covariates
+        if (!is.null(covariates)) {
+            c_dt <- .get_svkey(covariates, context,
+                sample_order = sample_order)
+            limma_args$covariates <- as.matrix(c_dt)
+        }
+        do.call(limma::removeBatchEffect, args = limma_args) %>%
+            as("Matrix")
+    })
+
+
+#' @rdname processExpression
+#' @export
 processExpression <- function(gobject, param, name,
     expression_values = "raw",
     spat_unit = NULL,
     feat_type = NULL,
-    return_gobject = TRUE) {
+    return_gobject = TRUE,
+    ...) {
     ex <- getExpression(gobject,
         values = expression_values,
         spat_unit = spat_unit,
@@ -796,7 +865,23 @@ processExpression <- function(gobject, param, name,
         output = "exprObj",
         set_defaults = TRUE
     )
-    res <- processData(ex, param, name = name)
+    process_args <- list(
+        x = ex,
+        param = param,
+        name = name,
+        ...
+    )
+
+    # detect svkeys
+    if (!is.list(param)) param <- list(param)
+    param_dump <- lapply(param, function(p) {
+        p@param
+    })
+    has_svk <- .check_svkey(unlist(param_dump), type = "any")
+
+    if (has_svk) process_args$context <- gobject
+
+    res <- do.call(processData, args = process_args)
     if(!isTRUE(return_gobject)) return(res)
     setGiotto(gobject, res)
 }
@@ -1007,6 +1092,100 @@ normalizeGiotto <- function(gobject,
 # internals ####
+# * params setup ####
+.norm_param_lib <- function(...) {
+    p <- new("libraryNormParam", param = list(...))
+    p$scalefactor <- p$scalefactor %null% 6e3
+    p
+}
+.norm_param_log <- function(...) {
+    p <- new("logNormParam", param = list(...))
+    p$base <- p$base %null% 2
+    p$offset <- p$offset %null% 1
+    p
+}
+.norm_param_osmfish <- function(...) {
+    new("osmFISHNormParam", param = list(...))
+}
+.norm_param_pears_resid <- function(...) {
+    p <- new("pearsonResidNormParam", param = list(...))
+    p$theta <- p$theta %null% 100
+    p
+}
+.norm_param_quantile <- function(...) {
+    new("quantileNormParam", param = list(...))
+}
+.norm_param_default <- function(...) {
+    p <- new("defaultNormParam", param = list(...))
+    p$library_size_norm <- p$library_size_norm %null% TRUE
+    p$scalefactor <- p$scalefactor %null% 6e3
+    p$log_norm <- p$log_norm %null% TRUE
+    p$log_offset <- p$log_offset %null% 1
+    p$logbase <- p$logbase %null% 2
+    p
+}
+.norm_param_tfidf <- function(...) {
+    new("tfidfNormParam", param = list(...))
+}
+.norm_param_l2 <- function(...) {
+    new("l2NormParam", param = list(...))
+}
+
+.scale_param_zscore <- function(...) {
+    p <- new("zscoreScaleParam", param = list(...))
+    p$scale <- p$scale %null% TRUE
+    p$center <- p$center %null% TRUE
+    p$MARGIN <- p$MARGIN %null% 2
+    p
+}
+.scale_param_default <- function(...) {
+    p <- new("defaultScaleParam", param = list(...))
+    p$scale_feats <- p$scale_feats %null% TRUE
+    p$scale_cells <- p$scale_cells %null% TRUE
+    p$scale_order <- p$scale_order %null% c("first_feats", "first_cells")
+    p$verbose <- p$verbose %null% TRUE
+    p
+}
+
+
+.adjust_param_limma <- function(...) {
+    p <- new("limmaAdjustParam", param = list(...))
+    # append missing keys as NULL entries without clobbering existing @param
+    if (is.null(p@param$batch_columns)) {
+        p@param <- c(p@param, list(batch_columns = NULL))
+    }
+    if (is.null(p@param$covariate_columns)) {
+        p@param <- c(p@param, list(covariate_columns = NULL))
+    }
+    p
+}
+
+
+# * implementations ####
+
+.check_svkey <- function(x, type = c("all", "any")) {
+    type <- match.arg(type, choices = c("all", "any"))
+    if (!inherits(x, "list")) x <- list(x)
+    res <- vapply(x, FUN = inherits, FUN.VALUE = logical(1L), "svkey")
+    switch(type,
+        "any" = any(res),
+        "all" = all(res)
+    )
+}
+
+# get from gobject and ensure order is correct.
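+# (hypothetical sketch: .get_svkey(svkey(feats = "total_expr"), g,
+#     sample_order = colnames(m)), where `g` and `m` are invented stand-ins
+#     for a gobject and its expression matrix)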
+# return without the cell_ID col
+.get_svkey <- function(x, gobject, sample_order = NULL) {
+    if (!inherits(x, "list")) x <- list(x)
+    reslist <- lapply(x, function(key) {
+        data <- key@get(gobject)
+        if (!is.null(sample_order)) {
+            # reorder rows so cells follow sample_order
+            data <- data[match(sample_order, cell_ID)]
+        }
+        return(data[, -"cell_ID"])
+    })
+    Reduce(cbind, reslist)
+}
+
 .l2_norm <- function(x) {
     # Calculate column norms (Euclidean length of each column)
     col_norms <- sqrt(colSums_flex(x^2))
diff --git a/man/adjust_limma.Rd b/man/adjust_limma.Rd
new file mode 100644
index 000000000..55386ae4c
--- /dev/null
+++ b/man/adjust_limma.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/normalize.R
+\name{adjust_limma}
+\alias{adjust_limma}
+\title{Limma Batch Correction}
+\description{
+Batch effect removal via \code{\link[limma:removeBatchEffect]{limma::removeBatchEffect()}}
+}
+\section{params}{
+
+
+\tabular{ll}{
+\code{batch_columns} \tab \link[GiottoClass:spatValues]{svkey} (optional) Up to two
+columns of metadata from a Giotto object indicating the batches whose
+effects should be removed. \cr
+\code{covariate_columns} \tab \link[GiottoClass:spatValues]{svkey} (optional) Columns
+of metadata from a Giotto object indicating covariates
+to regress out.
+}
+}
+
+\examples{
+limma <- adjustParam("limma")
+limma$covariate_columns <- svkey(feats = c("nr_feats", "total_expr"))
+
+g <- GiottoData::loadGiottoMini("visium")
+processExpression(g, limma, name = "limma")
+}
+\seealso{
+\link{process_param}
+}
+\concept{adjustment parameters}
diff --git a/man/processData.Rd b/man/processData.Rd
index aee07aaeb..d68376800 100644
--- a/man/processData.Rd
+++ b/man/processData.Rd
@@ -4,29 +4,36 @@
 \alias{processData}
 \alias{processData,exprObj,list-method}
 \alias{processData,exprObj,normParam-method}
+\alias{processData,exprObj,adjustParam-method}
 \alias{processData,exprObj,scaleParam-method}
 \alias{processData,allMatrix,list-method}
 \title{Composable Data Processing}
 \usage{
-\S4method{processData}{exprObj,list}(x, param, name = "scaled")
+\S4method{processData}{exprObj,list}(x, param, name = "scaled", ...)
 
-\S4method{processData}{exprObj,normParam}(x, param, name = "normalized")
+\S4method{processData}{exprObj,normParam}(x, param, name = "normalized", ...)
 
-\S4method{processData}{exprObj,scaleParam}(x, param, name = "scaled")
+\S4method{processData}{exprObj,adjustParam}(x, param, name = "custom", ...)
 
-\S4method{processData}{allMatrix,list}(x, param)
+\S4method{processData}{exprObj,scaleParam}(x, param, name = "scaled", ...)
+
+\S4method{processData}{allMatrix,list}(x, param, ...)
 }
 \arguments{
 \item{x}{data to transform}
 
 \item{param}{S4 parameter class defining the transform operation and
-params affecting it.}
+params affecting it. Can also be a list of several of these objects, acting
+as a pipeline.}
 
 \item{name}{character. \link[GiottoClass:giotto_schema]{Object name} to assign
 to the output.}
 
 \item{\dots}{additional params to pass}
 }
+\value{
+The same class as \code{x}
+}
 \description{
 Perform data transformations, or set up chains of transformations and
 operations to be applied to matrix type data.
\code{processData()} is a generic
@@ -52,4 +59,7 @@
 processData(m, list(lib_norm, log_norm, zscore_cols, zscore_rows))
 \seealso{
 \link{process_param} for processing operations that can be performed
 through \code{processData()}
+
+\code{\link[=processExpression]{processExpression()}} for the way to use this framework with the
+\code{giotto} object
 }
diff --git a/man/processExpression.Rd b/man/processExpression.Rd
new file mode 100644
index 000000000..3b90e8e84
--- /dev/null
+++ b/man/processExpression.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/normalize.R
+\name{processExpression}
+\alias{processExpression}
+\title{Expression Data Processing}
+\usage{
+processExpression(
+  gobject,
+  param,
+  name,
+  expression_values = "raw",
+  spat_unit = NULL,
+  feat_type = NULL,
+  return_gobject = TRUE,
+  ...
+)
+}
+\arguments{
+\item{gobject}{\code{giotto} object}
+
+\item{param}{S4 parameter class defining the transform operation and
+params affecting it. Can also be a list of several of these objects, acting
+as a pipeline.}
+
+\item{name}{character. \link[GiottoClass:giotto_schema]{Object name} to assign
+to the output.}
+
+\item{expression_values}{character. Name of matrix to use}
+
+\item{spat_unit}{character (optional). spatial unit to use}
+
+\item{feat_type}{character (optional). feature type to use}
+
+\item{return_gobject}{logical (optional). Whether to return the \code{gobject}.
+When FALSE, the \code{exprObj} is returned instead.}
+
+\item{...}{additional params to pass}
+}
+\value{
+A \code{giotto} object when \code{return_gobject = TRUE}. Otherwise, an
+\code{exprObj}
+}
+\description{
+Perform data transformations, or set up chains of transformations and
+operations to be applied to expression type data in the \code{giotto} object.
+}
+\examples{
+g <- GiottoData::loadGiottoMini("visium")
+# single operation
+processExpression(g, normParam("library"), name = "library")
+
+# single operation with changed parameter
+lib <- normParam("library")
+lib$scalefactor <- 1000
+processExpression(g, lib, name = "library2")
+
+# return the exprObj instead
+processExpression(g, lib, name = "library2", return_gobject = FALSE)
+
+# chained operation (this is the Giotto standard normalization)
+processExpression(g,
+    list(
+        normParam("library"),
+        normParam("log"),
+        scaleParam("zscore", MARGIN = 2),
+        scaleParam("zscore", MARGIN = 1)
+    ),
+    name = "scaled2"
+)
+}
+\seealso{
+\link{process_param} for processing operations that can be performed
+
+\code{\link[=processData]{processData()}} for the lower-level generic handling these operations
+}
diff --git a/man/process_param.Rd b/man/process_param.Rd
index cdeaf46fb..a769ae578 100644
--- a/man/process_param.Rd
+++ b/man/process_param.Rd
@@ -53,7 +53,13 @@
 with a \code{MARGIN} param allowing scaling long either cols or rows
 \section{adjustParam methods}{
 \itemize{
-\item \code{"limma"} - limma batch correction
+\item \code{\link[=adjust_limma]{"limma"}} - limma batch correction
 }
 }
+\seealso{
+\code{\link[=processData]{processData()}} for the generic used to apply these params
+
+\code{\link[=processExpression]{processExpression()}} for the way to use this framework with the
+\code{giotto} object
+}
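Note on the limma hookup: below is a minimal, self-contained sketch of the
`removeBatchEffect()` call that the `limmaAdjustParam` method above
assembles. The matrix, batch labels, and covariate values are invented for
illustration; `batch2` is only filled in when a second batch column is
supplied.

    # illustration only: mirrors `limma_args` as built in the method above
    library(limma)
    set.seed(1)
    m <- matrix(rnorm(50), nrow = 5,
        dimnames = list(paste0("feat", 1:5), paste0("cell", 1:10)))
    batch <- rep(c("a", "b"), each = 5)   # stands in for `batch_columns`
    covar <- matrix(runif(10), ncol = 1)  # stands in for `covariate_columns`
    adj <- limma::removeBatchEffect(m, batch = batch, covariates = covar)
    dim(adj)  # same 5 x 10 matrix, with the batch effect regressed out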