From cc51bae4ad5c3807e9183c1237633dde5feea693 Mon Sep 17 00:00:00 2001
From: Art Wild <wildart@gmail.com>
Date: Thu, 25 Feb 2021 02:40:16 -0500
Subject: [PATCH] refactor whitening for closer integration with StatsBase
 types (part of #109)

---
 docs/make.jl             |  3 +-
 docs/src/index.md        |  3 +-
 docs/src/whiten.md       | 36 +++++++++++++++
 src/MultivariateStats.jl | 12 +++--
 src/common.jl            |  2 +-
 src/types.jl             | 14 ++++++
 src/whiten.jl            | 99 +++++++++++++++++++++++++++++++++-------
 test/whiten.jl           | 10 +++-
 8 files changed, 153 insertions(+), 26 deletions(-)
 create mode 100644 docs/src/whiten.md
 create mode 100644 src/types.jl

diff --git a/docs/make.jl b/docs/make.jl
index ee5a7bb..1898011 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -7,7 +7,8 @@ end
 makedocs(
     sitename = "MultivariateStats.jl",
     modules = [MultivariateStats],
-    pages = ["index.md"]
+    pages = ["index.md",
+             "whiten.md"]
 )
 
 deploydocs(
diff --git a/docs/src/index.md b/docs/src/index.md
index f907015..419815b 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -10,9 +10,8 @@ end
 
 [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl) is a Julia package for multivariate statistical analysis. It provides a rich set of useful analysis techniques, such as PCA, CCA, LDA, ICA, etc.
 
-
 ```@contents
-Pages = []
+Pages = ["whiten.md"]
 Depth = 2
 ```
 
diff --git a/docs/src/whiten.md b/docs/src/whiten.md
new file mode 100644
index 0000000..229f144
--- /dev/null
+++ b/docs/src/whiten.md
@@ -0,0 +1,36 @@
+# Data Transformation
+
+## Whitening
+
+A [whitening transformation](http://en.wikipedia.org/wiki/Whitening_transformation) is a decorrelation transformation that transforms a set of random variables into a set of new random variables with identity covariance (uncorrelated with unit variances).
+
+In particular, suppose a random vector has covariance ``\mathbf{C}``, then a whitening transform ``\mathbf{W}`` is one that satisfies:
+
+```math
+   \mathbf{W}^T \mathbf{C} \mathbf{W} = \mathbf{I}
+```
+
+Note that ``\mathbf{W}`` is generally not unique. In particular, if ``\mathbf{W}`` is a whitening transform, so is any of its rotations ``\mathbf{W} \mathbf{R}`` with ``\mathbf{R}^T \mathbf{R} = \mathbf{I}``.
+
+The package uses [`Whitening`](@ref) to represent a whitening transform.
+
+```@docs
+Whitening
+```
+
+A whitening transformation can be fitted to data using the `fit` method.
+
+```@docs
+fit(::Type{Whitening}, X::AbstractMatrix{T}; kwargs...) where {T<:Real}
+transform(::Whitening, ::AbstractVecOrMat)
+indim
+outdim
+mean(::Whitening)
+size(::Whitening)
+```
+
+Additional methods
+```@docs
+cov_whitening
+cov_whitening!
+```
diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl
index 3ce4b37..1b3db05 100644
--- a/src/MultivariateStats.jl
+++ b/src/MultivariateStats.jl
@@ -1,16 +1,17 @@
 module MultivariateStats
     using LinearAlgebra
-    using StatsBase: SimpleCovariance, CovarianceEstimator
+    using StatsBase: SimpleCovariance, CovarianceEstimator, RegressionModel,
+                     AbstractDataTransform
     import Statistics: mean, var, cov, covm
     import Base: length, size, show, dump
-    import StatsBase: fit, predict, ConvergenceException
+    import StatsBase: fit, predict, predict!, ConvergenceException, dof_residual, coef
     import SparseArrays
     import LinearAlgebra: eigvals
 
     export
 
     ## common
-    evaluate,           # evaluate discriminant function values (imported from Base)
+    evaluate,           # evaluate discriminant function values
     predict,            # use a model to predict responses (imported from StatsBase)
     fit,                # fit a model to data (imported from StatsBase)
     centralize,         # subtract a mean vector from each column
@@ -112,8 +113,8 @@ module MultivariateStats
     faem,                   # Maximum likelihood probabilistic PCA
     facm                    # EM algorithm for probabilistic PCA
 
-
     ## source files
+    include("types.jl")
     include("common.jl")
     include("lreg.jl")
     include("whiten.jl")
@@ -126,4 +127,7 @@ module MultivariateStats
     include("ica.jl")
     include("fa.jl")
 
+    # @deprecate transform(m, x; kwargs...) predict(m, x; kwargs...) #ex=false
+    # @deprecate transform(m; kwargs...) predict(m; kwargs...) #ex=false
+
 end # module
diff --git a/src/common.jl b/src/common.jl
index 9d6d094..3090ecc 100644
--- a/src/common.jl
+++ b/src/common.jl
@@ -20,7 +20,7 @@ decentralize(x::AbstractMatrix, m::AbstractVector) = (isempty(m) ? x : x .+ m)
 
 # get a full mean vector
 
-fullmean(d::Int, mv::Vector{T}) where T = (isempty(mv) ? zeros(T, d) : mv)
+fullmean(d::Int, mv::AbstractVector{T}) where T = (isempty(mv) ? zeros(T, d) : mv)
 
 preprocess_mean(X::AbstractMatrix{T}, m) where T<:Real =
     (m === nothing ? vec(mean(X, dims=2)) : m == 0 ? T[] :  m)
diff --git a/src/types.jl b/src/types.jl
new file mode 100644
index 0000000..b220411
--- /dev/null
+++ b/src/types.jl
@@ -0,0 +1,14 @@
+
+"""
+    indim(m)
+
+Get the input dimension of the model `m`.
+"""
+function indim(m::RegressionModel) end
+
+"""
+    outdim(m)
+
+Get the output dimension of the model `m`.
+"""
+function outdim(m::RegressionModel) end
diff --git a/src/whiten.jl b/src/whiten.jl
index 51037be..7c3ad6f 100644
--- a/src/whiten.jl
+++ b/src/whiten.jl
@@ -1,27 +1,51 @@
 # Whitening
 
-## Solve whitening based on covariance
-#
-# finds W, such that W'CW = I
-#
+"""
+    cov_whitening(C)
+
+Derive the whitening transform coefficient matrix `W` given the covariance matrix `C`. Here, `C` can be either a square matrix, or an instance of `Cholesky`.
+
+Internally, this function solves the whitening transform using Cholesky factorization. The rationale is as follows: let ``\\mathbf{C} = \\mathbf{U}^T \\mathbf{U}`` and ``\\mathbf{W} = \\mathbf{U}^{-1}``, then ``\\mathbf{W}^T \\mathbf{C} \\mathbf{W} = \\mathbf{I}``.
+
+**Note:** The returned matrix `W` is an upper triangular matrix.
+"""
 function cov_whitening(C::Cholesky{T}) where {T<:Real}
     cf = C.UL
     Matrix{T}(inv(istriu(cf) ? cf : cf'))
 end
 
-cov_whitening!(C::DenseMatrix{<:Real}) = cov_whitening(cholesky!(Hermitian(C, :U)))
-cov_whitening(C::DenseMatrix{<:Real}) = cov_whitening!(copy(C))
+"""
+    cov_whitening!(C)
+
+In-place version of `cov_whitening(C)`, in which the input matrix `C` will be overwritten during computation. This can be more efficient when `C` is no longer used.
+"""
+cov_whitening!(C::AbstractMatrix{<:Real}) = cov_whitening(cholesky!(Hermitian(C, :U)))
+cov_whitening(C::AbstractMatrix{<:Real}) = cov_whitening!(copy(C))
+
+"""
+    cov_whitening!(C, regcoef)
 
-cov_whitening!(C::DenseMatrix{<:Real}, regcoef::Real) = cov_whitening!(regularize_symmat!(C, regcoef))
-cov_whitening(C::DenseMatrix{<:Real}, regcoef::Real) = cov_whitening!(copy(C), regcoef)
+In-place version of `cov_whitening(C, regcoef)`, in which the input matrix `C` will be overwritten during computation. This can be more efficient when `C` is no longer used.
+"""
+cov_whitening!(C::AbstractMatrix{<:Real}, regcoef::Real) = cov_whitening!(regularize_symmat!(C, regcoef))
+
+"""
+    cov_whitening(C, regcoef)
+
+Derive a whitening transform based on a regularized covariance, as `C + (eigmax(C) * regcoef) * I`.
+"""
+cov_whitening(C::AbstractMatrix{<:Real}, regcoef::Real) = cov_whitening!(copy(C), regcoef)
 
 ## Whitening type
 
-struct Whitening{T<:Real}
-    mean::Vector{T}
-    W::Matrix{T}
+"""
+A whitening transform representation.
+"""
+struct Whitening{T<:Real} <: AbstractDataTransform
+    mean::AbstractVector{T}
+    W::AbstractMatrix{T}
 
-    function Whitening{T}(mean::Vector{T}, W::Matrix{T}) where {T<:Real}
+    function Whitening{T}(mean::AbstractVector{T}, W::AbstractMatrix{T}) where {T<:Real}
         d, d2 = size(W)
         d == d2 || error("W must be a square matrix.")
         isempty(mean) || length(mean) == d ||
@@ -29,17 +53,53 @@ struct Whitening{T<:Real}
         return new(mean, W)
     end
 end
-Whitening(mean::Vector{T}, W::Matrix{T}) where {T<:Real} = Whitening{T}(mean, W)
+Whitening(mean::AbstractVector{T}, W::AbstractMatrix{T}) where {T<:Real} = Whitening{T}(mean, W)
 
 indim(f::Whitening) = size(f.W, 1)
 outdim(f::Whitening) = size(f.W, 2)
+
+"""
+    size(f)
+
+Dimensions of the coefficient matrix of the whitening transform `f`.
+"""
+size(f::Whitening) = size(f.W)
+
+"""
+    mean(f)
+
+Get the mean vector of the whitening transformation `f`.
+
+**Note:** if mean is empty, this function returns a zero vector of length [`indim`](@ref).
+"""
 mean(f::Whitening) = fullmean(indim(f), f.mean)
 
+
+"""
+    transform(f, x)
+
+Apply the whitening transform `f` to a vector or a matrix `x` with samples in columns, as ``\\mathbf{W}^T (\\mathbf{x} - \\boldsymbol{\\mu})``.
+"""
 transform(f::Whitening, x::AbstractVecOrMat{<:Real}) = transpose(f.W) * centralize(x, f.mean)
 
-## Fit whitening to data
+"""
+    fit(::Type{Whitening},  X::AbstractMatrix{T}; kwargs...)
+
+Estimate a whitening transform from the data given in `X`. Here, `X` should be a matrix, whose columns give the samples.
 
-function fit(::Type{Whitening}, X::DenseMatrix{T};
+This function returns an instance of [`Whitening`](@ref).
+
+**Keyword Arguments:**
+- `regcoef`: The regularization coefficient. When `regcoef` is positive, the covariance is regularized as `C + (eigmax(C) * regcoef) * I`. The default value is `zero(T)`.
+
+- `mean`: The mean vector, which can be either of:
+    - `0`: the input data has already been centralized
+    - `nothing`: this function will compute the mean (**default**)
+    - a pre-computed mean vector
+
+**Note:** This function internally relies on [`cov_whitening`](@ref) to derive the transformation `W`.
+"""
+function fit(::Type{Whitening}, X::AbstractMatrix{T};
              mean=nothing, regcoef::Real=zero(T)) where {T<:Real}
     n = size(X, 2)
     n > 1 || error("X must contain more than one sample.")
@@ -51,7 +111,7 @@ end
 
 # invsqrtm
 
-function _invsqrtm!(C::Matrix{<:Real})
+function _invsqrtm!(C::AbstractMatrix{<:Real})
     n = size(C, 1)
     size(C, 2) == n || error("C must be a square matrix.")
     E = eigen!(Symmetric(C))
@@ -64,4 +124,9 @@ function _invsqrtm!(C::Matrix{<:Real})
     return U * transpose(U)
 end
 
-invsqrtm(C::DenseMatrix{<:Real}) = _invsqrtm!(copy(C))
+"""
+    invsqrtm(C)
+
+Compute `inv(sqrt(C))` through symmetric eigenvalue decomposition.
+"""
+invsqrtm(C::AbstractMatrix{<:Real}) = _invsqrtm!(copy(C))
diff --git a/test/whiten.jl b/test/whiten.jl
index 69f296e..3f3b8a4 100644
--- a/test/whiten.jl
+++ b/test/whiten.jl
@@ -1,5 +1,5 @@
 using MultivariateStats
-using LinearAlgebra
+using LinearAlgebra, StatsBase, SparseArrays
 using Test
 import Statistics: mean, cov
 import Random
@@ -55,6 +55,9 @@ import Random
     W = f.W
     @test isa(f, Whitening{Float64})
     @test mean(f) === f.mean
+    @test indim(f) == d
+    @test outdim(f) == d
+    @test size(f) == (d,d)
     @test istriu(W)
     @test W'C * W ≈ Matrix(I, d, d)
     @test transform(f, X) ≈ W' * (X .- f.mean)
@@ -92,4 +95,9 @@ import Random
     # type consistency
     @test eltype(mean(M)) == Float64
     @test eltype(mean(MM)) == Float32
+
+    # sparse arrays
+    SX = sprand(Float32, d, n, 0.75)
+    SM = fit(Whitening, SX; mean=sprand(Float32, 3, 0.75))
+    @test transform(SM, SX) isa Matrix{Float32}
 end