From cc51bae4ad5c3807e9183c1237633dde5feea693 Mon Sep 17 00:00:00 2001 From: Art Wild Date: Thu, 25 Feb 2021 02:40:16 -0500 Subject: [PATCH] refactor whitening for closer integration with StatsBase types (part of #109) --- docs/make.jl | 3 +- docs/src/index.md | 3 +- docs/src/whiten.md | 36 +++++++++++++++ src/MultivariateStats.jl | 12 +++-- src/common.jl | 2 +- src/types.jl | 14 ++++++ src/whiten.jl | 99 +++++++++++++++++++++++++++++++++------- test/whiten.jl | 10 +++- 8 files changed, 153 insertions(+), 26 deletions(-) create mode 100644 docs/src/whiten.md create mode 100644 src/types.jl diff --git a/docs/make.jl b/docs/make.jl index ee5a7bb..1898011 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -7,7 +7,8 @@ end makedocs( sitename = "MultivariateStats.jl", modules = [MultivariateStats], - pages = ["index.md"] + pages = ["index.md", + "whiten.md"] ) deploydocs( diff --git a/docs/src/index.md b/docs/src/index.md index f907015..419815b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -10,9 +10,8 @@ end [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl) is a Julia package for multivariate statistical analysis. It provides a rich set of useful analysis techniques, such as PCA, CCA, LDA, ICA, etc. - ```@contents -Pages = [] +Pages = ["whiten.md"] Depth = 2 ``` diff --git a/docs/src/whiten.md b/docs/src/whiten.md new file mode 100644 index 0000000..229f144 --- /dev/null +++ b/docs/src/whiten.md @@ -0,0 +1,36 @@ +# Data Transformation + +## Whitening + +A [whitening transformation](http://en.wikipedia.org/wiki/Whitening_transformation) is a decorrelation transformation that transforms a set of random variables into a set of new random variables with identity covariance (uncorrelated with unit variances). 
+ +In particular, suppose a random vector has covariance ``\mathbf{C}``, then a whitening transform ``\mathbf{W}`` is one that satisfies: + +```math + \mathbf{W}^T \mathbf{C} \mathbf{W} = \mathbf{I} +``` + +Note that ``\mathbf{W}`` is generally not unique. In particular, if ``\mathbf{W}`` is a whitening transform, so is any of its rotations ``\mathbf{W} \mathbf{R}`` with ``\mathbf{R}^T \mathbf{R} = \mathbf{I}``. + +The package uses [`Whitening`](@ref) to represent a whitening transform. + +```@docs +Whitening +``` + +A whitening transformation can be fitted to data using the `fit` method. + +```@docs +fit(::Type{Whitening}, X::AbstractMatrix{T}; kwargs...) where {T<:Real} +transform(::Whitening, ::AbstractVecOrMat) +indim +outdim +mean(::Whitening) +size(::Whitening) +``` + +Additional methods +```@docs +cov_whitening +cov_whitening! +``` diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl index 3ce4b37..1b3db05 100644 --- a/src/MultivariateStats.jl +++ b/src/MultivariateStats.jl @@ -1,16 +1,17 @@ module MultivariateStats using LinearAlgebra - using StatsBase: SimpleCovariance, CovarianceEstimator + using StatsBase: SimpleCovariance, CovarianceEstimator, RegressionModel, + AbstractDataTransform import Statistics: mean, var, cov, covm import Base: length, size, show, dump - import StatsBase: fit, predict, ConvergenceException + import StatsBase: fit, predict, predict!, ConvergenceException, dof_residual, coef import SparseArrays import LinearAlgebra: eigvals export ## common - evaluate, # evaluate discriminant function values (imported from Base) + evaluate, # evaluate discriminant function values predict, # use a model to predict responses (imported from StatsBase) fit, # fit a model to data (imported from StatsBase) centralize, # subtract a mean vector from each column @@ -112,8 +113,8 @@ module MultivariateStats faem, # Maximum likelihood probabilistic PCA facm # EM algorithm for probabilistic PCA - ## source files + include("types.jl") 
include("common.jl") include("lreg.jl") include("whiten.jl") @@ -126,4 +127,7 @@ module MultivariateStats include("ica.jl") include("fa.jl") + # @deprecate transform(m, x; kwargs...) predict(m, x; kwargs...) #ex=false + # @deprecate transform(m; kwargs...) predict(m; kwargs...) #ex=false + end # module diff --git a/src/common.jl b/src/common.jl index 9d6d094..3090ecc 100644 --- a/src/common.jl +++ b/src/common.jl @@ -20,7 +20,7 @@ decentralize(x::AbstractMatrix, m::AbstractVector) = (isempty(m) ? x : x .+ m) # get a full mean vector -fullmean(d::Int, mv::Vector{T}) where T = (isempty(mv) ? zeros(T, d) : mv) +fullmean(d::Int, mv::AbstractVector{T}) where T = (isempty(mv) ? zeros(T, d) : mv) preprocess_mean(X::AbstractMatrix{T}, m) where T<:Real = (m === nothing ? vec(mean(X, dims=2)) : m == 0 ? T[] : m) diff --git a/src/types.jl b/src/types.jl new file mode 100644 index 0000000..b220411 --- /dev/null +++ b/src/types.jl @@ -0,0 +1,14 @@ + +""" + indim(m) + +Get the input dimension of the model `m`. +""" +function indim(m::RegressionModel) end + +""" + outdim(m) + +Get the output dimension of the model `m`. +""" +function outdim(m::RegressionModel) end diff --git a/src/whiten.jl b/src/whiten.jl index 51037be..7c3ad6f 100644 --- a/src/whiten.jl +++ b/src/whiten.jl @@ -1,27 +1,51 @@ # Whitening -## Solve whitening based on covariance -# -# finds W, such that W'CW = I -# +""" + cov_whitening(C) + +Derive the whitening transform coefficient matrix `W` given the covariance matrix `C`. Here, `C` can be either a square matrix, or an instance of `Cholesky`. + +Internally, this function solves the whitening transform using Cholesky factorization. The rationale is as follows: let ``\\mathbf{C} = \\mathbf{U}^T \\mathbf{U}`` and ``\\mathbf{W} = \\mathbf{U}^{-1}``, then ``\\mathbf{W}^T \\mathbf{C} \\mathbf{W} = \\mathbf{I}``. + +**Note:** The returned matrix `W` is an upper triangular matrix. +""" function cov_whitening(C::Cholesky{T}) where {T<:Real} cf = C.UL Matrix{T}(inv(istriu(cf) ? 
cf : cf')) end -cov_whitening!(C::DenseMatrix{<:Real}) = cov_whitening(cholesky!(Hermitian(C, :U))) -cov_whitening(C::DenseMatrix{<:Real}) = cov_whitening!(copy(C)) +""" + cov_whitening!(C) + +In-place version of `cov_whitening(C)`, in which the input matrix `C` will be overwritten during computation. This can be more efficient when `C` is no longer used. +""" +cov_whitening!(C::AbstractMatrix{<:Real}) = cov_whitening(cholesky!(Hermitian(C, :U))) +cov_whitening(C::AbstractMatrix{<:Real}) = cov_whitening!(copy(C)) + +""" + cov_whitening!(C, regcoef) -cov_whitening!(C::DenseMatrix{<:Real}, regcoef::Real) = cov_whitening!(regularize_symmat!(C, regcoef)) -cov_whitening(C::DenseMatrix{<:Real}, regcoef::Real) = cov_whitening!(copy(C), regcoef) +In-place version of `cov_whitening(C, regcoef)`, in which the input matrix `C` will be overwritten during computation. This can be more efficient when `C` is no longer used. +""" +cov_whitening!(C::AbstractMatrix{<:Real}, regcoef::Real) = cov_whitening!(regularize_symmat!(C, regcoef)) + +""" + cov_whitening(C, regcoef) + +Derive a whitening transform based on a regularized covariance, as `C + (eigmax(C) * regcoef) * eye(d)`. +""" +cov_whitening(C::AbstractMatrix{<:Real}, regcoef::Real) = cov_whitening!(copy(C), regcoef) ## Whitening type -struct Whitening{T<:Real} - mean::Vector{T} - W::Matrix{T} +""" +A whitening transform representation. 
+""" +struct Whitening{T<:Real} <: AbstractDataTransform + mean::AbstractVector{T} + W::AbstractMatrix{T} - function Whitening{T}(mean::Vector{T}, W::Matrix{T}) where {T<:Real} + function Whitening{T}(mean::AbstractVector{T}, W::AbstractMatrix{T}) where {T<:Real} d, d2 = size(W) d == d2 || error("W must be a square matrix.") isempty(mean) || length(mean) == d || @@ -29,17 +53,53 @@ struct Whitening{T<:Real} return new(mean, W) end end -Whitening(mean::Vector{T}, W::Matrix{T}) where {T<:Real} = Whitening{T}(mean, W) +Whitening(mean::AbstractVector{T}, W::AbstractMatrix{T}) where {T<:Real} = Whitening{T}(mean, W) indim(f::Whitening) = size(f.W, 1) outdim(f::Whitening) = size(f.W, 2) + +""" + size(f) + +Dimensions of the coefficient matrix of the whitening transform `f`. +""" +size(f::Whitening) = size(f.W) + +""" + mean(f) + +Get the mean vector of the whitening transformation `f`. + +**Note:** if mean is empty, this function returns a zero vector of length [`indim`](@ref). +""" mean(f::Whitening) = fullmean(indim(f), f.mean) + +""" + transform(f, x) + +Apply the whitening transform `f` to a vector or a matrix `x` with samples in columns, as ``\\mathbf{W}^T (\\mathbf{x} - \\boldsymbol{\\mu})``. +""" transform(f::Whitening, x::AbstractVecOrMat{<:Real}) = transpose(f.W) * centralize(x, f.mean) -## Fit whitening to data +""" + fit(::Type{Whitening}, X::AbstractMatrix{T}; kwargs...) + +Estimate a whitening transform from the data given in `X`. Here, `X` should be a matrix, whose columns give the samples. -function fit(::Type{Whitening}, X::DenseMatrix{T}; +This function returns an instance of [`Whitening`](@ref). + +**Keyword Arguments:** +- `regcoef`: The regularization coefficient. The covariance will be regularized as follows when `regcoef` is positive: `C + (eigmax(C) * regcoef) * eye(d)`. Default value is `zero(T)`. 
+ +- `mean`: The mean vector, which can be either of: + - `0`: the input data has already been centralized + - `nothing`: this function will compute the mean (**default**) + - a pre-computed mean vector + +**Note:** This function internally relies on [`cov_whitening`](@ref) to derive the transformation `W`. +""" +function fit(::Type{Whitening}, X::AbstractMatrix{T}; mean=nothing, regcoef::Real=zero(T)) where {T<:Real} n = size(X, 2) n > 1 || error("X must contain more than one sample.") @@ -51,7 +111,7 @@ end # invsqrtm -function _invsqrtm!(C::Matrix{<:Real}) +function _invsqrtm!(C::AbstractMatrix{<:Real}) n = size(C, 1) size(C, 2) == n || error("C must be a square matrix.") E = eigen!(Symmetric(C)) @@ -64,4 +124,9 @@ function _invsqrtm!(C::Matrix{<:Real}) return U * transpose(U) end -invsqrtm(C::DenseMatrix{<:Real}) = _invsqrtm!(copy(C)) +""" + invsqrtm(C) + +Compute `inv(sqrtm(C))` through symmetric eigenvalue decomposition. +""" +invsqrtm(C::AbstractMatrix{<:Real}) = _invsqrtm!(copy(C)) diff --git a/test/whiten.jl b/test/whiten.jl index 69f296e..3f3b8a4 100644 --- a/test/whiten.jl +++ b/test/whiten.jl @@ -1,5 +1,5 @@ using MultivariateStats -using LinearAlgebra +using LinearAlgebra, StatsBase, SparseArrays using Test import Statistics: mean, cov import Random @@ -55,6 +55,9 @@ import Random W = f.W @test isa(f, Whitening{Float64}) @test mean(f) === f.mean + @test indim(f) == d + @test outdim(f) == d + @test size(f) == (d,d) @test istriu(W) @test W'C * W ≈ Matrix(I, d, d) @test transform(f, X) ≈ W' * (X .- f.mean) @@ -92,4 +95,9 @@ import Random # type consistency @test eltype(mean(M)) == Float64 @test eltype(mean(MM)) == Float32 + + # sparse arrays + SX = sprand(Float32, d, n, 0.75) + SM = fit(Whitening, SX; mean=sprand(Float32, 3, 0.75)) + @test transform(SM, SX) isa Matrix{Float32} end