From c0f74de21093ca4433d44187fec11bee4723a56c Mon Sep 17 00:00:00 2001 From: Art Date: Thu, 28 Oct 2021 16:43:31 -0400 Subject: [PATCH] Refactor PCA and docs (#163) * add new DR subtypes and subtype PCA class * add `loadings` & fix tests (close #123) * migrated PCA docs * allow the nightly build to fail in CI * fixed deprecated calls in tests * Relax type-asserts in PCA (close #140, close #141) --- .github/workflows/ci.yml | 9 +- docs/Project.toml | 2 + docs/make.jl | 4 +- docs/src/api.md | 37 ++++----- docs/src/index.md | 2 +- docs/src/pca.md | 90 ++++++++++++++++++++ src/MultivariateStats.jl | 15 ++-- src/pca.jl | 172 +++++++++++++++++++++++++++++++++------ src/types.jl | 27 ++++++ test/pca.jl | 60 +++++++------- 10 files changed, 337 insertions(+), 81 deletions(-) create mode 100644 docs/src/pca.md create mode 100644 src/types.jl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dc395cd..b3cd9b9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,19 +9,26 @@ jobs: if: "!contains(github.event.head_commit.message, 'skip ci')" name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.allow-to-fail }} strategy: fail-fast: false matrix: version: - '1.1' - '1' # automatically expands to the latest stable 1.x release of Julia - - 'nightly' + # - 'nightly' os: - ubuntu-latest # - macOS-latest # - windows-latest arch: - x64 + allow-to-fail: [false] + include: + - version: 'nightly' + os: ubuntu-latest + arch: x64 + allow-to-fail: true steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/docs/Project.toml b/docs/Project.toml index 853eb6f..7c34cbb 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,8 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] diff --git a/docs/make.jl b/docs/make.jl index d4cdf4b..684bd93 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -7,7 +7,9 @@ end makedocs( sitename = "MultivariateStats.jl", modules = [MultivariateStats], - pages = ["Home"=>"index.md", "whiten.md", "lda.md", "Development"=>"api.md"] + pages = ["Home"=>"index.md", + "whiten.md", "lda.md", "pca.md", + "Development"=>"api.md"] ) deploydocs( diff --git a/docs/src/api.md b/docs/src/api.md index 9cc1bde..56f2c40 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -20,8 +20,8 @@ Table of the package models and corresponding function names used by these model |loadings | ? | | | ? | x | x | ? | ? | ? | |eigvals | | | | | ? | ? | ? | ? | x | |eigvecs | | | | | ? | ? | ? | ? | ? | -|length | | | | | | | | | | -|size | | | | | | | | | | +|length | | | | | | | | | | +|size | | | | | | | | | | Note: `?` refers to a possible implementation that is missing or called differently. @@ -35,44 +35,43 @@ Note: `?` refers to a possible implementation that is missing or called differen |indim | - | | | - | - | x | x | x | x | x | x | |outdim | - | x | | - | - | x | x | x | x | x | x | |mean | x | x | | x | x | x | x | x | x | ? | | -|var | | | | | | | x | x | ? | ? | ? | -|cov | | | | | | | x | ? | | | | +|var | | | | | | | x | x | x | ? | ? | +|cov | | | | | | | x | x | | | | |cor | | x | | | | | | | | | | |projection | ? 
| x | | x | x | | x | x | x | x | x |
|reconstruct | | | | | | | x | x | x | x | |
-|loadings | | ? | | | | | x | x | ? | ? | ? |
-|eigvals | | | | | + | | ? | ? | ? | ? | x |
-|eigvecs | | | | | | | ? | ? | ? | ? | ? |
+|loadings | | ? | | | | | x | x | x | ? | ? |
+|eigvals | | | | | + | | ? | ? | x | ? | x |
+|eigvecs | | | | | | | ? | ? | x | ? | ? |
|length | + | | + | + | + | | | | | | |
-|size | + | | | + | + | | | | | | |
+|size | + | | | + | + | | | | x | | |
| | | | | | | | | | | | |

- StatsBase.AbstractDataTransform
  - Whitening
-   - Interface: fit, transfrom
+   - Interface: fit, transform
    - New: length, mean, size
- StatsBase.RegressionModel
+ - *Interface:* fit, predict
  - LinearDiscriminant
-   - Methods:
-   - Interface: fit, predict, coef, dof, weights
-   - New: evaluate, length
+   - Functions: coef, dof, weights, evaluate, length
  - MulticlassLDA
-   - Methods: fit, predict, size, mean, projection
-   - New: length
+   - Functions: size, mean, projection, length
  - SubspaceLDA
-   - Methods: fit, predict, size, mean, projection
-   - New: length, eigvals
+   - Functions: size, mean, projection, length, eigvals
  - CCA
-   - Methods: fit, transfrom, indim, outdim, mean
+   - Functions: indim, outdim, mean
  - Subtypes:
    - AbstractDimensionalityReduction
-     - Methods: projection, var, reconstruct, loadings
+     - *Interface:* projection, var, reconstruct, loadings
+     - *Interface:* projection == weights
      - Subtypes:
        - LinearDimensionalityReduction
          - Methods: ICA, PCA
        - NonlinearDimensionalityReduction
          - Methods: KPCA, MDS
+         - Functions: modelmatrix (X)
        - LatentVariableModel or LatentVariableDimensionalityReduction
          - Methods: FA, PPCA
-         - Methods: cov
+         - Functions: cov
diff --git a/docs/src/index.md b/docs/src/index.md
index 47f7ffd..7bc7063 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -11,7 +11,7 @@ end
 [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl) is a Julia package for multivariate statistical analysis. It provides a rich set of useful analysis techniques, such as PCA, CCA, LDA, ICA, etc.

 ```@contents
-Pages = ["whiten.md", "lda.md", "api.md"]
+Pages = ["whiten.md", "lda.md", "pca.md", "api.md"]
 Depth = 2
 ```
diff --git a/docs/src/pca.md b/docs/src/pca.md
new file mode 100644
index 0000000..31bd190
--- /dev/null
+++ b/docs/src/pca.md
@@ -0,0 +1,90 @@
# Principal Component Analysis

[Principal Component Analysis](http://en.wikipedia.org/wiki/Principal_component_analysis) (PCA) derives an orthogonal projection to convert a given set of observations to linearly uncorrelated variables, called *principal components*.

## Example

Performing [`PCA`](@ref) on the *Iris* data set:

```@example PCAex
using MultivariateStats, RDatasets, Plots
plotly() # using plotly for 3D-interactive graphing

# load iris dataset
iris = dataset("datasets", "iris")

# split half into the training set
Xtr = Matrix(iris[1:2:end,1:4])'
Xtr_labels = Vector(iris[1:2:end,5])

# split the other half into the testing set
Xte = Matrix(iris[2:2:end,1:4])'
Xte_labels = Vector(iris[2:2:end,5])
nothing # hide
```

Suppose `Xtr` and `Xte` are the training and testing data matrices, with each observation in a column.
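As a quick sanity check of that orientation, a sketch (the sizes follow from splitting the 150 iris observations in half):

```julia
size(Xtr) == (4, 75)   # 4 features per column, one column per training observation
```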
We train a PCA model, allowing up to 3 dimensions:

```@example PCAex
M = fit(PCA, Xtr; maxoutdim=3)
```

Then, apply the PCA model to the testing set:

```@example PCAex
Yte = predict(M, Xte)
```

And reconstruct the testing observations (approximately) back to the original space:

```@example PCAex
Xr = reconstruct(M, Yte)
```

Now, we group the results by the testing set labels for color coding, and visualize the first 3 principal
components in a 3D interactive plot:

```@example PCAex
setosa = Yte[:,Xte_labels.=="setosa"]
versicolor = Yte[:,Xte_labels.=="versicolor"]
virginica = Yte[:,Xte_labels.=="virginica"]

p = scatter(setosa[1,:],setosa[2,:],setosa[3,:],marker=:circle,linewidth=0)
scatter!(versicolor[1,:],versicolor[2,:],versicolor[3,:],marker=:circle,linewidth=0)
scatter!(virginica[1,:],virginica[2,:],virginica[3,:],marker=:circle,linewidth=0)
plot!(p,xlabel="PC1",ylabel="PC2",zlabel="PC3")
```

## Linear Principal Component Analysis

This package uses the [`PCA`](@ref) type to define a linear PCA model:

```@docs
PCA
```

This type comes with several methods. Below, let ``M`` be an instance of [`PCA`](@ref),
``d`` be the dimension of observations, and ``p`` be the output dimension (*i.e.* the dimension of the principal subspace).

```@docs
fit(::Type{PCA}, ::AbstractMatrix{T}; kwargs) where {T<:Real}
predict(::PCA, ::AbstractVecOrMat{T}) where {T<:Real}
reconstruct(::PCA, ::AbstractVecOrMat{T}) where {T<:Real}
size(::PCA)
mean(M::PCA)
projection(M::PCA)
var(M::PCA)
tprincipalvar(M::PCA)
tresidualvar(M::PCA)
r2(M::PCA)
loadings(M::PCA)
eigvals(M::PCA)
eigvecs(M::PCA)
```

Auxiliary functions:

```@docs
pcacov
pcasvd
```
diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl
index 7600c2e..ce8a815 100644
--- a/src/MultivariateStats.jl
+++ b/src/MultivariateStats.jl
@@ -4,9 +4,10 @@ module MultivariateStats
        AbstractDataTransform, pairwise!
    import Statistics: mean, var, cov, covm
    import Base: length, size, show, dump
-    import StatsBase: fit, predict, predict!, ConvergenceException, dof_residual, coef, weights, dof, pairwise
+    import StatsBase: fit, predict, predict!, ConvergenceException, coef, weights,
+                      dof, pairwise, r2
    import SparseArrays
-    import LinearAlgebra: eigvals
+    import LinearAlgebra: eigvals, eigvecs

    export

    tprincipalvar,          # total principal variance, i.e. sum(principalvars(M))
    tresidualvar,           # total residual variance
-    tvar,                   # total variance
+    loadings,               # model loadings

    ## ppca
    PPCA,                   # Type: the Probabilistic PCA model
@@ -97,8 +98,7 @@ module MultivariateStats
    betweenclass_scatter,   # between-class scatter matrix
    multiclass_lda_stats,   # compute statistics for multiclass LDA training
    multiclass_lda,         # train multi-class LDA based on statistics
-    mclda_solve,            # solve multi-class LDA projection given scatter matrices
-    mclda_solve!,           # solve multi-class LDA projection (inputs are overriden)
+    mclda_solve,            # solve multi-class LDA projection given scatter matrices

    ## ica
    ICA,                    # Type: the Fast ICA model
@@ -113,6 +113,7 @@ module MultivariateStats
    facm                    # EM algorithm for probabilistic PCA

    ## source files
+    include("types.jl")
    include("common.jl")
    include("lreg.jl")
    include("whiten.jl")

    @deprecate outdim(f::MulticlassLDA) size(f::MulticlassLDA)[2]
    @deprecate indim(f::SubspaceLDA) size(f::SubspaceLDA)[1]
    @deprecate outdim(f::SubspaceLDA) size(f::SubspaceLDA)[2]
+    @deprecate indim(f::PCA) size(f::PCA)[1]
+    @deprecate outdim(f::PCA) size(f::PCA)[2]
+    @deprecate tvar(f::PCA) var(f::PCA) # total variance
+    @deprecate transform(f::PCA, x) predict(f::PCA, x) #ex=false
    # @deprecate transform(m, x; kwargs...) predict(m, x; kwargs...) #ex=false
    # @deprecate transform(m; kwargs...) predict(m; kwargs...) #ex=false
diff --git a/src/pca.jl b/src/pca.jl
index 417262c..be44216 100644
--- a/src/pca.jl
+++ b/src/pca.jl
@@ -1,55 +1,132 @@
# Principal Component Analysis

"""
Linear Principal Component Analysis
"""
struct PCA{T<:Real} <: LinearDimensionalityReduction
    mean::AbstractVector{T}       # sample mean: of length d (mean can be empty, which indicates zero mean)
    proj::AbstractMatrix{T}       # projection matrix: of size d x p
    prinvars::AbstractVector{T}   # principal variances: of length p
    tprinvar::T                   # total principal variance, i.e. sum(prinvars)
    tvar::T                       # total input variance
end

## constructor

function PCA(mean::AbstractVector{T}, proj::AbstractMatrix{T}, pvars::AbstractVector{T}, tvar::T) where {T<:Real}
    d, p = size(proj)
    (isempty(mean) || length(mean) == d) ||
        throw(DimensionMismatch("Dimensions of mean and projection matrix are inconsistent."))
    length(pvars) == p ||
        throw(DimensionMismatch("Dimensions of projection matrix and principal variances are inconsistent."))
    tpvar = sum(pvars)
    tpvar <= tvar || isapprox(tpvar, tvar) ||
        throw(ArgumentError("principal variance cannot exceed total variance."))
    PCA(mean, proj, pvars, tpvar, tvar)
end

## properties

"""
    size(M::PCA)

Returns a tuple with the dimensions of the input (the dimension of the observation space)
and the output (the dimension of the principal subspace).
"""
size(M::PCA) = size(M.proj)

"""
    mean(M::PCA)

Returns the mean vector (of length `d`).
"""
mean(M::PCA) = fullmean(size(M.proj,1), M.mean)

"""
    projection(M::PCA)

Returns the projection matrix (of size `(d, p)`).
Each column of the projection matrix corresponds to a principal component.
The principal components are arranged in descending order of the corresponding variances.
"""
projection(M::PCA) = M.proj
eigvecs(M::PCA) = projection(M)

-principalvar(M::PCA, i::Int) = M.prinvars[i]
"""
    principalvars(M::PCA)

Returns the variances of the principal components.
"""
principalvars(M::PCA) = M.prinvars
principalvar(M::PCA, i::Int) = M.prinvars[i]
eigvals(M::PCA) = principalvars(M)

"""
    tprincipalvar(M::PCA)

Returns the total variance of the principal components, which is equal to `sum(principalvars(M))`.
"""
tprincipalvar(M::PCA) = M.tprinvar

"""
    tresidualvar(M::PCA)

Returns the total residual variance.
"""
tresidualvar(M::PCA) = M.tvar - M.tprinvar

-tvar(M::PCA) = M.tvar
-principalratio(M::PCA) = M.tprinvar / M.tvar
"""
    var(M::PCA)

Returns the total observation variance, which is equal to `tprincipalvar(M) + tresidualvar(M)`.
"""
var(M::PCA) = M.tvar

"""
    r2(M::PCA)
    principalratio(M::PCA)

Returns the ratio of variance preserved in the principal subspace, which is equal to `tprincipalvar(M) / var(M)`.
"""
r2(M::PCA) = M.tprinvar / M.tvar
const principalratio = r2

"""
    loadings(M::PCA)

Returns the model loadings, i.e. the weights of the original variables in each principal component:
the projection columns scaled by the standard deviations (square roots of the variances) of the corresponding components.
"""
loadings(M::PCA) = sqrt.(principalvars(M))' .* projection(M)

## use

-transform(M::PCA, x::AbstractVecOrMat{<:Real}) = transpose(M.proj) * centralize(x, M.mean)
-reconstruct(M::PCA, y::AbstractVecOrMat{<:Real}) = decentralize(M.proj * y, M.mean)
"""
    predict(M::PCA, x::AbstractVecOrMat{<:Real})

Given a PCA model `M`, transform observations `x` into the principal component space, as

\$\mathbf{y} = \mathbf{P}^T (\mathbf{x} - \boldsymbol{\mu})\$

Here, `x` can be either a vector of length `d` or a matrix where each column is an observation,
and \$\mathbf{P}\$ is the projection matrix.
"""
predict(M::PCA, x::AbstractVecOrMat{T}) where {T<:Real} = transpose(M.proj) * centralize(x, M.mean)

"""
    reconstruct(M::PCA, y::AbstractVecOrMat{<:Real})

Given a PCA model `M`, return the (approximately) reconstructed observations
from the principal component space, as

\$\tilde{\mathbf{x}} = \mathbf{P} \mathbf{y} + \boldsymbol{\mu}\$

Here, `y` can be either a vector of length `p` or a matrix where each column
gives the principal components of an observation, and \$\mathbf{P}\$ is the projection matrix.
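
For illustration, a minimal round-trip sketch on synthetic data (arbitrary values; any fitted `PCA` behaves the same way):

```julia
using MultivariateStats
X = randn(5, 100)              # 5 variables, 100 observations in columns
M = fit(PCA, X; maxoutdim=3)
Y = predict(M, X)              # 3×100 matrix of principal components
Xr = reconstruct(M, Y)         # 5×100 approximate reconstruction of X
```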
+""" +reconstruct(M::PCA, y::AbstractVecOrMat{T}) where {T<:Real} = decentralize(M.proj * y, M.mean) ## show & dump function show(io::IO, M::PCA) - print(io, "PCA(indim = $(indim(M)), outdim = $(outdim(M)), principalratio = $(principalratio(M)))") + idim, odim = size(M) + print(io, "PCA(indim = $idim, outdim = $odim, principalratio = $(r2(M)))") end function dump(io::IO, M::PCA) @@ -76,8 +153,8 @@ const default_pca_pratio = 0.99 function check_pcaparams(d::Int, mean::AbstractVector, md::Int, pr::Real) isempty(mean) || length(mean) == d || throw(DimensionMismatch("Incorrect length of mean.")) - md >= 1 || error("maxoutdim must be a positive integer.") - 0.0 < pr <= 1.0 || throw(ArgumentError("pratio must be a positive real value with pratio ≤ 1.0.")) + md >= 1 || error("`maxoutdim` parameter must be a positive integer.") + 0.0 < pr <= 1.0 || throw(ArgumentError("principal ratio must be a positive real value ≤ 1.0.")) end function choose_pcadim(v::AbstractVector{T}, ord::Vector{Int}, vsum::T, md::Int, @@ -94,8 +171,19 @@ end ## core algorithms +""" + pcacov(C, mean; ...) + +Compute and return a PCA model based on eigenvalue decomposition of a given covariance matrix `C`. -function pcacov(C::AbstractMatrix{T}, mean::Vector{T}; +**Parameters:** +- `C`: The covariance matrix of the samples. +- `mean`: The mean vector of original samples, which can be a vector of length `d`, + or an empty vector `Float64[]` indicating a zero mean. + +*Note:* This function accepts two keyword arguments: `maxoutdim` and `pratio`. +""" +function pcacov(C::AbstractMatrix{T}, mean::AbstractVector{T}; maxoutdim::Int=size(C,1), pratio::Real=default_pca_pratio) where {T<:Real} @@ -109,7 +197,20 @@ function pcacov(C::AbstractMatrix{T}, mean::Vector{T}; PCA(mean, P, v, vsum) end -function pcasvd(Z::AbstractMatrix{T}, mean::Vector{T}, tw::Real; +""" + pcasvd(Z, mean, tw; ...) + +Compute and return a PCA model based on singular value decomposition of a centralized sample matrix `Z`. + +**Parameters:** +- `Z`: a matrix of centralized samples. +- `mean`: The mean vector of the **original** samples, which can be a vector of length `d`, + or an empty vector `Float64[]` indicating a zero mean. +- `n`: a number of samples. + +*Note:* This function accepts two keyword arguments: `maxoutdim` and `pratio`. +""" +function pcasvd(Z::AbstractMatrix{T}, mean::AbstractVector{T}, n::Real; maxoutdim::Int=min(size(Z)...), pratio::Real=default_pca_pratio) where {T<:Real} @@ -118,7 +219,7 @@ function pcasvd(Z::AbstractMatrix{T}, mean::Vector{T}, tw::Real; v = Svd.S::Vector{T} U = Svd.U::Matrix{T} for i = 1:length(v) - @inbounds v[i] = abs2(v[i]) / tw + @inbounds v[i] = abs2(v[i]) / n end ord = sortperm(v; rev=true) vsum = sum(v) @@ -128,7 +229,32 @@ function pcasvd(Z::AbstractMatrix{T}, mean::Vector{T}, tw::Real; end ## interface functions +""" + fit(PCA, X; ...) + +Perform PCA over the data given in a matrix `X`. Each column of `X` is an **observation**. + +**Keyword arguments** + +- `method`: The choice of methods: + - `:auto`: use `:cov` when `d < n` or `:svd` otherwise (*default*). + - `:cov`: based on covariance matrix decomposition. + - `:svd`: based on SVD of the input data. +- `maxoutdim`: The output dimension, i.e. 
the dimension of the transformed space (*default:* `d`, the dimension of the observation space)
- `pratio`: The ratio of variance preserved in the principal subspace (*default:* `0.99`)
- `mean`: The mean vector, which can be either of:
    - `0`: the input data has already been centralized
    - `nothing`: this function will compute the mean (*default*)
    - a pre-computed mean vector

**Notes:**

- The output dimension `p` depends on both `maxoutdim` and `pratio`, as follows. Suppose
  the first `k` principal components preserve at least `pratio` of the total variance, while the
  first `k-1` preserve less than `pratio`; then the actual output dimension is \$\min(k, maxoutdim)\$.
- This function calls [`pcacov`](@ref) or [`pcasvd`](@ref) internally, depending on the choice of method.
"""
function fit(::Type{PCA}, X::AbstractMatrix{T};
             method::Symbol=:auto,
             maxoutdim::Int=size(X,1),
diff --git a/src/types.jl b/src/types.jl
new file mode 100644
index 0000000..6ff1372
--- /dev/null
+++ b/src/types.jl
@@ -0,0 +1,27 @@
abstract type AbstractDimensionalityReduction <: RegressionModel end

"""
    projection(model::AbstractDimensionalityReduction)

Return the projection matrix of the model.
"""
projection(model::AbstractDimensionalityReduction) = error("'projection' is not defined for $(typeof(model)).")

"""
    reconstruct(model::AbstractDimensionalityReduction, y)

Return observations reconstructed from their low-dimensional representation `y`.
"""
reconstruct(model::AbstractDimensionalityReduction, y) = error("'reconstruct' is not defined for $(typeof(model)).")

abstract type LinearDimensionalityReduction <: AbstractDimensionalityReduction end

"""
    loadings(model::LinearDimensionalityReduction)

Return the model loadings (i.e. the eigenvectors scaled by the square roots of the corresponding variances).
"""
loadings(model::LinearDimensionalityReduction) = error("'loadings' is not defined for $(typeof(model)).")

abstract type NonlinearDimensionalityReduction <: AbstractDimensionalityReduction end
abstract type LatentVariableDimensionalityReduction <: AbstractDimensionalityReduction end
diff --git a/test/pca.jl b/test/pca.jl
index 6336bba..00c36f9 100644
--- a/test/pca.jl
+++ b/test/pca.jl
@@ -1,7 +1,7 @@
 using MultivariateStats
 using LinearAlgebra
 using Test
-import Statistics: mean, cov
+import Statistics: mean, var, cov
 import Random
 import SparseArrays

@@ -16,21 +16,26 @@ import SparseArrays

    P = qr(randn(5, 5)).Q[:, 1:3]
    pvars = [5., 4., 3.]
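+    # hard-coded expected loadings: loadings(M) is defined as sqrt.(pvars)' .* P,
+    # so the values below are that product, precomputed for this test's data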
+ l = [-0.809509 -1.14456 0.944145 + -0.738713 -1.23353 -0.607874; + -1.64431 0.875826 -0.479549; + -0.816033 0.613632 1.06775 ; + 0.655236 0.157369 0.607475] M = PCA(Float64[], P, pvars, 15.0) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5, 3) @test mean(M) == zeros(5) @test projection(M) == P @test principalvars(M) == pvars @test principalvar(M, 2) == pvars[2] - @test tvar(M) == 15.0 + @test var(M) == 15.0 @test tprincipalvar(M) == 12.0 @test tresidualvar(M) == 3.0 @test principalratio(M) == 0.8 + @test isapprox(loadings(M),l, atol = 0.001) - @test transform(M, X[:,1]) ≈ P'X[:,1] - @test transform(M, X) ≈ P'X + @test predict(M, X[:,1]) ≈ P'X[:,1] + @test predict(M, X) ≈ P'X @test reconstruct(M, Y[:,1]) ≈ P * Y[:,1] @test reconstruct(M, Y) ≈ P * Y @@ -41,19 +46,18 @@ import SparseArrays mval = rand(5) M = PCA(mval, P, pvars, 15.0) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test mean(M) == mval @test projection(M) == P @test principalvars(M) == pvars @test principalvar(M, 2) == pvars[2] - @test tvar(M) == 15.0 + @test var(M) == 15.0 @test tprincipalvar(M) == 12.0 @test tresidualvar(M) == 3.0 @test principalratio(M) == 0.8 - @test transform(M, X[:,1]) ≈ P' * (X[:,1] .- mval) - @test transform(M, X) ≈ P' * (X .- mval) + @test predict(M, X[:,1]) ≈ P' * (X[:,1] .- mval) + @test predict(M, X) ≈ P' * (X .- mval) @test reconstruct(M, Y[:,1]) ≈ P * Y[:,1] .+ mval @test reconstruct(M, Y) ≈ P * Y .+ mval @@ -82,16 +86,15 @@ import SparseArrays P = projection(M) pvs = principalvars(M) - @test indim(M) == 5 - @test outdim(M) == 5 + @test size(M) == (5,5) @test mean(M) == mval @test P'P ≈ Matrix(I, 5, 5) @test C*P ≈ P*Diagonal(pvs) @test issorted(pvs; rev=true) @test pvs ≈ pvs0 - @test tvar(M) ≈ tv - @test sum(pvs) ≈ tvar(M) - @test reconstruct(M, transform(M, X)) ≈ X + @test var(M) ≈ tv + @test sum(pvs) ≈ var(M) + @test reconstruct(M, predict(M, X)) ≈ X M = fit(PCA, X; mean=mval) @test projection(M) ≈ P @@ -102,15 +105,13 @@ import SparseArrays M = fit(PCA, X; maxoutdim=3) P = projection(M) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test P'P ≈ Matrix(I, 3, 3) @test issorted(pvs; rev=true) M = fit(PCA, X; pratio=0.85) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test P'P ≈ Matrix(I, 3, 3) @test issorted(pvs; rev=true) @@ -120,16 +121,15 @@ import SparseArrays P = projection(M) pvs = principalvars(M) - @test indim(M) == 5 - @test outdim(M) == 5 + @test size(M) == (5,5) @test mean(M) == mval @test P'P ≈ Matrix(I, 5, 5) @test isapprox(C*P, P*Diagonal(pvs), atol=1.0e-3) @test issorted(pvs; rev=true) @test isapprox(pvs, pvs0, atol=1.0e-3) - @test isapprox(tvar(M), tv, atol=1.0e-3) - @test sum(pvs) ≈ tvar(M) - @test reconstruct(M, transform(M, X)) ≈ X + @test isapprox(var(M), tv, atol=1.0e-3) + @test sum(pvs) ≈ var(M) + @test reconstruct(M, predict(M, X)) ≈ X M = fit(PCA, X; method=:svd, mean=mval) @test projection(M) ≈ P @@ -140,15 +140,13 @@ import SparseArrays M = fit(PCA, X; method=:svd, maxoutdim=3) P = projection(M) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test P'P ≈ Matrix(I, 3, 3) @test issorted(pvs; rev=true) M = fit(PCA, X; method=:svd, pratio=0.85) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test P'P ≈ Matrix(I, 3, 3) @test issorted(pvs; rev=true) @@ -166,8 +164,8 @@ import SparseArrays fit(PCA, X ; pratio=pp) fit(PCA, XX ; pratio=p) fit(PCA, XX ; pratio=pp) - transform(M, XX) - transform(MM, X) + predict(M, XX) + predict(MM, X) 
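+    # reconstruction should likewise accept either element type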
reconstruct(M, YY) reconstruct(MM, Y)