From c0f74de21093ca4433d44187fec11bee4723a56c Mon Sep 17 00:00:00 2001 From: Art Date: Thu, 28 Oct 2021 16:43:31 -0400 Subject: [PATCH] Refactor PCA and docs (#163) * add new DR subtypes and subtype PCA class * add `loadings` & fix tests (close #123) * migrated PCA docs * allow the nightly build to fail in CI * fixed deprecated calls in tests * Relax type-asserts in PCA (close #140, close #141) --- .github/workflows/ci.yml | 9 +- docs/Project.toml | 2 + docs/make.jl | 4 +- docs/src/api.md | 37 ++++----- docs/src/index.md | 2 +- docs/src/pca.md | 90 ++++++++++++++++++++ src/MultivariateStats.jl | 15 ++-- src/pca.jl | 172 +++++++++++++++++++++++++++++++++------ src/types.jl | 27 ++++++ test/pca.jl | 60 +++++++------- 10 files changed, 337 insertions(+), 81 deletions(-) create mode 100644 docs/src/pca.md create mode 100644 src/types.jl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dc395cd..b3cd9b9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,19 +9,26 @@ jobs: if: "!contains(github.event.head_commit.message, 'skip ci')" name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.allow-to-fail }} strategy: fail-fast: false matrix: version: - '1.1' - '1' # automatically expands to the latest stable 1.x release of Julia - - 'nightly' + # - 'nightly' os: - ubuntu-latest # - macOS-latest # - windows-latest arch: - x64 + allow-to-fail: [false] + include: + - version: 'nightly' + os: ubuntu-latest + arch: x64 + allow-to-fail: true steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/docs/Project.toml b/docs/Project.toml index 853eb6f..7c34cbb 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,8 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] diff --git a/docs/make.jl b/docs/make.jl index d4cdf4b..684bd93 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -7,7 +7,9 @@ end makedocs( sitename = "MultivariateStats.jl", modules = [MultivariateStats], - pages = ["Home"=>"index.md", "whiten.md", "lda.md", "Development"=>"api.md"] + pages = ["Home"=>"index.md", + "whiten.md", "lda.md", "pca.md", + "Development"=>"api.md"] ) deploydocs( diff --git a/docs/src/api.md b/docs/src/api.md index 9cc1bde..56f2c40 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -20,8 +20,8 @@ Table of the package models and corresponding function names used by these model |loadings | ? | | | ? | x | x | ? | ? | ? | |eigvals | | | | | ? | ? | ? | ? | x | |eigvecs | | | | | ? | ? | ? | ? | ? | -|length | | | | | | | | | | -|size | | | | | | | | | | +|length | | | | | | | | | | +|size | | | | | | | | | | Note: `?` refers to a possible implementation that is missing or called differently. @@ -35,44 +35,43 @@ Note: `?` refers to a possible implementation that is missing or called differen |indim | - | | | - | - | x | x | x | x | x | x | |outdim | - | x | | - | - | x | x | x | x | x | x | |mean | x | x | | x | x | x | x | x | x | ? | | -|var | | | | | | | x | x | ? | ? | ? | -|cov | | | | | | | x | ? | | | | +|var | | | | | | | x | x | x | ? | ? | +|cov | | | | | | | x | x | | | | |cor | | x | | | | | | | | | | |projection | ? 
| x | | x | x | | x | x | x | x | x |
|reconstruct | | | | | | | x | x | x | x | |
-|loadings | | ? | | | | | x | x | ? | ? | ? |
-|eigvals | | | | | + | | ? | ? | ? | ? | x |
-|eigvecs | | | | | | | ? | ? | ? | ? | ? |
+|loadings | | ? | | | | | x | x | x | ? | ? |
+|eigvals | | | | | + | | ? | ? | x | ? | x |
+|eigvecs | | | | | | | ? | ? | x | ? | ? |
|length | + | | + | + | + | | | | | | |
-|size | + | | | + | + | | | | | | |
+|size | + | | | + | + | | | | x | | |
| | | | | | | | | | | | |

- StatsBase.AbstractDataTransform
  - Whitening
-   - Interface: fit, transfrom
+   - Interface: fit, transform
    - New: length, mean, size
- StatsBase.RegressionModel
+ - *Interface:* fit, predict
  - LinearDiscriminant
-   - Methods:
-   - Interface: fit, predict, coef, dof, weights
-   - New: evaluate, length
+   - Functions: coef, dof, weights, evaluate, length
  - MulticlassLDA
-   - Methods: fit, predict, size, mean, projection
-   - New: length
+   - Functions: size, mean, projection, length
  - SubspaceLDA
-   - Methods: fit, predict, size, mean, projection
-   - New: length, eigvals
+   - Functions: size, mean, projection, length, eigvals
  - CCA
-   - Methods: fit, transfrom, indim, outdim, mean
+   - Functions: indim, outdim, mean
  - Subtypes:
    - AbstractDimensionalityReduction
-     - Methods: projection, var, reconstruct, loadings
+     - *Interface:* projection, var, reconstruct, loadings
+     - *Interface:* projection == weights
      - Subtypes:
        - LinearDimensionalityReduction
          - Methods: ICA, PCA
        - NonlinearDimensionalityReduction
          - Methods: KPCA, MDS
+         - Functions: modelmatrix (X)
        - LatentVariableModel or LatentVariableDimensionalityReduction
          - Methods: FA, PPCA
-         - Methods: cov
+         - Functions: cov
diff --git a/docs/src/index.md b/docs/src/index.md
index 47f7ffd..7bc7063 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -11,7 +11,7 @@ end
 [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl) is a Julia package for multivariate statistical analysis. It provides a rich set of useful analysis techniques, such as PCA, CCA, LDA, ICA, etc.

 ```@contents
-Pages = ["whiten.md", "lda.md", "api.md"]
+Pages = ["whiten.md", "lda.md", "pca.md", "api.md"]
 Depth = 2
 ```
diff --git a/docs/src/pca.md b/docs/src/pca.md
new file mode 100644
index 0000000..31bd190
--- /dev/null
+++ b/docs/src/pca.md
@@ -0,0 +1,90 @@
# Principal Component Analysis

[Principal Component Analysis](http://en.wikipedia.org/wiki/Principal_component_analysis) (PCA) derives an orthogonal projection to convert a given set of observations to linearly uncorrelated variables, called *principal components*.

## Example

Performing [`PCA`](@ref) on the *Iris* data set:

```@example PCAex
using MultivariateStats, RDatasets, Plots
plotly() # using plotly for 3D-interactive graphing

# load iris dataset
iris = dataset("datasets", "iris")

# split half into the training set
Xtr = Matrix(iris[1:2:end,1:4])'
Xtr_labels = Vector(iris[1:2:end,5])

# split the other half into the testing set
Xte = Matrix(iris[2:2:end,1:4])'
Xte_labels = Vector(iris[2:2:end,5])
nothing # hide
```

Suppose `Xtr` and `Xte` are the training and testing data matrices, with each observation in a column.
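As a quick sanity check of that orientation, a sketch (the sizes follow from splitting the 150 iris observations in half):

```julia
size(Xtr) == (4, 75)   # 4 features per column, one column per training observation
```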
We train a PCA model, allowing up to 3 dimensions:

```@example PCAex
M = fit(PCA, Xtr; maxoutdim=3)
```

Then, apply the PCA model to the testing set:

```@example PCAex
Yte = predict(M, Xte)
```

And reconstruct the testing observations (approximately) back to the original space:

```@example PCAex
Xr = reconstruct(M, Yte)
```

Now, we group the results by the testing set labels for color coding, and visualize the first 3 principal
components in a 3D interactive plot:

```@example PCAex
setosa = Yte[:,Xte_labels.=="setosa"]
versicolor = Yte[:,Xte_labels.=="versicolor"]
virginica = Yte[:,Xte_labels.=="virginica"]

p = scatter(setosa[1,:],setosa[2,:],setosa[3,:],marker=:circle,linewidth=0)
scatter!(versicolor[1,:],versicolor[2,:],versicolor[3,:],marker=:circle,linewidth=0)
scatter!(virginica[1,:],virginica[2,:],virginica[3,:],marker=:circle,linewidth=0)
plot!(p,xlabel="PC1",ylabel="PC2",zlabel="PC3")
```

## Linear Principal Component Analysis

This package uses the [`PCA`](@ref) type to define a linear PCA model:

```@docs
PCA
```

This type comes with several methods. Below, let ``M`` be an instance of [`PCA`](@ref),
``d`` be the dimension of observations, and ``p`` be the output dimension (*i.e.* the dimension of the principal subspace).

```@docs
fit(::Type{PCA}, ::AbstractMatrix{T}; kwargs) where {T<:Real}
predict(::PCA, ::AbstractVecOrMat{T}) where {T<:Real}
reconstruct(::PCA, ::AbstractVecOrMat{T}) where {T<:Real}
size(::PCA)
mean(M::PCA)
projection(M::PCA)
var(M::PCA)
tprincipalvar(M::PCA)
tresidualvar(M::PCA)
r2(M::PCA)
loadings(M::PCA)
eigvals(M::PCA)
eigvecs(M::PCA)
```

Auxiliary functions:

```@docs
pcacov
pcasvd
```
diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl
index 7600c2e..ce8a815 100644
--- a/src/MultivariateStats.jl
+++ b/src/MultivariateStats.jl
@@ -4,9 +4,10 @@ module MultivariateStats
        AbstractDataTransform, pairwise!
    import Statistics: mean, var, cov, covm
    import Base: length, size, show, dump
-    import StatsBase: fit, predict, predict!, ConvergenceException, dof_residual, coef, weights, dof, pairwise
+    import StatsBase: fit, predict, predict!, ConvergenceException, coef, weights,
+                      dof, pairwise, r2
    import SparseArrays
-    import LinearAlgebra: eigvals
+    import LinearAlgebra: eigvals, eigvecs

    export

    tprincipalvar,          # total principal variance, i.e. sum(principalvars(M))
    tresidualvar,           # total residual variance
-    tvar,                   # total variance
+    loadings,               # model loadings

    ## ppca
    PPCA,                   # Type: the Probabilistic PCA model
@@ -97,8 +98,7 @@ module MultivariateStats
    betweenclass_scatter,   # between-class scatter matrix
    multiclass_lda_stats,   # compute statistics for multiclass LDA training
    multiclass_lda,         # train multi-class LDA based on statistics
-    mclda_solve,            # solve multi-class LDA projection given scatter matrices
-    mclda_solve!,           # solve multi-class LDA projection (inputs are overriden)
+    mclda_solve,            # solve multi-class LDA projection given scatter matrices

    ## ica
    ICA,                    # Type: the Fast ICA model
@@ -113,6 +113,7 @@ module MultivariateStats
    facm                    # EM algorithm for probabilistic PCA

    ## source files
+    include("types.jl")
    include("common.jl")
    include("lreg.jl")
    include("whiten.jl")

    @deprecate outdim(f::MulticlassLDA) size(f::MulticlassLDA)[2]
    @deprecate indim(f::SubspaceLDA) size(f::SubspaceLDA)[1]
    @deprecate outdim(f::SubspaceLDA) size(f::SubspaceLDA)[2]
+    @deprecate indim(f::PCA) size(f::PCA)[1]
+    @deprecate outdim(f::PCA) size(f::PCA)[2]
+    @deprecate tvar(f::PCA) var(f::PCA) # total variance
+    @deprecate transform(f::PCA, x) predict(f::PCA, x) #ex=false
    # @deprecate transform(m, x; kwargs...) predict(m, x; kwargs...) #ex=false
    # @deprecate transform(m; kwargs...) predict(m; kwargs...) #ex=false
diff --git a/src/pca.jl b/src/pca.jl
index 417262c..be44216 100644
--- a/src/pca.jl
+++ b/src/pca.jl
@@ -1,55 +1,132 @@
# Principal Component Analysis

"""
Linear Principal Component Analysis
"""
struct PCA{T<:Real} <: LinearDimensionalityReduction
    mean::AbstractVector{T}       # sample mean: of length d (mean can be empty, which indicates zero mean)
    proj::AbstractMatrix{T}       # projection matrix: of size d x p
    prinvars::AbstractVector{T}   # principal variances: of length p
    tprinvar::T                   # total principal variance, i.e. sum(prinvars)
    tvar::T                       # total input variance
end

## constructor

function PCA(mean::AbstractVector{T}, proj::AbstractMatrix{T}, pvars::AbstractVector{T}, tvar::T) where {T<:Real}
    d, p = size(proj)
    (isempty(mean) || length(mean) == d) ||
        throw(DimensionMismatch("Dimensions of mean and projection matrix are inconsistent."))
    length(pvars) == p ||
        throw(DimensionMismatch("Dimensions of projection matrix and principal variances are inconsistent."))
    tpvar = sum(pvars)
    tpvar <= tvar || isapprox(tpvar, tvar) ||
        throw(ArgumentError("principal variance cannot exceed total variance."))
    PCA(mean, proj, pvars, tpvar, tvar)
end

## properties

"""
    size(M::PCA)

Returns a tuple with the dimensions of the input (the dimension of the observation space)
and the output (the dimension of the principal subspace).
"""
size(M::PCA) = size(M.proj)

"""
    mean(M::PCA)

Returns the mean vector (of length `d`).
"""
mean(M::PCA) = fullmean(size(M.proj,1), M.mean)

"""
    projection(M::PCA)

Returns the projection matrix (of size `(d, p)`).
Each column of the projection matrix corresponds to a principal component.
The principal components are arranged in descending order of the corresponding variances.
"""
projection(M::PCA) = M.proj
eigvecs(M::PCA) = projection(M)

-principalvar(M::PCA, i::Int) = M.prinvars[i]
"""
    principalvars(M::PCA)

Returns the variances of the principal components.
"""
principalvars(M::PCA) = M.prinvars
principalvar(M::PCA, i::Int) = M.prinvars[i]
eigvals(M::PCA) = principalvars(M)

"""
    tprincipalvar(M::PCA)

Returns the total variance of the principal components, which is equal to `sum(principalvars(M))`.
"""
tprincipalvar(M::PCA) = M.tprinvar

"""
    tresidualvar(M::PCA)

Returns the total residual variance.
"""
tresidualvar(M::PCA) = M.tvar - M.tprinvar

-tvar(M::PCA) = M.tvar
-principalratio(M::PCA) = M.tprinvar / M.tvar
"""
    var(M::PCA)

Returns the total observation variance, which is equal to `tprincipalvar(M) + tresidualvar(M)`.
"""
var(M::PCA) = M.tvar

"""
    r2(M::PCA)
    principalratio(M::PCA)

Returns the ratio of variance preserved in the principal subspace, which is equal to `tprincipalvar(M) / var(M)`.
"""
r2(M::PCA) = M.tprinvar / M.tvar
const principalratio = r2

"""
    loadings(M::PCA)

Returns the model loadings, i.e. the weights of the original variables in each principal component:
the projection columns scaled by the standard deviations (square roots of the variances) of the corresponding components.
"""
loadings(M::PCA) = sqrt.(principalvars(M))' .* projection(M)

## use

-transform(M::PCA, x::AbstractVecOrMat{<:Real}) = transpose(M.proj) * centralize(x, M.mean)
-reconstruct(M::PCA, y::AbstractVecOrMat{<:Real}) = decentralize(M.proj * y, M.mean)
"""
    predict(M::PCA, x::AbstractVecOrMat{<:Real})

Given a PCA model `M`, transform observations `x` into the principal component space, as

\$\mathbf{y} = \mathbf{P}^T (\mathbf{x} - \boldsymbol{\mu})\$

Here, `x` can be either a vector of length `d` or a matrix where each column is an observation,
and \$\mathbf{P}\$ is the projection matrix.
"""
predict(M::PCA, x::AbstractVecOrMat{T}) where {T<:Real} = transpose(M.proj) * centralize(x, M.mean)

"""
    reconstruct(M::PCA, y::AbstractVecOrMat{<:Real})

Given a PCA model `M`, return the (approximately) reconstructed observations
from the principal component space, as

\$\tilde{\mathbf{x}} = \mathbf{P} \mathbf{y} + \boldsymbol{\mu}\$

Here, `y` can be either a vector of length `p` or a matrix where each column
gives the principal components of an observation, and \$\mathbf{P}\$ is the projection matrix.
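
For illustration, a minimal round-trip sketch on synthetic data (arbitrary values; any fitted `PCA` behaves the same way):

```julia
using MultivariateStats
X = randn(5, 100)              # 5 variables, 100 observations in columns
M = fit(PCA, X; maxoutdim=3)
Y = predict(M, X)              # 3×100 matrix of principal components
Xr = reconstruct(M, Y)         # 5×100 approximate reconstruction of X
```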
+""" +reconstruct(M::PCA, y::AbstractVecOrMat{T}) where {T<:Real} = decentralize(M.proj * y, M.mean) ## show & dump function show(io::IO, M::PCA) - print(io, "PCA(indim = $(indim(M)), outdim = $(outdim(M)), principalratio = $(principalratio(M)))") + idim, odim = size(M) + print(io, "PCA(indim = $idim, outdim = $odim, principalratio = $(r2(M)))") end function dump(io::IO, M::PCA) @@ -76,8 +153,8 @@ const default_pca_pratio = 0.99 function check_pcaparams(d::Int, mean::AbstractVector, md::Int, pr::Real) isempty(mean) || length(mean) == d || throw(DimensionMismatch("Incorrect length of mean.")) - md >= 1 || error("maxoutdim must be a positive integer.") - 0.0 < pr <= 1.0 || throw(ArgumentError("pratio must be a positive real value with pratio ≤ 1.0.")) + md >= 1 || error("`maxoutdim` parameter must be a positive integer.") + 0.0 < pr <= 1.0 || throw(ArgumentError("principal ratio must be a positive real value ≤ 1.0.")) end function choose_pcadim(v::AbstractVector{T}, ord::Vector{Int}, vsum::T, md::Int, @@ -94,8 +171,19 @@ end ## core algorithms +""" + pcacov(C, mean; ...) + +Compute and return a PCA model based on eigenvalue decomposition of a given covariance matrix `C`. -function pcacov(C::AbstractMatrix{T}, mean::Vector{T}; +**Parameters:** +- `C`: The covariance matrix of the samples. +- `mean`: The mean vector of original samples, which can be a vector of length `d`, + or an empty vector `Float64[]` indicating a zero mean. + +*Note:* This function accepts two keyword arguments: `maxoutdim` and `pratio`. +""" +function pcacov(C::AbstractMatrix{T}, mean::AbstractVector{T}; maxoutdim::Int=size(C,1), pratio::Real=default_pca_pratio) where {T<:Real} @@ -109,7 +197,20 @@ function pcacov(C::AbstractMatrix{T}, mean::Vector{T}; PCA(mean, P, v, vsum) end -function pcasvd(Z::AbstractMatrix{T}, mean::Vector{T}, tw::Real; +""" + pcasvd(Z, mean, tw; ...) + +Compute and return a PCA model based on singular value decomposition of a centralized sample matrix `Z`. + +**Parameters:** +- `Z`: a matrix of centralized samples. +- `mean`: The mean vector of the **original** samples, which can be a vector of length `d`, + or an empty vector `Float64[]` indicating a zero mean. +- `n`: a number of samples. + +*Note:* This function accepts two keyword arguments: `maxoutdim` and `pratio`. +""" +function pcasvd(Z::AbstractMatrix{T}, mean::AbstractVector{T}, n::Real; maxoutdim::Int=min(size(Z)...), pratio::Real=default_pca_pratio) where {T<:Real} @@ -118,7 +219,7 @@ function pcasvd(Z::AbstractMatrix{T}, mean::Vector{T}, tw::Real; v = Svd.S::Vector{T} U = Svd.U::Matrix{T} for i = 1:length(v) - @inbounds v[i] = abs2(v[i]) / tw + @inbounds v[i] = abs2(v[i]) / n end ord = sortperm(v; rev=true) vsum = sum(v) @@ -128,7 +229,32 @@ function pcasvd(Z::AbstractMatrix{T}, mean::Vector{T}, tw::Real; end ## interface functions +""" + fit(PCA, X; ...) + +Perform PCA over the data given in a matrix `X`. Each column of `X` is an **observation**. + +**Keyword arguments** + +- `method`: The choice of methods: + - `:auto`: use `:cov` when `d < n` or `:svd` otherwise (*default*). + - `:cov`: based on covariance matrix decomposition. + - `:svd`: based on SVD of the input data. +- `maxoutdim`: The output dimension, i.e. 
the dimension of the transformed space (*default:* `d`, the dimension of the observation space)
- `pratio`: The ratio of variance preserved in the principal subspace (*default:* `0.99`)
- `mean`: The mean vector, which can be either of:
    - `0`: the input data has already been centralized
    - `nothing`: this function will compute the mean (*default*)
    - a pre-computed mean vector

**Notes:**

- The output dimension `p` depends on both `maxoutdim` and `pratio`, as follows. Suppose
  the first `k` principal components preserve at least `pratio` of the total variance, while the
  first `k-1` preserve less than `pratio`; then the actual output dimension is \$\min(k, maxoutdim)\$.
- This function calls [`pcacov`](@ref) or [`pcasvd`](@ref) internally, depending on the choice of method.
"""
function fit(::Type{PCA}, X::AbstractMatrix{T};
             method::Symbol=:auto,
             maxoutdim::Int=size(X,1),
diff --git a/src/types.jl b/src/types.jl
new file mode 100644
index 0000000..6ff1372
--- /dev/null
+++ b/src/types.jl
@@ -0,0 +1,27 @@
abstract type AbstractDimensionalityReduction <: RegressionModel end

"""
    projection(model::AbstractDimensionalityReduction)

Return the projection matrix of the model.
"""
projection(model::AbstractDimensionalityReduction) = error("'projection' is not defined for $(typeof(model)).")

"""
    reconstruct(model::AbstractDimensionalityReduction, y)

Return observations reconstructed from their low-dimensional representation `y`.
"""
reconstruct(model::AbstractDimensionalityReduction, y) = error("'reconstruct' is not defined for $(typeof(model)).")

abstract type LinearDimensionalityReduction <: AbstractDimensionalityReduction end

"""
    loadings(model::LinearDimensionalityReduction)

Return the model loadings (i.e. the eigenvectors scaled by the square roots of the corresponding variances).
"""
loadings(model::LinearDimensionalityReduction) = error("'loadings' is not defined for $(typeof(model)).")

abstract type NonlinearDimensionalityReduction <: AbstractDimensionalityReduction end
abstract type LatentVariableDimensionalityReduction <: AbstractDimensionalityReduction end
diff --git a/test/pca.jl b/test/pca.jl
index 6336bba..00c36f9 100644
--- a/test/pca.jl
+++ b/test/pca.jl
@@ -1,7 +1,7 @@
 using MultivariateStats
 using LinearAlgebra
 using Test
-import Statistics: mean, cov
+import Statistics: mean, var, cov
 import Random
 import SparseArrays

@@ -16,21 +16,26 @@ import SparseArrays

    P = qr(randn(5, 5)).Q[:, 1:3]
    pvars = [5., 4., 3.]
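+    # hard-coded expected loadings: loadings(M) is defined as sqrt.(pvars)' .* P,
+    # so the values below are that product, precomputed for this test's data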
+ l = [-0.809509 -1.14456 0.944145 + -0.738713 -1.23353 -0.607874; + -1.64431 0.875826 -0.479549; + -0.816033 0.613632 1.06775 ; + 0.655236 0.157369 0.607475] M = PCA(Float64[], P, pvars, 15.0) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5, 3) @test mean(M) == zeros(5) @test projection(M) == P @test principalvars(M) == pvars @test principalvar(M, 2) == pvars[2] - @test tvar(M) == 15.0 + @test var(M) == 15.0 @test tprincipalvar(M) == 12.0 @test tresidualvar(M) == 3.0 @test principalratio(M) == 0.8 + @test isapprox(loadings(M),l, atol = 0.001) - @test transform(M, X[:,1]) ≈ P'X[:,1] - @test transform(M, X) ≈ P'X + @test predict(M, X[:,1]) ≈ P'X[:,1] + @test predict(M, X) ≈ P'X @test reconstruct(M, Y[:,1]) ≈ P * Y[:,1] @test reconstruct(M, Y) ≈ P * Y @@ -41,19 +46,18 @@ import SparseArrays mval = rand(5) M = PCA(mval, P, pvars, 15.0) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test mean(M) == mval @test projection(M) == P @test principalvars(M) == pvars @test principalvar(M, 2) == pvars[2] - @test tvar(M) == 15.0 + @test var(M) == 15.0 @test tprincipalvar(M) == 12.0 @test tresidualvar(M) == 3.0 @test principalratio(M) == 0.8 - @test transform(M, X[:,1]) ≈ P' * (X[:,1] .- mval) - @test transform(M, X) ≈ P' * (X .- mval) + @test predict(M, X[:,1]) ≈ P' * (X[:,1] .- mval) + @test predict(M, X) ≈ P' * (X .- mval) @test reconstruct(M, Y[:,1]) ≈ P * Y[:,1] .+ mval @test reconstruct(M, Y) ≈ P * Y .+ mval @@ -82,16 +86,15 @@ import SparseArrays P = projection(M) pvs = principalvars(M) - @test indim(M) == 5 - @test outdim(M) == 5 + @test size(M) == (5,5) @test mean(M) == mval @test P'P ≈ Matrix(I, 5, 5) @test C*P ≈ P*Diagonal(pvs) @test issorted(pvs; rev=true) @test pvs ≈ pvs0 - @test tvar(M) ≈ tv - @test sum(pvs) ≈ tvar(M) - @test reconstruct(M, transform(M, X)) ≈ X + @test var(M) ≈ tv + @test sum(pvs) ≈ var(M) + @test reconstruct(M, predict(M, X)) ≈ X M = fit(PCA, X; mean=mval) @test projection(M) ≈ P @@ -102,15 +105,13 @@ import SparseArrays M = fit(PCA, X; maxoutdim=3) P = projection(M) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test P'P ≈ Matrix(I, 3, 3) @test issorted(pvs; rev=true) M = fit(PCA, X; pratio=0.85) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test P'P ≈ Matrix(I, 3, 3) @test issorted(pvs; rev=true) @@ -120,16 +121,15 @@ import SparseArrays P = projection(M) pvs = principalvars(M) - @test indim(M) == 5 - @test outdim(M) == 5 + @test size(M) == (5,5) @test mean(M) == mval @test P'P ≈ Matrix(I, 5, 5) @test isapprox(C*P, P*Diagonal(pvs), atol=1.0e-3) @test issorted(pvs; rev=true) @test isapprox(pvs, pvs0, atol=1.0e-3) - @test isapprox(tvar(M), tv, atol=1.0e-3) - @test sum(pvs) ≈ tvar(M) - @test reconstruct(M, transform(M, X)) ≈ X + @test isapprox(var(M), tv, atol=1.0e-3) + @test sum(pvs) ≈ var(M) + @test reconstruct(M, predict(M, X)) ≈ X M = fit(PCA, X; method=:svd, mean=mval) @test projection(M) ≈ P @@ -140,15 +140,13 @@ import SparseArrays M = fit(PCA, X; method=:svd, maxoutdim=3) P = projection(M) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test P'P ≈ Matrix(I, 3, 3) @test issorted(pvs; rev=true) M = fit(PCA, X; method=:svd, pratio=0.85) - @test indim(M) == 5 - @test outdim(M) == 3 + @test size(M) == (5,3) @test P'P ≈ Matrix(I, 3, 3) @test issorted(pvs; rev=true) @@ -166,8 +164,8 @@ import SparseArrays fit(PCA, X ; pratio=pp) fit(PCA, XX ; pratio=p) fit(PCA, XX ; pratio=pp) - transform(M, XX) - transform(MM, X) + predict(M, XX) + predict(MM, X) 
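+    # reconstruction should likewise accept either element type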
reconstruct(M, YY) reconstruct(MM, Y)