diff --git a/docs/make.jl b/docs/make.jl
index fb97142..a0c9280 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -8,7 +8,11 @@ makedocs(
     sitename = "MultivariateStats.jl",
     modules = [MultivariateStats],
     pages = ["Home"=>"index.md",
-             "whiten.md", "lda.md", "pca.md", "mds.md",
+             "whiten.md",
+             "lreg.md",
+             "lda.md",
+             "pca.md",
+             "mds.md",
              "Development"=>"api.md"]
 )
 
diff --git a/docs/src/index.md b/docs/src/index.md
index 34acfcf..7f067bd 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -11,7 +11,7 @@ end
 [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl) is a Julia package for multivariate statistical analysis. It provides a rich set of useful analysis techniques, such as PCA, CCA, LDA, ICA, etc.
 
 ```@contents
-Pages = ["whiten.md", "lda.md", "pca.md", "mds.md", "api.md"]
+Pages = ["whiten.md", "lreg.md", "lda.md", "pca.md", "mds.md", "api.md"]
 Depth = 2
 ```
 
diff --git a/docs/src/lreg.md b/docs/src/lreg.md
new file mode 100644
index 0000000..d85c1ac
--- /dev/null
+++ b/docs/src/lreg.md
@@ -0,0 +1,167 @@
+# Regression
+
+The package provides functions to perform *Linear Least Square*, *Ridge*, and *Isotonic Regression*.
+
+## Examples
+
+```@setup REGex
+using Plots
+gr(fmt=:svg)
+```
+
+Performing [`llsq`](@ref) and [`isotonic`](@ref) regression on the *cars* data set:
+
+```@example REGex
+using MultivariateStats, RDatasets, Plots
+
+# load cars dataset
+cars = dataset("datasets", "cars")
+
+# calculate regression models
+a = llsq(cars[!,:Speed], cars[!, :Dist])
+b = isotonic(cars[!,:Speed], cars[!, :Dist])
+
+# plot results
+p = scatter(cars[!,:Speed], cars[!,:Dist], xlab="Speed", ylab="Distance",
+            leg=:topleft, lab="data")
+let xs = cars[!,:Speed]
+    plot!(p, xs, map(x->a[1]*x+a[2], xs), lab="llsq")
+    plot!(p, xs, b, lab="isotonic")
+end
+```
+
+For a single response vector `y` (without using bias):
+
+```julia
+using MultivariateStats, Statistics
+
+# prepare data
+X = rand(1000, 3)               # feature matrix
+a0 = rand(3)                    # ground truths
+y = X * a0 + 0.1 * randn(1000)  # generate response
+
+# solve using llsq
+a = llsq(X, y; bias=false)
+
+# do prediction
+yp = X * a
+
+# measure the error
+rmse = sqrt(mean(abs2.(y - yp)))
+print("rmse = $rmse")
+```
+
+For a single response vector `y` (using bias):
+
+```julia
+# prepare data
+X = rand(1000, 3)
+a0, b0 = rand(3), rand()
+y = X * a0 .+ b0 .+ 0.1 * randn(1000)
+
+# solve using llsq
+sol = llsq(X, y)
+
+# extract results
+a, b = sol[1:end-1], sol[end]
+
+# do prediction
+yp = X * a .+ b'
+```
+
+For a matrix `X` whose columns are observations (column-stored regressors) and a matrix `Y` whose columns are separate dependent variables:
+
+```julia
+# prepare data
+X = rand(3, 1000)
+A0, b0 = rand(3, 5), rand(1, 5)
+Y = (X' * A0 .+ b0) + 0.1 * randn(1000, 5)
+
+# solve using llsq
+sol = llsq(X, Y, dims=2)
+
+# extract results
+A, b = sol[1:end-1,:], sol[end,:]
+
+# do prediction
+Yp = X'*A .+ b'
+```
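+
+Ridge regression is called in the same way, with an additional regularization
+argument. Below is a minimal sketch on synthetic data (the regularization value
+`0.1` is an arbitrary choice for illustration):
+
+```julia
+# prepare data
+X = rand(1000, 3)
+a0, b0 = rand(3), rand()
+y = X * a0 .+ b0 .+ 0.1 * randn(1000)
+
+# solve using ridge with regularization parameter r = 0.1;
+# with the default `bias=true`, the solution is returned as `[a; b]`
+sol = ridge(X, y, 0.1)
+
+# extract results
+a, b = sol[1:end-1], sol[end]
+
+# do prediction
+yp = X * a .+ b
+```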
+
+## Linear Least Square
+
+[Linear Least Square](http://en.wikipedia.org/wiki/Linear_least_squares_(mathematics))
+finds linear combination(s) of the given variables that fit the responses by
+minimizing the squared error between them.
+This can be formulated as the following optimization problem:
+
+```math
+\mathop{\mathrm{minimize}}_{(\mathbf{a}, b)} \
+    \frac{1}{2} \|\mathbf{y} - (\mathbf{X} \mathbf{a} + b)\|^2
+```
+
+Sometimes, the coefficient matrix is given in a transposed form, in which case
+the optimization is modified as:
+
+```math
+\mathop{\mathrm{minimize}}_{(\mathbf{a}, b)} \
+    \frac{1}{2} \|\mathbf{y} - (\mathbf{X}^T \mathbf{a} + b)\|^2
+```
+
+The package provides the following function to solve the above problems:
+
+```@docs
+llsq
+```
+
+## Ridge Regression
+
+Compared to linear least square, [Ridge Regression](http://en.wikipedia.org/wiki/Tikhonov_regularization)
+uses an additional quadratic term to regularize the problem:
+
+```math
+\mathop{\mathrm{minimize}}_{(\mathbf{a}, b)} \
+    \frac{1}{2} \|\mathbf{y} - (\mathbf{X} \mathbf{a} + b)\|^2 +
+    \frac{1}{2} \mathbf{a}^T \mathbf{Q} \mathbf{a}
+```
+
+The transposed form:
+
+```math
+\mathop{\mathrm{minimize}}_{(\mathbf{a}, b)} \
+    \frac{1}{2} \|\mathbf{y} - (\mathbf{X}^T \mathbf{a} + b)\|^2 +
+    \frac{1}{2} \mathbf{a}^T \mathbf{Q} \mathbf{a}
+```
+
+The package provides the following function to solve the above problems:
+
+```@docs
+ridge
+```
+
+## Isotonic Regression
+
+[Isotonic regression](https://en.wikipedia.org/wiki/Isotonic_regression) or
+monotonic regression fits a sequence of observations with a fitted line that is
+non-decreasing (or non-increasing) everywhere. The problem is defined as a weighted
+least-squares fit ``{\hat {y}}_{i} \approx y_{i}`` for all ``i``, subject to
+the constraint that ``{\hat {y}}_{i} \leq {\hat {y}}_{j}`` whenever
+``x_{i} \leq x_{j}``.
+This gives the following quadratic program:
+
+```math
+\min \sum_{i=1}^{n} w_{i}({\hat {y}}_{i}-y_{i})^{2}
+\text{ subject to } {\hat {y}}_{i} \leq {\hat {y}}_{j}
+\text{ for all } (i,j) \in E
+```
+
+where ``E=\{(i,j):x_{i}\leq x_{j}\}`` specifies the partial ordering of
+the observed inputs ``x_{i}``.
+
+The package provides the following function to solve the above problem:
+
+```@docs
+isotonic
+```
+
+---
+
+### References
+
+[^1]: Best, M. J., Chakravarti, N. Active set algorithms for isotonic regression; a unifying framework. Mathematical Programming 47, 425–439 (1990).
+
diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl
index 6cd1544..12cc050 100644
--- a/src/MultivariateStats.jl
+++ b/src/MultivariateStats.jl
@@ -26,6 +26,7 @@ module MultivariateStats
     # lreg
     llsq,       # Linear Least Square regression
     ridge,      # Ridge regression
+    isotonic,   # Isotonic regression
 
     # whiten
     Whitening,  # Type: Whitening transformation
diff --git a/src/lreg.jl b/src/lreg.jl
index b157a8b..35e7f7d 100644
--- a/src/lreg.jl
+++ b/src/lreg.jl
@@ -1,6 +1,6 @@
-# Ridge Regression (Tikhonov regularization)
+# Regression
 
-#### auxiliary
+## Auxiliary
 
 function lreg_chkdims(X::AbstractMatrix, Y::AbstractVecOrMat, trans::Bool)
     mX, nX = size(X)
@@ -17,8 +17,23 @@ _vaug(X::AbstractMatrix{T}) where T = vcat(X, ones(T, 1, size(X,2)))::Matrix{T}
 _haug(X::AbstractMatrix{T}) where T = hcat(X, ones(T, size(X,1), 1))::Matrix{T}
 
-## linear least square
+## Linear Least Square Regression
+
+"""
+    llsq(X, y; ...)
+
+Solve the linear least square problem.
+
+Here, `y` can be either a vector, or a matrix where each column is a response vector.
+
+This function accepts two keyword arguments:
+
+- `dims`: whether input observations are stored as rows (`1`) or columns (`2`); default is `1`.
+- `bias`: whether to include the bias term `b`; default is `true`.
+
+The function returns the solution `a`.
+In particular, when `y` is a vector (matrix), `a` is also a vector (matrix). If `bias` is true, then the returned array is augmented as `[a; b]`.
+"""
 function llsq(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T};
               trans::Bool=false, bias::Bool=true,
               dims::Union{Integer,Nothing}=nothing) where {T<:Real}
@@ -37,9 +52,32 @@ function llsq(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T};
     end
     _ridge(X, Y, zero(T), dims == 2, bias)
 end
+llsq(x::AbstractVector{T}, y::AbstractVector{T}; kwargs...) where {T<:Real} =
+    llsq(x[:,:], y; dims=1, kwargs...)
+
+## Ridge Regression (Tikhonov regularization)
+
+"""
+    ridge(X, y, r; ...)
+
+Solve the ridge regression problem.
+
+Here, ``y`` can be either a vector, or a matrix where each column is a response vector.
+
+The argument `r` gives the quadratic regularization matrix ``Q``, which can be in either of the following forms:
+
+- `r` is a real scalar, then ``Q`` is `r` times the identity matrix of size `n`, where `n` is the dimension of `a`.
+- `r` is a real vector, then ``Q`` is considered to be `diagm(r)`.
+- `r` is a real symmetric matrix, then ``Q`` is simply considered to be `r`.
-
-## ridge regression
+
+This function accepts two keyword arguments:
+
+- `dims`: whether input observations are stored as rows (`1`) or columns (`2`); default is `1`.
+- `bias`: whether to include the bias term `b`; default is `true`.
+
+The function returns the solution `a`. In particular, when `y` is a vector (matrix), `a` is also a vector (matrix). If `bias` is true, then the returned array is augmented as `[a; b]`.
+"""
 function ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}, r::Union{Real, AbstractVecOrMat};
                trans::Bool=false, bias::Bool=true,
                dims::Union{Integer,Nothing}=nothing) where {T<:Real}
@@ -50,7 +88,7 @@ function ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}, r::Union{Real, Abst
     d = lreg_chkdims(X, Y, dims == 2)
     if isa(r, Real)
         r >= zero(r) || error("r must be non-negative.")
-        r = convert(T, r)
+        r = convert(T <: AbstractFloat ? T : Float64, r)
     elseif isa(r, AbstractVector)
         length(r) == d || throw(DimensionMismatch("Incorrect length of r."))
     elseif isa(r, AbstractMatrix)
@@ -58,11 +96,16 @@ function ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}, r::Union{Real, Abst
     end
     _ridge(X, Y, r, dims == 2, bias)
 end
+ridge(x::AbstractVector{T}, y::AbstractVector{T}, r::Union{Real, AbstractVecOrMat};
+      kwargs...) where {T<:Real} = ridge(x[:,:], y, r; dims=1, kwargs...)
 
-## implementation
+### implementation
 
-function _ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T},
+function _ridge(_X::AbstractMatrix{T}, _Y::AbstractVecOrMat{T},
                 r::Union{Real, AbstractVecOrMat}, trans::Bool, bias::Bool) where {T<:Real}
+    # convert integer data to Float64
+    X = T <: AbstractFloat ? _X : convert(Array{Float64}, _X)
+    Y = T <: AbstractFloat ? _Y : convert(Array{Float64}, _Y)
     if bias
         if trans
             X_ = _vaug(X)
@@ -108,3 +151,53 @@ function _ridge_reg!(Q::AbstractMatrix, r::AbstractMatrix, bias::Bool)
     end
     return Q
 end
+
+## Isotonic Regression
+
+"""
+    isotonic(x, y[, w])
+
+Solve the isotonic regression problem using the pool adjacent violators algorithm[^1].
+
+Here `x` is the regressor vector, `y` is the response vector, and `w` is an optional
+vector of weights.
+
+The function returns a prediction vector of the same size as the regressor vector `x`.
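+
+A small, hand-checkable sketch (the values are arbitrary): the only violation,
+`3.0` followed by `2.0`, is pooled to its mean `2.5`, so the expected fit is
+non-decreasing:
+
+```julia
+x = [1.0, 2.0, 3.0, 4.0]
+y = [1.0, 3.0, 2.0, 4.0]
+isotonic(x, y)    # ≈ [1.0, 2.5, 2.5, 4.0]
+```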
+"""
+function isotonic(x::AbstractVector{T}, y::AbstractVector{T},
+                  w::AbstractVector{T} = ones(T, length(y))) where {T<:Real}
+    n = length(x)
+    n == length(y) || throw(DimensionMismatch("Dimensions of x and y mismatch."))
+
+    idx = sortperm(x)
+
+    # PAVA (pool adjacent violators) algorithm:
+    # each block stores (fitted value, total weight, member indices)
+    J = map(i->(Float64(y[i]), w[i], [i]), idx)
+    i = 1
+    B₀ = J[i]
+    while i < length(J)
+        B₊ = J[i+1]
+        if B₀[1] <= B₊[1] # step 1
+            B₀ = B₊
+            i += 1
+        else # step 2
+            ww = B₀[2] + B₊[2]
+            J[i] = ((B₀[1]*B₀[2]+B₊[1]*B₊[2])/ww, ww, append!(B₀[3], B₊[3]))
+            deleteat!(J, i+1)
+            B₀ = J[i]
+            while i > 1 # step 2.1
+                B₋ = J[i-1]
+                if B₀[1] <= B₋[1]
+                    ww = B₀[2] + B₋[2]
+                    J[i] = ((B₀[1]*B₀[2]+B₋[1]*B₋[2])/ww, ww, append!(B₀[3], B₋[3]))
+                    deleteat!(J, i-1)
+                    i -= 1
+                else
+                    break
+                end
+            end
+        end
+    end
+    [y for (y,w,ii) in J for i in sort(ii)]
+end
+
diff --git a/test/lreg.jl b/test/lreg.jl
index b86d89a..9d148a2 100644
--- a/test/lreg.jl
+++ b/test/lreg.jl
@@ -3,7 +3,7 @@ using Test
 using LinearAlgebra
 using StableRNGs
 
-@testset "Ridge Regression" begin
+@testset "Regression" begin
 
     rng = StableRNG(34568)
 
@@ -184,4 +184,21 @@ using StableRNGs
     aa = ridge(Xt, y1, r; dims=2, bias=true)
     @test aa ≈ Aa[:,1]
+
+    ## isotonic
+    xx = [3.3, 3.3, 3.3, 6, 7.5, 7.5]
+    yy = [4, 5, 1, 6, 8, 7.0]
+    a = isotonic(xx, yy)
+    # expected pooling blocks (by index) are [1,2,3], [4], [5,6], whose means
+    # are ≈3.3, 6 and 7.5, so the fit approximately coincides with xx itself
+    @test a ≈ xx atol=0.1
+
+    ## using various types
+    @testset for (T, TR) in [(Int,Float64), (Float32, Float32)]
+        X, y = rand(T, 10, 3), rand(T, 10)
+        a = llsq(X, y)
+        @test eltype(a) == TR
+        a = ridge(X, y, one(T))
+        @test eltype(a) == TR
+    end
+
 end