diff --git a/docs_input/api/linalg/decomp/chol.rst b/docs_input/api/linalg/decomp/chol.rst index 30ddfd52..c60c2c50 100644 --- a/docs_input/api/linalg/decomp/chol.rst +++ b/docs_input/api/linalg/decomp/chol.rst @@ -3,13 +3,22 @@ chol #### -Perform a Cholesky factorization and saves the result in either the upper or lower triangle of the output. +Perform a Cholesky factorization. .. note:: - The input matrix must be positive semidefinite + The input matrix must be symmetric positive-definite .. doxygenfunction:: chol +Enums +~~~~~ + +The following enums are used for configuring the behavior of Cholesky operations. + +.. _solverfillmode: + +.. doxygenenum:: SolverFillMode + Examples ~~~~~~~~ diff --git a/docs_input/api/linalg/decomp/inverse.rst b/docs_input/api/linalg/decomp/inverse.rst index 4982970a..18bdd01b 100644 --- a/docs_input/api/linalg/decomp/inverse.rst +++ b/docs_input/api/linalg/decomp/inverse.rst @@ -6,8 +6,7 @@ inv Matrix inverse -------------- -Perform a matrix inverse on a square matrix using LU decomposition. The inverse API is currently using cuBLAS as a backend and uses -getri/getrf functions for LU decomposition. +Compute the inverse of a square matrix. .. note:: This function is currently is not supported with host-based executors (CPU) diff --git a/docs_input/api/linalg/decomp/lu.rst b/docs_input/api/linalg/decomp/lu.rst index 744dba36..416b5035 100644 --- a/docs_input/api/linalg/decomp/lu.rst +++ b/docs_input/api/linalg/decomp/lu.rst @@ -3,8 +3,7 @@ lu ## -Perform an LU factorization. The input and output tensors may be the same tensor, in which case the -input is overwritten. +Perform an LU factorization. .. doxygenfunction:: lu diff --git a/docs_input/api/linalg/decomp/qr.rst b/docs_input/api/linalg/decomp/qr.rst index 00477d8b..0cdbf3dc 100644 --- a/docs_input/api/linalg/decomp/qr.rst +++ b/docs_input/api/linalg/decomp/qr.rst @@ -7,6 +7,9 @@ Perform a QR decomposition. .. doxygenfunction:: qr +.. 
note:: + This function is currently not supported with host-based executors (CPU) + Examples ~~~~~~~~ @@ -16,8 +19,13 @@ Examples :end-before: example-end qr-test-1 :dedent: + .. doxygenfunction:: qr_solver +.. note:: + This function does not return `Q` explicitly as it only runs :literal:`geqrf` from LAPACK/cuSolver. + For full `Q/R`, use :literal:`qr` on a CUDA executor. + Examples ~~~~~~~~ diff --git a/docs_input/api/linalg/decomp/svd.rst b/docs_input/api/linalg/decomp/svd.rst index 9f5edd4e..b7853b2d 100644 --- a/docs_input/api/linalg/decomp/svd.rst +++ b/docs_input/api/linalg/decomp/svd.rst @@ -7,6 +7,15 @@ Perform a singular value decomposition (SVD). .. doxygenfunction:: svd +Enums +~~~~~ + +The following enums are used for configuring the behavior of SVD operations. + +.. doxygenenum:: SVDMode +.. doxygenenum:: SVDHostAlgo + + Examples ~~~~~~~~ diff --git a/docs_input/api/linalg/eigenvalues/eig.rst b/docs_input/api/linalg/eigenvalues/eig.rst index a75ef1ef..b0635961 100644 --- a/docs_input/api/linalg/eigenvalues/eig.rst +++ b/docs_input/api/linalg/eigenvalues/eig.rst @@ -3,10 +3,18 @@ eig ### -Perform an eigenvalue decomposition saving the eigenvalues, and optionally saving eigenvectors +Perform an eigenvalue decomposition for Hermitian or real symmetric matrices. .. doxygenfunction:: eig +Enums +~~~~~ + +The following enums are used for configuring the behavior of Eig operations. + +.. doxygenenum:: EigenMode + + Examples ~~~~~~~~ diff --git a/include/matx/operators/chol.h b/include/matx/operators/chol.h index 849dc3a1..85aa87f4 100644 --- a/include/matx/operators/chol.h +++ b/include/matx/operators/chol.h @@ -112,6 +112,24 @@ namespace detail { }; } +/** + * Performs a Cholesky factorization, saving the result in either the upper or + * lower triangle of the output. + * + * If rank > 2, operations are batched. + * + * @tparam OpA + * Data type of input a tensor or operator + * + * @param a + * Input tensor or operator of shape `... 
x n x n` + * @param uplo + * Part of matrix to fill + * + * @return + * Operator that produces the factorization output of shape `... x n x n`. + * + */ template __MATX_INLINE__ auto chol(const OpA &a, SolverFillMode uplo = SolverFillMode::UPPER) { return detail::CholOp(a, uplo); diff --git a/include/matx/operators/det.h b/include/matx/operators/det.h index e2c17a78..6a12090e 100644 --- a/include/matx/operators/det.h +++ b/include/matx/operators/det.h @@ -108,7 +108,18 @@ namespace detail { * Computes the determinant by performing an LU factorization of the input, * and then calculating the product of diagonal entries of the U factor. * - * For tensors of rank > 2, batching is performed. + * If rank > 2, operations are batched. + * + * @tparam OpA + * Data type of input a tensor or operator + * + * @param a + * Input square tensor or operator of shape `... x n x n` + * + * @return + * Operator that produces the determinant output of shape `...`. This means + * rank 2 inputs will have an output shape of `{}`, while higher dimensional + * inputs will have output shape matching the batch dimension(s). * */ template diff --git a/include/matx/operators/eig.h b/include/matx/operators/eig.h index 8a100d08..1baea3bd 100644 --- a/include/matx/operators/eig.h +++ b/include/matx/operators/eig.h @@ -94,6 +94,31 @@ namespace detail { }; } + +/** + * Performs an eigenvalue decomposition, computing the eigenvalues, and + * optionally the eigenvectors, for a Hermitian or real symmetric matrix. + * + * If rank > 2, operations are batched. + * + * @tparam OpA + * Data type of input a tensor or operator + * + * @param a + * Input Hermitian/symmetric tensor or operator of shape `... x n x n` + * @param jobz + * Whether to compute eigenvectors. + * @param uplo + * Part of matrix to fill + * + * @return + * Operator that produces eigenvectors and eigenvalues tensors. Regardless of jobz, + * both tensors must be correctly setup for the operation and used with `mtie()`. 
+ * - **Eigenvectors** - The eigenvectors tensor of shape `... x n x n` where each column + * contains the normalized eigenvectors. + * - **Eigenvalues** - The eigenvalues tensor of shape `... x n`. This must be real + * and match the inner type of the input/output tensors. + */ template __MATX_INLINE__ auto eig(const OpA &a, EigenMode jobz = EigenMode::VECTOR, diff --git a/include/matx/operators/inverse.h b/include/matx/operators/inverse.h index 69684f6a..e979862f 100644 --- a/include/matx/operators/inverse.h +++ b/include/matx/operators/inverse.h @@ -107,6 +107,25 @@ namespace detail { }; } +/** + * Performs a matrix inverse on a square matrix. The inverse API currently uses + * cuBLAS as a backend with the `cublasmatinvBatched()` family of functions + * for `N <= 32` and `getri/getrf` functions otherwise. + * + * If rank > 2, operations are batched. + * + * @tparam OpA + * Data type of input a tensor or operator + * @tparam ALGO + * Algorithm to use for matrix inversion. Currently only supports MAT_INVERSE_ALGO_LU + * + * @param a + * Input tensor or operator of shape `... x n x n` + * + * @return + * Operator that produces the inverse tensor of shape `... x n x n`. + * + */ template __MATX_INLINE__ auto inv(const OpA &a) { return detail::InvOp(a); diff --git a/include/matx/operators/lu.h b/include/matx/operators/lu.h index a1b224ce..7b4cf037 100644 --- a/include/matx/operators/lu.h +++ b/include/matx/operators/lu.h @@ -92,6 +92,31 @@ namespace detail { }; } +/** + * Performs an LU factorization using partial pivoting with row interchanges. + * The factorization has the form `A = P * L * U`. + * + * The input and output tensors may be the same tensor, in which case the + * input is overwritten. + * + * If rank > 2, operations are batched. + * + * @tparam OpA + * Data type of input a tensor or operator + * + * @param a + * Input tensor or operator of shape `... 
x m x n` + * + * @return + * Operator that produces a tensor containing *L* and *U* and another containing the pivot indices. + * - **Out** - A tensor of shape `... x m x n` containing both *L* and *U*. *L* can be extracted + * from the bottom half (the unit diagonals are not stored in *Out*), and *U* can + * be extracted from the top half with the diagonals. + * - **Piv** - The tensor of pivot indices with shape `... x min(m, n)`. For + * \f$ 0 \leq i < \min(m, n) \f$, row i was interchanged with row + * \f$ Piv(..., i) - 1 \f$. It must be of type `int64_t` for CUDA and + * `matx::lapack_int_t` for host. + */ template __MATX_INLINE__ auto lu(const OpA &a) { return detail::LUOp(a); diff --git a/include/matx/operators/pinv.h b/include/matx/operators/pinv.h index fac731ea..099517da 100644 --- a/include/matx/operators/pinv.h +++ b/include/matx/operators/pinv.h @@ -121,7 +121,7 @@ namespace detail { * Perfom a generalized inverse of a matrix using its singular-value decomposition (SVD). * It automatically removes small singular values for stability. * - * For tensors of rank > 2, batching is performed. + * If rank > 2, operations are batched. * * @tparam OpA * Tensor or operator type of input A @@ -131,10 +131,11 @@ namespace detail { * @param rcond * Cutoff for small singular values. For stability, singular values * smaller than `rcond * largest_singular_value` are set to 0 for each matrix - * in the batch. By default, `rcond` is approximately the machine epsilon of the tensor dtype. + * in the batch. By default, `rcond` is approximately the machine epsilon of the tensor dtype + * (`1e-6` for float types and `1e-15` for double types). * * @return - * An operator that gives a tensor of size `... x n x m` representing the pseudo-inverse of the input + * Operator that produces a tensor of size `... 
x n x m` representing the pseudo-inverse of the input */ template __MATX_INLINE__ auto pinv(const OpA &a, float rcond = get_default_rcond()) { diff --git a/include/matx/operators/qr.h b/include/matx/operators/qr.h index 9b0de509..845dc56a 100644 --- a/include/matx/operators/qr.h +++ b/include/matx/operators/qr.h @@ -93,7 +93,9 @@ namespace detail { /** - * Perform QR decomposition on a matrix using housholders reflections. If rank > 2 operations are batched. + * Perform QR decomposition on a matrix using Householder reflections. + * + * If rank > 2, operations are batched. * * @tparam AType * Tensor or operator type for output of A input tensors. @@ -160,6 +162,19 @@ namespace detail { /** * Perform a QR decomposition on a matrix using cuSolver or a LAPACK host library. * + * If rank > 2, operations are batched. + * + * @tparam OpA + * Data type of input a tensor or operator + * + * @param a + * Input tensor or operator of shape `... x m x n` + * + * @return + * Operator that produces R/householder vectors and tau tensor outputs. + * - **Out** - Of shape `... x m x n`. The householder vectors are returned in the + * bottom half and *R* is returned in the top half. + * - **Tau** - The scalar factors *tau* of shape `... x min(m, n)`. */ template __MATX_INLINE__ auto qr_solver(const OpA &a) { diff --git a/include/matx/operators/svd.h b/include/matx/operators/svd.h index ecd25042..87c4966f 100644 --- a/include/matx/operators/svd.h +++ b/include/matx/operators/svd.h @@ -103,15 +103,15 @@ namespace detail { * * The singular values within each vector are sorted in descending order. * - * For tensors of Rank > 2, batching is performed. + * If rank > 2, operations are batched. * * @tparam OpA * Operator input type * * @param a - * Input operator of shape MxN + * Input operator of shape `... 
x m x n` * @param jobz - * Compute all, part, or none of matrices U and VT + * Compute all, part, or none of matrices *U* and *VT* * @param algo * For Host SVD calls, whether to use more efficient divide-and-conquer based * `gesdd` routine or the QR factorization based `gesvd` routine. `gesdd` @@ -120,6 +120,16 @@ namespace detail { * `gesvd`, and it can have poorer accuracy in some cases. * Ignored for CUDA SVD calls. * + * @return + * Operator that produces *U*, *S*, and *VT* tensors. Regardless of jobz, all 3 tensors + * must be correctly setup for the operation and used with `mtie()`. `k = min(m, n)` + * - **U** - The unitary matrix containing the left singular vectors. A tensor of + * shape `... x m x k` for `SVDMode::REDUCED` and `... x m x m` otherwise. + * - **S** - A tensor of shape `... x k` containing the singular values in + * descending order. It must be of real type and match the inner type of + * the other tensors. + * - **VT** - The unitary matrix containing the right singular vectors. A tensor of + * shape `... x k x n` for `SVDMode::REDUCED` and `... x n x n` otherwise. */ template __MATX_INLINE__ auto svd(const OpA &a, const SVDMode jobz = SVDMode::ALL, @@ -189,10 +199,10 @@ namespace detail { * Tensor or operator type for X0 initial guess in power iteration. * * @param A - * Input tensor or operator for tensor A input with size "batches by m by n" + * Input tensor or operator for tensor A input with size `batches x m x n` * @param x0 * Input tensor or operator signaling the initial guess for x0 at each power iteration. A - * Random tensor of size batches x min(n,m) is suggested. + * Random tensor of size `batches x min(n,m)` is suggested. * @param iterations * The number of power iterations to perform for each singular value. * @param k @@ -266,7 +276,7 @@ namespace detail { * Tensor or operator type for output of A input tensors. 
* * @param A - * Input tensor or operator for tensor A input with size "batches by m by n" + * Input tensor or operator for tensor A input with size `batches x m x n` * @param max_iters * The approximate maximum number of QR iterations to perform. * @param tol diff --git a/include/matx/transforms/solver_common.h b/include/matx/transforms/solver_common.h index 01bfa2cd..322ff23d 100644 --- a/include/matx/transforms/solver_common.h +++ b/include/matx/transforms/solver_common.h @@ -59,34 +59,44 @@ namespace matx { /* Parameter enums */ -// Which part (lower or upper) of the dense matrix was filled -// and should be used by the function +/** + * @enum SolverFillMode + * Indicates which part (lower or upper) of the dense matrix was filled + * and should be used by the function. + */ enum class SolverFillMode { - UPPER, - LOWER + UPPER, /**< Use the upper part of the matrix */ + LOWER /**< Use the lower part of the matrix */ }; +/** + * @enum EigenMode + * Specifies whether or not eigenvectors should be computed. + */ enum class EigenMode { - NO_VECTOR, // Only eigenvalues are computed - VECTOR // Both eigenvalues and eigenvectors are computed + NO_VECTOR, /**< Only eigenvalues are computed */ + VECTOR /**< Both eigenvalues and eigenvectors are computed */ }; -// SVD modes for computing columns of U and rows of VT, which are -// termed jobu and jobvt in LAPACK/cuSolver. The same option is used for -// both jobu and jobvt in MatX. +/** + * @enum SVDMode + * Modes for computing columns of *U* and rows of *VT* in Singular Value Decomposition (SVD). + * Corresponds to the LAPACK/cuSolver parameters jobu and jobvt. The same option is used + * for both jobu and jobvt in MatX. 
+ */ enum class SVDMode { - ALL, // Compute all columns of U and all rows of V^T - // Equivalent to jobu = jobvt = 'A' - REDUCED, // Compute only the first min(m,n) columns of U and rows of V^T - // Equivalent to jobu = jobvt = 'S' - NONE // Compute no columns of U or rows of V^T - // Equivalent to jobu = jobvt = 'N' + ALL, /**< Compute all columns of *U* and all rows of *VT* (Equivalent to jobu = jobvt = 'A') */ + REDUCED, /**< Compute only the first `min(m,n)` columns of *U* and rows of *VT* (Equivalent to jobu = jobvt = 'S') */ + NONE /**< Compute no columns of *U* or rows of *VT* (Equivalent to jobu = jobvt = 'N') */ }; -// Controls the LAPACK driver used for SVD on host. +/** + * @enum SVDHostAlgo + * Controls the LAPACK driver used for SVD on host. + */ enum class SVDHostAlgo { - QR, // QR based (corresponds to GESVD) - DC // Divide and Conquer based (corresponds to GESDD) + QR, /**< QR-based method (corresponds to `gesvd`) */ + DC /**< Divide and Conquer method (corresponds to `gesdd`) */ }; namespace detail {