Updated Solver documentation

NVIDIA · Aug 29, 2024 · 9796758 · 9796758
1 parent 90f7bd6
commit 9796758
Show file tree

Hide file tree

Showing 15 changed files with 202 additions and 36 deletions.
diff --git a/docs_input/api/linalg/decomp/chol.rst b/docs_input/api/linalg/decomp/chol.rst
@@ -3,13 +3,22 @@
 chol
 ####
 
-Perform a Cholesky factorization and saves the result in either the upper or lower triangle of the output. 
+Perform a Cholesky factorization.
 
 .. note::
-  The input matrix must be positive semidefinite
+  The input matrix must be symmetric positive-definite
 
 .. doxygenfunction:: chol
 
+Enums
+~~~~~
+
+The following enums are used for configuring the behavior of Cholesky operations.
+
+.. _solverfillmode:
+
+.. doxygenenum:: SolverFillMode
+
 Examples
 ~~~~~~~~
 

diff --git a/docs_input/api/linalg/decomp/inverse.rst b/docs_input/api/linalg/decomp/inverse.rst
@@ -6,8 +6,7 @@ inv
 Matrix inverse
 --------------
 
-Perform a matrix inverse on a square matrix using LU decomposition. The inverse API is currently using cuBLAS as a backend and uses
-getri/getrf functions for LU decomposition.
+Compute the inverse of a square matrix.
 
 .. note::
    This function is currently is not supported with host-based executors (CPU)

diff --git a/docs_input/api/linalg/decomp/lu.rst b/docs_input/api/linalg/decomp/lu.rst
@@ -3,8 +3,7 @@
 lu
 ##
 
-Perform an LU factorization. The input and output tensors may be the same tensor, in which case the
-input is overwritten.
+Perform an LU factorization.
 
 .. doxygenfunction:: lu
 

diff --git a/docs_input/api/linalg/decomp/qr.rst b/docs_input/api/linalg/decomp/qr.rst
@@ -7,6 +7,9 @@ Perform a QR decomposition.
 
 .. doxygenfunction:: qr
 
+.. note::
+   This function is currently not supported with host-based executors (CPU)
+
 Examples
 ~~~~~~~~
 
@@ -16,8 +19,13 @@ Examples
    :end-before: example-end qr-test-1
    :dedent:
 
+
 .. doxygenfunction:: qr_solver
 
+.. note::
+   This function does not return `Q` explicitly as it only runs :literal:`geqrf` from LAPACK/cuSolver.
+   For full `Q/R`, use :literal:`qr` on a CUDA executor.
+
 Examples
 ~~~~~~~~
 

diff --git a/docs_input/api/linalg/decomp/svd.rst b/docs_input/api/linalg/decomp/svd.rst
@@ -7,6 +7,15 @@ Perform a singular value decomposition (SVD).
 
 .. doxygenfunction:: svd
 
+Enums
+~~~~~
+
+The following enums are used for configuring the behavior of SVD operations.
+
+.. doxygenenum:: SVDMode
+.. doxygenenum:: SVDHostAlgo
+
+
 Examples
 ~~~~~~~~
 

diff --git a/docs_input/api/linalg/eigenvalues/eig.rst b/docs_input/api/linalg/eigenvalues/eig.rst
@@ -3,10 +3,18 @@
 eig
 ###
 
-Perform an eigenvalue decomposition saving the eigenvalues, and optionally saving eigenvectors
+Perform an eigenvalue decomposition for Hermitian or real symmetric matrices.
 
 .. doxygenfunction:: eig
 
+Enums
+~~~~~
+
+The following enums are used for configuring the behavior of Eig operations.
+
+.. doxygenenum:: EigenMode
+
+
 Examples
 ~~~~~~~~
 

diff --git a/include/matx/operators/chol.h b/include/matx/operators/chol.h
@@ -112,6 +112,24 @@ namespace detail {
   };
 }
 
+/**
+ * Performs a Cholesky factorization, saving the result in either the upper or
+ * lower triangle of the output. 
+ * 
+ * If rank > 2, operations are batched.
+ * 
+ * @tparam OpA
+ *   Type of the input tensor or operator `a`
+ * 
+ * @param a
+ *   Input tensor or operator of shape `... x n x n`
+ * @param uplo
+ *   Part of matrix to fill
+ * 
+ * @return
+ *   Operator that produces the factorization output of shape `... x n x n`.
+ * 
+ */
 template<typename OpA>
 __MATX_INLINE__ auto chol(const OpA &a, SolverFillMode uplo = SolverFillMode::UPPER) {
   return detail::CholOp(a, uplo);

diff --git a/include/matx/operators/det.h b/include/matx/operators/det.h
@@ -108,7 +108,18 @@ namespace detail {
  * Computes the determinant by performing an LU factorization of the input,
  * and then calculating the product of diagonal entries of the U factor.
  * 
- * For tensors of rank > 2, batching is performed.
+ * If rank > 2, operations are batched.
+ * 
+ * @tparam OpA
+ *   Type of the input tensor or operator `a`
+ * 
+ * @param a
+ *   Input square tensor or operator of shape `... x n x n`
+ * 
+ * @return
+ *   Operator that produces the determinant output of shape `...`. This means
+ *   rank 2 inputs will have an output shape of `{}`, while higher dimensional
+ *   inputs will have output shape matching the batch dimension(s).
  * 
  */
 template<typename OpA>

diff --git a/include/matx/operators/eig.h b/include/matx/operators/eig.h
@@ -94,6 +94,31 @@ namespace detail {
   };
 }
 
+
+/**
+ * Performs an eigenvalue decomposition, computing the eigenvalues, and
+ * optionally the eigenvectors, for a Hermitian or real symmetric matrix.
+ * 
+ * If rank > 2, operations are batched.
+ * 
+ * @tparam OpA
+ *   Type of the input tensor or operator `a`
+ * 
+ * @param a
+ *   Input Hermitian/symmetric tensor or operator of shape `... x n x n`
+ * @param jobz
+ *   Whether to compute eigenvectors.
+ * @param uplo
+ *   Triangle (upper or lower) of the input matrix to read
+ * 
+ * @return 
+ *   Operator that produces eigenvectors and eigenvalues tensors. Regardless of jobz,
+ *   both tensors must be correctly set up for the operation and used with `mtie()`.
+ *   - **Eigenvectors** - The eigenvectors tensor of shape `... x n x n` where each column
+ *       contains a normalized eigenvector.
+ *   - **Eigenvalues** - The eigenvalues tensor of shape `... x n`. This must be real
+ *       and match the inner type of the input/output tensors.
+ */
 template<typename OpA>
 __MATX_INLINE__ auto eig(const OpA &a,
                           EigenMode jobz = EigenMode::VECTOR, 

diff --git a/include/matx/operators/inverse.h b/include/matx/operators/inverse.h
@@ -107,6 +107,25 @@ namespace detail {
   };
 }
 
+/**
+ * Performs a matrix inverse on a square matrix. The inverse API currently uses
+ * cuBLAS as a backend with the `cublas<t>matinvBatched()` family of functions
+ * for `N <= 32` and `getri/getrf` functions otherwise.
+ * 
+ * If rank > 2, operations are batched.
+ * 
+ * @tparam OpA
+ *   Type of the input tensor or operator `a`
+ * @tparam ALGO
+ *   Algorithm to use for matrix inversion. Currently only supports `MAT_INVERSE_ALGO_LU`
+ * 
+ * @param a
+ *   Input tensor or operator of shape `... x n x n`
+ * 
+ * @return
+ *   Operator that produces the inverse tensor of shape `... x n x n`.
+ * 
+ */
 template<typename OpA, MatInverseAlgo_t ALGO = MAT_INVERSE_ALGO_LU>
 __MATX_INLINE__ auto inv(const OpA &a) {
   return detail::InvOp(a);

diff --git a/include/matx/operators/lu.h b/include/matx/operators/lu.h
@@ -92,6 +92,31 @@ namespace detail {
   };
 }
 
+/**
+ * Performs an LU factorization using partial pivoting with row interchanges.
+ * The factorization has the form `A = P * L * U`.
+ * 
+ * The input and output tensors may be the same tensor, in which case the
+ * input is overwritten.
+ *
+ * If rank > 2, operations are batched.
+ * 
+ * @tparam OpA
+ *   Type of the input tensor or operator `a`
+ * 
+ * @param a
+ *   Input tensor or operator of shape `... x m x n`
+ * 
+ * @return
+ *   Operator that produces a tensor containing *L* and *U* and another containing the pivot indices.
+ *   - **Out** - A tensor of shape `... x m x n` containing both *L* and *U*. *L* can be extracted
+ *               from the bottom half (the unit diagonals are not stored in *Out*), and *U* can
+ *               be extracted from the top half with the diagonals.
+ *   - **Piv** - The tensor of pivot indices with shape `... x min(m, n)`. For
+ *               \f$ 0 \leq i < \min(m, n) \f$, row i was interchanged with row 
+ *               \f$ Piv(..., i) - 1 \f$. It must be of type `int64_t` for CUDA
+ *               and `matx::lapack_int_t` for host.
+ */
 template<typename OpA>
 __MATX_INLINE__ auto lu(const OpA &a) {
   return detail::LUOp(a);

diff --git a/include/matx/operators/pinv.h b/include/matx/operators/pinv.h
@@ -121,7 +121,7 @@ namespace detail {
  * Perfom a generalized inverse of a matrix using its singular-value decomposition (SVD).
  * It automatically removes small singular values for stability.
  * 
- * For tensors of rank > 2, batching is performed.
+ * If rank > 2, operations are batched.
  * 
  * @tparam OpA
  *   Tensor or operator type of input A
@@ -131,10 +131,11 @@ namespace detail {
  * @param rcond
  *   Cutoff for small singular values. For stability, singular values
  *   smaller than `rcond * largest_singular_value` are set to 0 for each matrix
- *   in the batch. By default, `rcond` is approximately the machine epsilon of the tensor dtype.
+ *   in the batch. By default, `rcond` is approximately the machine epsilon of the tensor dtype
+ *   (`1e-6` for float types and `1e-15` for double types).
  * 
  * @return
- *   An operator that gives a tensor of size `... x n x m` representing the pseudo-inverse of the input
+ *   Operator that produces a tensor of size `... x n x m` representing the pseudo-inverse of the input
  */
 template<typename OpA>
 __MATX_INLINE__ auto pinv(const OpA &a, float rcond = get_default_rcond<typename OpA::value_type>()) {

diff --git a/include/matx/operators/qr.h b/include/matx/operators/qr.h
@@ -93,7 +93,9 @@ namespace detail {
 
 
 /**
- * Perform QR decomposition on a matrix using housholders reflections. If rank > 2 operations are batched.
+ * Perform QR decomposition on a matrix using Householder reflections.
+ * 
+ * If rank > 2, operations are batched.
  *
  * @tparam AType
  *   Tensor or operator type for output of A input tensors.
@@ -160,6 +162,19 @@ namespace detail {
 /**
  * Perform a QR decomposition on a matrix using cuSolver or a LAPACK host library.
  * 
+ * If rank > 2, operations are batched.
+ * 
+ * @tparam OpA
+ *   Type of the input tensor or operator `a`
+ *
+ * @param a
+ *   Input tensor or operator of shape `... x m x n`
+ * 
+ * @return
+ *   Operator that produces R/householder vectors and tau tensor outputs.
+ *   - **Out** - Of shape `... x m x n`. The householder vectors are returned in the
+ *               bottom half and *R* is returned in the top half.
+ *   - **Tau** - The scalar factors *tau* of shape `... x min(m, n)`.
  */
 template<typename OpA>
 __MATX_INLINE__ auto qr_solver(const OpA &a) {

diff --git a/include/matx/operators/svd.h b/include/matx/operators/svd.h
@@ -103,15 +103,15 @@ namespace detail {
  * 
  * The singular values within each vector are sorted in descending order.
  * 
- * For tensors of Rank > 2, batching is performed.
+ * If rank > 2, operations are batched.
  *
  * @tparam OpA
  *   Operator input type
  *
  * @param a
- *   Input operator of shape MxN
+ *   Input operator of shape `... x m x n`
  * @param jobz
- *   Compute all, part, or none of matrices U and VT
+ *   Compute all, part, or none of matrices *U* and *VT*
  * @param algo
  *   For Host SVD calls, whether to use more efficient divide-and-conquer based
  *   `gesdd` routine or the QR factorization based `gesvd` routine. `gesdd`
@@ -120,6 +120,16 @@ namespace detail {
  *   `gesvd`, and it can have poorer accuracy in some cases.
  *   Ignored for CUDA SVD calls.
  * 
+ * @return 
+ *   Operator that produces *U*, *S*, and *VT* tensors. Regardless of jobz, all 3 tensors
+ *   must be correctly set up for the operation and used with `mtie()`. Below, `k = min(m, n)`.
+ *   - **U** - The unitary matrix containing the left singular vectors. A tensor of
+ *             shape `... x m x k` for `SVDMode::REDUCED` and `... x m x m` otherwise.
+ *   - **S** - A tensor of shape `... x k` containing the singular values in
+ *             descending order. It must be of real type and match the inner type of
+ *             the other tensors.
+ *   - **VT** - The unitary matrix containing the right singular vectors. A tensor of
+ *             shape `... x k x n` for `SVDMode::REDUCED` and `... x n x n` otherwise.
  */
 template<typename OpA>
 __MATX_INLINE__ auto svd(const OpA &a, const SVDMode jobz = SVDMode::ALL,
@@ -189,10 +199,10 @@ namespace detail {
  *   Tensor or operator type for X0 initial guess in power iteration.
  *
  * @param A
- *   Input tensor or operator for tensor A input with size "batches by m by n"
+ *   Input tensor or operator for tensor A input with size `batches x m x n`
  * @param x0
  *   Input tensor or operator signaling the initial guess for x0 at each power iteration.  A
- *   Random tensor of size batches x min(n,m) is suggested.
+ *   Random tensor of size `batches x min(n,m)` is suggested.
  * @param iterations
  *   The number of power iterations to perform for each singular value.  
  * @param k
@@ -266,7 +276,7 @@ namespace detail {
  *   Tensor or operator type for output of A input tensors.
  *
  * @param A
- *   Input tensor or operator for tensor A input with size "batches by m by n"
+ *   Input tensor or operator for tensor A input with size `batches x m x n`
  * @param max_iters
  *   The approximate maximum number of QR iterations to perform. 
  * @param tol