From ac6fe519d1b8047cebe09b399b42d002ad6c7a4c Mon Sep 17 00:00:00 2001
From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
Date: Thu, 30 May 2024 11:49:40 -0700
Subject: [PATCH] CLN: Align a few bits and pieces to trapping-resolve

---
 pysindy/optimizers/trapping_sr3.py | 141 +++++++++--------------------
 test/conftest.py                   |   1 -
 2 files changed, 44 insertions(+), 98 deletions(-)

diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py
index f7dc9c365..53cdd9133 100644
--- a/pysindy/optimizers/trapping_sr3.py
+++ b/pysindy/optimizers/trapping_sr3.py
@@ -71,127 +71,71 @@ class TrappingSR3(ConstrainedSR3):

     Parameters
     ----------
-    threshold : float, optional (default 0.1)
-        Determines the strength of the regularization. When the
-        regularization function R is the L0 norm, the regularization
-        is equivalent to performing hard thresholding, and lambda
-        is chosen to threshold at the value given by this parameter.
-        This is equivalent to choosing lambda = threshold^2 / (2 * nu).
-
-    eta : float, optional (default 1.0e20)
-        Determines the strength of the stability term ||Pw-A||^2 in the
+    eta :
+        Determines the strength of the stability term :math:`||Pw-A||^2` in the
         optimization. The default value is very large so that the
         algorithm default is to ignore the stability term. In this limit,
         this should be approximately equivalent to the ConstrainedSR3 method.

-    alpha_m : float, optional (default eta * 0.1)
-        Determines the step size in the prox-gradient descent over m.
-        For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||.
-        Typically 0.01 * eta <= alpha_m <= 0.1 * eta.
-
-    alpha_A : float, optional (default eta)
-        Determines the step size in the prox-gradient descent over A.
-        For convergence, need alpha_A <= eta, so typically
-        alpha_A = eta is used.
+    eps_solver :
+        If threshold != 0, this specifies the error tolerance in the
+        CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP.

-    alpha : float, optional (default 1.0e20)
+    alpha :
         Determines the strength of the local stability term ||Qijk||^2 in the
         optimization. The default value is very large so that the
         algorithm default is to ignore this term.

-    beta : float, optional (default 1.0e20)
+    beta :
         Determines the strength of the local stability term ||Qijk + Qjik +
         Qkij||^2 in the optimization. The default value is very large
         so that the algorithm default is to ignore this term.

-    gamma : float, optional (default 0.1)
+    mod_matrix :
+        Lyapunov matrix. Trapping theorems apply to energy
+        :math:`\\propto \\dot y \\cdot y`, but also to any
+        :math:`\\propto \\dot y P \\cdot y` for Lyapunov matrix :math:`P`.
+        Defaults to the identity matrix.
+
+    alpha_A :
+        Determines the step size in the prox-gradient descent over A.
+        For convergence, need alpha_A <= eta, so typically
+        alpha_A = eta is used.
+
+    alpha_m :
+        Determines the step size in the prox-gradient descent over m.
+        For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||.
+        Typically 0.01 * eta <= alpha_m <= 0.1 * eta. (default eta * 0.1)
+
+    gamma :
         Determines the negative interval that matrix A is projected onto.
         For most applications gamma = 0.1 - 1.0 works pretty well.

-    tol : float, optional (default 1e-5)
-        Tolerance used for determining convergence of the optimization
-        algorithm over w.

     tol_m : float, optional (default 1e-5)
        Tolerance used for determining convergence of the optimization
        algorithm over m.

-    thresholder : string, optional (default 'L1')
+    thresholder :
         Regularization function to use. For current trapping SINDy,
         only the L1 and L2 norms are implemented. Note that other convex
         norms could be straightforwardly implemented, but L0 requires
-        reformulation because of nonconvexity.
-
-    thresholds : np.ndarray, shape (n_targets, n_features), optional \
-            (default None)
-        Array of thresholds for each library function coefficient.
-        Each row corresponds to a measurement variable and each column
-        to a function from the feature library.
-        Recall that SINDy seeks a matrix :math:`\\Xi` such that
-        :math:`\\dot{X} \\approx \\Theta(X)\\Xi`.
-        ``thresholds[i, j]`` should specify the threshold to be used for the
-        (j + 1, i + 1) entry of :math:`\\Xi`. That is to say it should give the
-        threshold to be used for the (j + 1)st library function in the equation
-        for the (i + 1)st measurement variable.
-
-    eps_solver : float, optional (default 1.0e-7)
-        If threshold != 0, this specifies the error tolerance in the
-        CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP.
-
-    inequality_constraints : bool, optional (default False)
-        If True, CVXPY methods are used.
-
-    max_iter : int, optional (default 30)
-        Maximum iterations of the optimization algorithm.
+        reformulation because of nonconvexity. (default 'L1')

-    accel : bool, optional (default False)
+    accel :
         Whether or not to use accelerated prox-gradient descent for (m, A).
+        (default False)

-    m0 : np.ndarray, shape (n_targets), optional (default None)
-        Initial guess for vector m in the optimization. Otherwise
-        each component of m is randomly initialized in [-1, 1].
+    m0 :
+        Initial guess for the trap center in the optimization, shape (n_targets).
+        Default None initializes vector elements randomly in [-1, 1].

-    A0 : np.ndarray, shape (n_targets, n_targets), optional (default None)
-        Initial guess for vector A in the optimization. Otherwise
-        A is initialized as A = diag(gamma).
-
-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
-    copy_X : boolean, optional (default True)
-        If True, X will be copied; else, it may be overwritten.
-
-    normalize_columns : boolean, optional (default False)
-        Normalize the columns of x (the SINDy library terms) before regression
-        by dividing by the L2-norm. Note that the 'normalize' option in sklearn
-        is deprecated in sklearn versions >= 1.0 and will be removed.
-
-    verbose : bool, optional (default False)
-        If True, prints out the different error terms every iteration.
-
-    verbose_cvxpy : bool, optional (default False)
-        Boolean flag which is passed to CVXPY solve function to indicate if
-        output should be verbose or not. Only relevant for optimizers that
-        use the CVXPY package in some capabity.
+    A0 :
+        Initial guess for matrix A in the optimization, shape (n_targets, n_targets).
+        Default None initializes A as diag(gamma).

     Attributes
     ----------
-    coef_ : array, shape (n_features,) or (n_targets, n_features)
-        Regularized weight vector(s). This is the v in the objective
-        function.
-
-    history_ : list
-        History of sparse coefficients. ``history_[k]`` contains the
-        sparse coefficients (v in the optimization objective function)
-        at iteration k.
-
-    objective_history_ : list
-        History of the value of the objective at each step. Note that
-        the trapping SINDy problem is nonconvex, meaning that this value
-        may increase and decrease as the algorithm works.
-
     A_history_ : list
         History of the auxiliary variable A that approximates diag(PW).
@@ -224,6 +168,11 @@ class TrappingSR3(ConstrainedSR3):
         Transpose of 1st dimension and 2nd dimension of quadratic coefficient
         part of the P matrix in ||Pw - A||^2

+    objective_history_ : list
+        History of the value of the objective at each step. Note that
+        the trapping SINDy problem is nonconvex, meaning that this value
+        may increase and decrease as the algorithm works.
+
     Examples
     --------
     >>> import numpy as np
@@ -386,6 +335,8 @@ def __post_init_guard(self):
             raise ValueError(
                 "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False."
             )
+        if self.mod_matrix is None:
+            self.mod_matrix = np.eye(self._n_tgts)

     def set_params(self, **kwargs):
         super().set_params(**kwargs)
@@ -490,7 +441,7 @@ def _build_PQ(polyterms: list[tuple[int, Int1D]]) -> Float5D:
     def _set_Ptensors(
         self, n_targets: int
-    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    ) -> tuple[Float3D, Float4D, Float4D, Float5D, Float5D, Float5D]:
         """Make the projection tensors used for the algorithm."""
         lib = PolynomialLibrary(2, include_bias=self._include_bias).fit(
             np.zeros((1, n_targets))
         )
@@ -502,7 +453,7 @@ def _set_Ptensors(
         PQ_tensor = self._build_PQ(polyterms)
         PT_tensor = PQ_tensor.transpose([1, 0, 2, 3, 4])
         # PM is the sum of PQ and PQ which projects out the sum of Qijk and Qjik
-        PM_tensor = PQ_tensor + PT_tensor
+        PM_tensor = cast(Float5D, PQ_tensor + PT_tensor)

         return PC_tensor, PL_tensor_unsym, PL_tensor, PQ_tensor, PT_tensor, PM_tensor

@@ -702,9 +653,6 @@ def _reduce(self, x, y):
         )
         var_len = n_features * n_tgts

-        if self.mod_matrix is None:
-            self.mod_matrix = np.eye(n_tgts)
-
         # Define PL, PQ, PT and PM tensors, only relevant if the stability term in
         # trapping SINDy is turned on.
         (
@@ -825,9 +773,8 @@ def _reduce(self, x, y):
                 self._m_convergence_criterion() < self.tol_m
                 and self._convergence_criterion() < self.tol
             ):
-                # Could not (further) select important features
                 break
-            if k == self.max_iter - 1:
+        else:
             warnings.warn(
                 "TrappingSR3._reduce did not converge after {} iters.".format(
                     self.max_iter
diff --git a/test/conftest.py b/test/conftest.py
index 33e9e1b06..b63d344d0 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -70,7 +70,6 @@ def data_lorenz():

 @pytest.fixture
 def data_multiple_trajectories():
-    n_points = [100, 200, 500]
     initial_conditions = [
         [8, 27, -7],
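
Reviewer note: a minimal sketch of how the reordered parameters read at the call site. It assumes the standard pysindy entry points (ps.SINDy, ps.TrappingSR3, ps.PolynomialLibrary) and that mod_matrix is accepted as a constructor keyword, as the new docstring implies; the Lorenz setup and all numeric values are illustrative only, not recommendations.

# Sketch only -- not part of the patch. Keyword names follow the docstring
# above; mod_matrix as a constructor argument is an assumption of this branch.
import numpy as np
import pysindy as ps
from scipy.integrate import solve_ivp

def lorenz(t, y, sigma=10.0, beta=8.0 / 3.0, rho=28.0):
    """Classic Lorenz system, the usual trapping-SINDy test problem."""
    return [
        sigma * (y[1] - y[0]),
        y[0] * (rho - y[2]) - y[1],
        y[0] * y[1] - beta * y[2],
    ]

t = np.arange(0.0, 10.0, 0.002)
sol = solve_ivp(lorenz, (t[0], t[-1]), [-8.0, 8.0, 27.0], t_eval=t)
x = sol.y.T  # (n_samples, n_targets) trajectory data

opt = ps.TrappingSR3(
    eta=1.0e3,             # finite eta turns the ||Pw - A||^2 stability term on
    alpha_m=1.0e2,         # step size over m; typically 0.01*eta .. 0.1*eta
    alpha_A=1.0e3,         # step size over A; alpha_A = eta is the usual choice
    mod_matrix=np.eye(3),  # identity Lyapunov matrix, the documented default
)
# Trapping SINDy expects a quadratic polynomial library.
model = ps.SINDy(optimizer=opt, feature_library=ps.PolynomialLibrary(degree=2))
model.fit(x, t=t)
model.print()

With eta left at its very large default the stability term is effectively ignored and the optimizer behaves like ConstrainedSR3, so a smoke test of the trapping machinery should pass a finite eta as above.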
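The last trapping_sr3.py hunk replaces the `if k == self.max_iter - 1` sentinel with Python's for/else: the `else` suite runs only when the loop exhausts its iterations without hitting `break`, which is exactly the "did not converge" condition. A standalone illustration (converged() is a hypothetical stand-in for the tol_m / tol criteria):

import warnings

def converged(k: int) -> bool:
    """Hypothetical stand-in for the convergence criteria in _reduce."""
    return k >= 10

max_iter = 30
for k in range(max_iter):
    if converged(k):
        break
else:
    # Executes only when the loop ran to exhaustion without `break`,
    # the condition the old `k == max_iter - 1` check emulated by hand.
    warnings.warn(f"did not converge after {max_iter} iters")

Besides being idiomatic, the for/else form also fires the warning when max_iter is 0, which the old last-iteration check could never reach.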