From f399f091daa8bdd4a146d29a4ed20dbe7639a651 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Tue, 14 Nov 2023 15:59:10 -0500
Subject: [PATCH 01/44] DFBS generator can compute products of primitive AO
 functions

---
 include/libint2/basis.h.in       |  19 +++
 include/libint2/dfbs_generator.h | 210 +++++++++++++++++++++++++++++++
 2 files changed, 229 insertions(+)
 create mode 100644 include/libint2/dfbs_generator.h
diff --git a/include/libint2/basis.h.in b/include/libint2/basis.h.in
index d3d156ec9..aa37be2a0 100644
--- a/include/libint2/basis.h.in
+++ b/include/libint2/basis.h.in
@@ -85,6 +85,25 @@ namespace libint2 {
     return l;
   }
 
+    /// Computes uncontracted shells from a vector of shells
+    /// @param[in] cluster a vector of shells
+    /// @return a vector of uncontracted shells
+    std::vector<Shell> uncontract(
+            const std::vector<Shell> &cluster) {
+        std::vector<Shell> primitive_cluster;
+        for (const auto &contracted_shell: cluster) {
+            for (auto p = 0; p < contracted_shell.nprim(); p++) {
+                const auto prim_shell = contracted_shell.extract_primitive(p, true);
+                // if dealing with generally contracted basis (e.g., cc-pvxz) represented
+                // as a segmented basis need to remove duplicates
+                if (std::find(primitive_cluster.begin(), primitive_cluster.end(),
+                              prim_shell) == primitive_cluster.end())
+                    primitive_cluster.emplace_back(std::move(prim_shell));
+            }
+        }
+        return primitive_cluster;
+    }
+
   /// BasisSet is a slightly decorated \c std::vector of \c libint2::Shell objects.
   class BasisSet : private std::vector<libint2::Shell> {
     public:
diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
new file mode 100644
index 000000000..08aae37f2
--- /dev/null
+++ b/include/libint2/dfbs_generator.h
@@ -0,0 +1,210 @@
+/*
+ *  Copyright (C) 2004-2021 Edward F. Valeev
+ *
+ *  This file is part of Libint.
+ *
+ *  Libint is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Libint is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef _libint2_src_lib_libint_dfbs_generator_h_
+#define _libint2_src_lib_libint_dfbs_generator_h_
+
+#include <algorithm>
+#include <libint2.h>
+#include <math.h>
+#include <libint2/shell.h>
+#include <libint2/basis.h>
+#include <libint2/atom.h>
+#include <libint2/boys.h>
+#include <Eigen/Dense>
+#include <Eigen/Eigenvalues>
+#include <boost/math/special_functions/gamma.hpp>
+
+namespace libint2 {
+    typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> Matrix;
+    typedef Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> Matrix_int;
+    typedef std::vector<Shell> Shellvec;
+
+    namespace datail {
+
+        /// @brief returns \Gamma(x)  of x
+        double gamma(const double &x) {
+            return boost::math::tgamma(x);
+        }
+
+        /// @brief return effective exponent of product of two primitive shells
+        /// @param shell1 first shell
+        /// @param shell2 second shell
+        /// @param L total angular momentum of product function
+        /// @return effective exponent of product function
+        double alpha_eff(const Shell &shell1, const Shell &shell2, const int &L) {
+            auto alpha1 = shell1.alpha[0];
+            auto alpha2 = shell2.alpha[0];
+            auto l1 = shell1.contr[0].l;
+            auto l2 = shell2.contr[0].l;
+            auto prefactor = std::pow((gamma(L + 2.) * gamma(l1 + l2 + 1.5)) / (gamma(l1 + l2 + 2.) * gamma(L + 1.5)),
+                                      2.);
+            return prefactor * (alpha1 + alpha2);
+        }
+
+        /// @brief creates a set of product functions from a set of primitive shells
+        /// @param primitive_shells set of primitive shells
+        Shellvec product_functions(const Shellvec &primitive_shells) {
+            Shellvec product_functions;
+            for (auto i = 0; i < primitive_shells.size(); ++i) {
+                for (auto j = 0; j <= i; ++j) {
+                    auto li = primitive_shells[i].contr[0].l;
+                    auto lj = primitive_shells[j].contr[0].l;
+                    for (auto L = std::abs(li - lj); L <= li + lj; L++) {
+                        auto alpha = libint2::svector<double>({alpha_eff(primitive_shells[i], primitive_shells[j], L)});
+                        libint2::svector<Shell::Contraction> contr_;
+                        Shell::Contraction contr1;
+                        contr1.l = L;
+                        contr1.pure = true; // libint2 needs solid harmonics for 2c2b integrals
+                        contr1.coeff = {1.0};
+                        contr_.push_back(contr1);
+                        assert(primitive_shells[i].O == primitive_shells[j].O);
+                        auto shell = Shell(alpha, contr_, primitive_shells[i].O);
+                        if (std::find(product_functions.begin(), product_functions.end(),
+                                      shell) == product_functions.end())
+                            product_functions.emplace_back(shell);
+                    }
+                }
+            }
+            return product_functions;
+        }
+
+        /// @brief creates a set of candidate product shells from a set of primitive shells
+        /// @param primitive_shells set of primitive shells
+        /// @return set of candidate product shells
+        std::vector<Shellvec> candidate_functions(const std::vector<Shellvec> &primitive_shells) {
+            std::vector<Shellvec> candidate_functions;
+            for (auto i = 0; i < primitive_shells.size(); ++i) {
+                candidate_functions.push_back(product_functions(primitive_shells[i]));
+            }
+            return candidate_functions;
+        }
+
+        /// @brief returns a hash map of shell indices to basis function indices
+        std::vector<size_t> map_shell_to_basis_function(const std::vector<libint2::Shell> &shells) {
+            std::vector<size_t> result;
+            result.reserve(shells.size());
+
+            size_t n = 0;
+            for (auto shell: shells) {
+                result.push_back(n);
+                n += shell.size();
+            }
+
+            return result;
+        }
+
+        /// @brief computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
+        /// @param shells set of shells
+        /// @return Coulomb matrix
+        Matrix compute_coulomb_matrix(const Shellvec &shells) {
+            const auto n = nbf(shells);
+            Matrix result = Matrix::Zero(n, n);
+            using libint2::Engine;
+            Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells), 0);
+            engine.set(BraKet::xs_xs);
+            engine.set(ScreeningMethod::Conservative);
+            auto shell2bf = map_shell_to_basis_function(shells);
+            const auto &buf = engine.results();
+            for (auto s1 = 0; s1 != shells.size(); ++s1) {
+                auto bf1 = shell2bf[s1];
+                auto n1 = shells[s1].size();
+                for (auto s2 = 0; s2 <= s1; ++s2) {
+                    auto bf2 = shell2bf[s2];
+                    auto n2 = shells[s2].size();
+                    engine.compute(shells[s1], shells[s2]);
+                    Eigen::Map<const Matrix> buf_mat(buf[0], n1, n2);
+                    result.block(bf1, bf2, n1, n2) = buf_mat;
+                    if (s1 != s2)
+                        result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
+                }
+            }
+            return result;
+        }
+
+        /// @brief Sorts a vector of shells by angular momentum
+        std::vector<Shellvec> sort_by_L(const Shellvec& shells){
+            int lmax = max_l(shells);
+            std::vector<Shellvec> sorted_shells;
+            sorted_shells.resize(lmax+1);
+            for(auto shell:shells){
+                auto l = shell.contr[0].l;
+                sorted_shells[l].push_back(shell);
+            }
+            return sorted_shells;
+        }
+
+    }// namespace detail
+
+    class DFBS_generator {
+    public:
+        /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO basis functions
+        /// @param obs_name name of basis set for AO functions
+        /// @param atoms vector of atoms
+        DFBS_generator(std::string obs_name,
+                       const std::vector<Atom> &atoms) : obs_name_(std::move(obs_name)), atoms_(std::move(atoms)) {
+            std::vector<Shellvec> obs_shell_vec;
+            std::vector<Shellvec> primitive_cluster;
+            // get AO basis shells for each atom
+            for (auto atom: atoms) {
+                auto atom_bs = BasisSet(obs_name_, {atom});
+                obs_shell_vec.emplace_back(atom_bs.shells());
+            }
+            // get primitive shells from AO functions
+            for (auto obs_shells: obs_shell_vec) {
+                primitive_cluster.emplace_back(uncontract(obs_shells));
+            }
+
+            //compute candidate shells
+            candidate_shells_ = datail::candidate_functions(primitive_cluster);
+
+            // initialize libint2
+            libint2::initialize();
+        }
+
+        DFBS_generator() = default;
+
+        ~DFBS_generator() = default;
+
+        /// @brief returns the candidate shells (full set of product functions)
+        std::vector<Shellvec> candidate_shells() {
+            return candidate_shells_;
+        }
+
+        /// @brief returns the candidate shells sorted by angular momentum
+        std::vector<std::vector<Shellvec>> candidates_sorted_in_L(){
+            std::vector<std::vector<Shellvec>> sorted_shells;
+            for(auto shells:candidate_shells_){
+                sorted_shells.push_back(datail::sort_by_L(shells));
+            }
+            return sorted_shells;
+        }
+
+    private:
+        std::string obs_name_;  //name of AO basis set
+        std::vector<Atom> atoms_; //vector of atoms
+        std::vector<Shellvec> candidate_shells_;  //full set of product functions
+        std::vector<Shellvec> reduced_shells_;    //reduced set of product functions
+
+    };
+
+} // namespace libint2
+
+#endif /* _libint2_src_lib_libint_dfbs_generator_h_ */

From f86a3fc6c879331ff20e30212bacbd3dd898579f Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 16 Nov 2023 21:58:30 -0500
Subject: [PATCH 02/44] implemented pivoted cholesky algorithm and addressed
 comments

---
 include/libint2/pivoted_cholesky.h            | 122 ++++++++++++++++++
 .../libint2/util/generated/pivoted_cholesky.h |   0
 2 files changed, 122 insertions(+)
 create mode 100644 include/libint2/pivoted_cholesky.h
 create mode 100644 include/libint2/util/generated/pivoted_cholesky.h

diff --git a/include/libint2/pivoted_cholesky.h b/include/libint2/pivoted_cholesky.h
new file mode 100644
index 000000000..3f1252f00
--- /dev/null
+++ b/include/libint2/pivoted_cholesky.h
@@ -0,0 +1,122 @@
+/*
+ *  Copyright (C) 2004-2021 Edward F. Valeev
+ *
+ *  This file is part of Libint.
+ *
+ *  Libint is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Libint is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef _libint2_src_lib_libint_pivoted_cholesky_h_
+#define _libint2_src_lib_libint_pivoted_cholesky_h_
+
+
+#include <iostream>
+#include <Eigen/Dense>
+#include <algorithm>
+
+namespace libint2 {
+
+    /// @brief computes the pivoted Cholesky decomposition of a symmetric positive definite matrix
+    /// @param A symmetric positive definite matrix
+    /// @param tolerance tolerance for the error
+    /// @param pivot initial pivot indices
+    /// @return pivoted Cholesky decomposition of A
+    std::vector<size_t> pivoted_cholesky(const Eigen::MatrixXd &A, const double &tolerance, std::vector<size_t> pivot) {
+        // number of elements in A
+        auto n = A.rows();
+        // diagonal elements of A
+        std::vector<double> d(n);
+        // initial error
+        auto error = A.diagonal()[0];
+        for (auto i = 0; i < n; ++i) {
+            d[i] = A.diagonal()[i];
+            error = std::max(d[i], error);
+        }
+
+        // Return matrix
+        Eigen::MatrixXd L(n, n);
+
+        // loop index
+        size_t m = 0;
+
+        // copy input pivot indices
+        std::vector<size_t> piv;
+        piv.reserve(n);
+        for (auto i = 0; i < n; ++i) {
+            piv.push_back(pivot[i]);
+        }
+
+        while (error > tolerance && m < n) {
+
+            // update pivot indices
+            // Errors in pivoted order
+            std::vector<double> err(d.size());
+            for (auto i = 0; i < d.size(); ++i) {
+                err[i] = d[piv[i]];
+            }
+            // error vector after mth element
+            std::vector<double> err2(err.begin() + m, err.end());
+            std::vector<size_t> idx(err2.size());
+            for (auto i = 0; i < idx.size(); ++i) {
+                idx[i] = i;
+            }
+            // sort indices
+            std::sort(idx.begin(), idx.end(), [&err2](size_t i1, size_t i2) { return err2[i1] > err2[i2]; });
+            // subvector of piv
+            std::vector<size_t> piv_subvec(piv.size() - m);
+            for (auto i = 0; i < piv_subvec.size(); ++i) {
+                piv_subvec[i] = piv[i + m];
+            }
+            // sort piv
+            for (auto i = 0; i < idx.size(); ++i) {
+                piv[i + m] = piv_subvec[idx[i]];
+            }
+
+            // TODO: find a better way to update pivot indices
+
+            // current pivot index
+            size_t pim = piv[m];
+            // compute diagonal element
+            L(m, pim) = std::sqrt(d[pim]);
+
+            //off-diagonal elements
+            for (auto i = m + 1; i < n; ++i) {
+                auto pii = piv[i];
+                //compute element
+                L(m, pii) = (m > 0) ? (A(pim, pii) - L.col(pim).head(m).dot(L.col(pii).head(m))) / L(m, pim) :
+                            (A(pim, pii)) / L(m, pim);
+                //update d
+                d[pii] -= L(m, pii) * L(m, pii);
+            }
+            //update error
+            if (m + 1 < n) {
+                error = d[piv[m + 1]];
+                for (auto i = m + 1; i < n; ++i) {
+                    error = std::max(d[piv[i]], error);
+                }
+            }
+            //increase m
+            m++;
+        }
+        //Transpose to get Cholesky vectors as columns
+        L.transposeInPlace();
+        // Drop unnecessary columns
+        L.conservativeResize(n, m);
+        return piv;
+    }
+
+}
+
+#endif /* _libint2_src_lib_libint_pivoted_cholesky_h_ */
diff --git a/include/libint2/util/generated/pivoted_cholesky.h b/include/libint2/util/generated/pivoted_cholesky.h
new file mode 100644
index 000000000..e69de29bb

From b6d8b505976f6060fd57746dee43883b20fd8886 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 16 Nov 2023 22:06:05 -0500
Subject: [PATCH 03/44] removed unnecessary typedefs and used std::tgamma

---
 include/libint2/dfbs_generator.h | 73 +++++++++++++++++---------------
 1 file changed, 38 insertions(+), 35 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 08aae37f2..ce793bbe2 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -30,18 +30,14 @@
 #include <libint2/boys.h>
 #include <Eigen/Dense>
 #include <Eigen/Eigenvalues>
-#include <boost/math/special_functions/gamma.hpp>
 
 namespace libint2 {
-    typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> Matrix;
-    typedef Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> Matrix_int;
-    typedef std::vector<Shell> Shellvec;
 
     namespace datail {
 
         /// @brief returns \Gamma(x)  of x
-        double gamma(const double &x) {
-            return boost::math::tgamma(x);
+        double gamma_function(const double &x) {
+            return std::tgamma(x);
         }
 
         /// @brief return effective exponent of product of two primitive shells
@@ -54,15 +50,16 @@ namespace libint2 {
             auto alpha2 = shell2.alpha[0];
             auto l1 = shell1.contr[0].l;
             auto l2 = shell2.contr[0].l;
-            auto prefactor = std::pow((gamma(L + 2.) * gamma(l1 + l2 + 1.5)) / (gamma(l1 + l2 + 2.) * gamma(L + 1.5)),
+            auto prefactor = std::pow((gamma_function(L + 2.) * gamma_function(l1 + l2 + 1.5)) /
+                                      (gamma_function(l1 + l2 + 2.) * gamma_function(L + 1.5)),
                                       2.);
             return prefactor * (alpha1 + alpha2);
         }
 
         /// @brief creates a set of product functions from a set of primitive shells
         /// @param primitive_shells set of primitive shells
-        Shellvec product_functions(const Shellvec &primitive_shells) {
-            Shellvec product_functions;
+        std::vector<Shell> product_functions(const std::vector<Shell> &primitive_shells) {
+            std::vector<Shell> product_functions;
             for (auto i = 0; i < primitive_shells.size(); ++i) {
                 for (auto j = 0; j <= i; ++j) {
                     auto li = primitive_shells[i].contr[0].l;
@@ -89,8 +86,8 @@ namespace libint2 {
         /// @brief creates a set of candidate product shells from a set of primitive shells
         /// @param primitive_shells set of primitive shells
         /// @return set of candidate product shells
-        std::vector<Shellvec> candidate_functions(const std::vector<Shellvec> &primitive_shells) {
-            std::vector<Shellvec> candidate_functions;
+        std::vector<std::vector<Shell>> candidate_functions(const std::vector<std::vector<Shell>> &primitive_shells) {
+            std::vector<std::vector<Shell>> candidate_functions;
             for (auto i = 0; i < primitive_shells.size(); ++i) {
                 candidate_functions.push_back(product_functions(primitive_shells[i]));
             }
@@ -114,9 +111,9 @@ namespace libint2 {
         /// @brief computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
         /// @param shells set of shells
         /// @return Coulomb matrix
-        Matrix compute_coulomb_matrix(const Shellvec &shells) {
+        Eigen::MatrixXd compute_coulomb_matrix(const std::vector<Shell> &shells) {
             const auto n = nbf(shells);
-            Matrix result = Matrix::Zero(n, n);
+            Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
             using libint2::Engine;
             Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells), 0);
             engine.set(BraKet::xs_xs);
@@ -130,7 +127,7 @@ namespace libint2 {
                     auto bf2 = shell2bf[s2];
                     auto n2 = shells[s2].size();
                     engine.compute(shells[s1], shells[s2]);
-                    Eigen::Map<const Matrix> buf_mat(buf[0], n1, n2);
+                    Eigen::Map<const Eigen::MatrixXd> buf_mat(buf[0], n1, n2);
                     result.block(bf1, bf2, n1, n2) = buf_mat;
                     if (s1 != s2)
                         result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
@@ -140,11 +137,11 @@ namespace libint2 {
         }
 
         /// @brief Sorts a vector of shells by angular momentum
-        std::vector<Shellvec> sort_by_L(const Shellvec& shells){
+        std::vector<std::vector<Shell>> split_by_L(const std::vector<Shell> &shells) {
             int lmax = max_l(shells);
-            std::vector<Shellvec> sorted_shells;
-            sorted_shells.resize(lmax+1);
-            for(auto shell:shells){
+            std::vector<std::vector<Shell>> sorted_shells;
+            sorted_shells.resize(lmax + 1);
+            for (auto shell: shells) {
                 auto l = shell.contr[0].l;
                 sorted_shells[l].push_back(shell);
             }
@@ -153,15 +150,15 @@ namespace libint2 {
 
     }// namespace detail
 
-    class DFBS_generator {
+    class DFBasisSetGenerator {
     public:
         /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO basis functions
         /// @param obs_name name of basis set for AO functions
         /// @param atoms vector of atoms
-        DFBS_generator(std::string obs_name,
-                       const std::vector<Atom> &atoms) : obs_name_(std::move(obs_name)), atoms_(std::move(atoms)) {
-            std::vector<Shellvec> obs_shell_vec;
-            std::vector<Shellvec> primitive_cluster;
+        DFBasisSetGenerator(std::string obs_name,
+                            const std::vector<Atom> &atoms) : obs_name_(std::move(obs_name)), atoms_(std::move(atoms)) {
+            std::vector<std::vector<Shell>> obs_shell_vec;
+            std::vector<std::vector<Shell>> primitive_cluster;
             // get AO basis shells for each atom
             for (auto atom: atoms) {
                 auto atom_bs = BasisSet(obs_name_, {atom});
@@ -174,25 +171,31 @@ namespace libint2 {
 
             //compute candidate shells
             candidate_shells_ = datail::candidate_functions(primitive_cluster);
-
-            // initialize libint2
-            libint2::initialize();
         }
 
-        DFBS_generator() = default;
+        /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO shells provided by user
+        /// @param cluster vector of vector of shells for each atom
+        DFBasisSetGenerator(std::vector<std::vector<Shell>> cluster) {
+            std::vector<std::vector<Shell>> primitive_cluster;
+            for(auto i=0; i< cluster.size(); ++i){
+                primitive_cluster.emplace_back(uncontract(cluster[i]));
+            }
+            candidate_shells_ = datail::candidate_functions(primitive_cluster);
+        }
+        DFBasisSetGenerator() = default;
 
-        ~DFBS_generator() = default;
+        ~DFBasisSetGenerator() = default;
 
         /// @brief returns the candidate shells (full set of product functions)
-        std::vector<Shellvec> candidate_shells() {
+        std::vector<std::vector<Shell>> candidate_shells() {
             return candidate_shells_;
         }
 
         /// @brief returns the candidate shells sorted by angular momentum
-        std::vector<std::vector<Shellvec>> candidates_sorted_in_L(){
-            std::vector<std::vector<Shellvec>> sorted_shells;
-            for(auto shells:candidate_shells_){
-                sorted_shells.push_back(datail::sort_by_L(shells));
+        std::vector<std::vector<std::vector<Shell>>> candidates_sorted_in_L() {
+            std::vector<std::vector<std::vector<Shell>>> sorted_shells;
+            for (auto shells: candidate_shells_) {
+                sorted_shells.push_back(datail::split_by_L(shells));
             }
             return sorted_shells;
         }
@@ -200,8 +203,8 @@ namespace libint2 {
     private:
         std::string obs_name_;  //name of AO basis set
         std::vector<Atom> atoms_; //vector of atoms
-        std::vector<Shellvec> candidate_shells_;  //full set of product functions
-        std::vector<Shellvec> reduced_shells_;    //reduced set of product functions
+        std::vector<std::vector<Shell>> candidate_shells_;  //full set of product functions
+        std::vector<std::vector<Shell>> reduced_shells_;    //reduced set of product functions
 
     };
 

From 74cf2fcbc95d5c5620b9b6af9eb066b737300e5d Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 16 Nov 2023 23:45:22 -0500
Subject: [PATCH 04/44] DFBasisSetGenerator produces reduced set of shells
 using pivoted cholesky

---
 include/libint2/dfbs_generator.h   | 70 ++++++++++++++++++++++++++++--
 include/libint2/pivoted_cholesky.h | 43 ++++++++----------
 2 files changed, 84 insertions(+), 29 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index ce793bbe2..1a232307c 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -28,6 +28,7 @@
 #include <libint2/basis.h>
 #include <libint2/atom.h>
 #include <libint2/boys.h>
+#include <libint2/pivoted_cholesky.h>
 #include <Eigen/Dense>
 #include <Eigen/Eigenvalues>
 
@@ -148,15 +149,53 @@ namespace libint2 {
             return sorted_shells;
         }
 
+        std::vector<Shell> shell_pivoted_cholesky(const std::vector<Shell> shells, const double cholesky_threshold) {
+
+            auto n = shells.size(); // number of shells
+            std::vector<size_t> shell_indices; // hash map of basis function indices to shell indices
+            auto L = shells[0].contr[0].l; // all shells must have same L
+            auto nbf = libint2::nbf(shells);  // total number of basis functions in vector of shells
+            for (auto i = 0; i < n; ++i) {
+                for (auto j = 0; j < 2 * L + 1; ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b integrals
+                    shell_indices.push_back(i);
+            }
+            assert(shell_indices.size() == nbf);
+            auto C = compute_coulomb_matrix(shells);
+            std::vector<size_t> pivot(nbf);
+            for(auto i=0;i<nbf;++i){
+                pivot[i] = i;
+            }
+            // set pivot indices in ascending order of off diagonal elements of Coulomb matrix
+            // see Phys. Rev. A 101, 032504 (Accurate reproduction of strongly repulsive interatomic potentials)
+            Eigen::MatrixXd C_off_diag = C;
+            auto col_sum = C_off_diag.colwise().sum();
+            // sort pivot indices in ascending order of column sums
+            std::sort(pivot.begin(), pivot.end(), [&col_sum](size_t i1, size_t i2) { return col_sum[i1] < col_sum[i2]; });
+            // compute Cholesky decomposition
+            auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
+
+            std::vector<Shell> reduced_shells;
+            for (auto i = 0; i < reduced_pivots.size(); ++i) {
+                // check if the reduced shell is already in reduced shells
+                if (std::find(reduced_shells.begin(), reduced_shells.end(),
+                              shells[shell_indices[reduced_pivots[i]]]) == reduced_shells.end())
+                reduced_shells.push_back(shells[shell_indices[reduced_pivots[i]]]);
+            }
+            return reduced_shells;
+        }
+
     }// namespace detail
 
     class DFBasisSetGenerator {
     public:
         /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO basis functions
+        /// see: J. Chem. Theory Comput. 2021, 17, 6886−6900 (Straightforward and Accurate Automatic Auxiliary Basis Set Generation for Molecular Calculations with Atomic Orbital Basis Sets)
         /// @param obs_name name of basis set for AO functions
         /// @param atoms vector of atoms
+        /// @param cholesky_threshold threshold for choosing a product functions via pivoted Cholesky decomposition
         DFBasisSetGenerator(std::string obs_name,
-                            const std::vector<Atom> &atoms) : obs_name_(std::move(obs_name)), atoms_(std::move(atoms)) {
+                            const std::vector<Atom> &atoms, const double cholesky_thershold = 1e-7) : obs_name_(
+                std::move(obs_name)), atoms_(std::move(atoms)) {
             std::vector<std::vector<Shell>> obs_shell_vec;
             std::vector<std::vector<Shell>> primitive_cluster;
             // get AO basis shells for each atom
@@ -171,17 +210,21 @@ namespace libint2 {
 
             //compute candidate shells
             candidate_shells_ = datail::candidate_functions(primitive_cluster);
+            cholesky_threshold_ = cholesky_thershold;
         }
 
         /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO shells provided by user
         /// @param cluster vector of vector of shells for each atom
-        DFBasisSetGenerator(std::vector<std::vector<Shell>> cluster) {
+        /// @param cholesky_threshold threshold for choosing a product functions via pivoted Cholesky decomposition
+        DFBasisSetGenerator(std::vector<std::vector<Shell>> cluster, const double cholesky_thershold = 1e-7) {
             std::vector<std::vector<Shell>> primitive_cluster;
-            for(auto i=0; i< cluster.size(); ++i){
+            for (auto i = 0; i < cluster.size(); ++i) {
                 primitive_cluster.emplace_back(uncontract(cluster[i]));
             }
             candidate_shells_ = datail::candidate_functions(primitive_cluster);
+            cholesky_threshold_ = cholesky_thershold;
         }
+
         DFBasisSetGenerator() = default;
 
         ~DFBasisSetGenerator() = default;
@@ -192,7 +235,7 @@ namespace libint2 {
         }
 
         /// @brief returns the candidate shells sorted by angular momentum
-        std::vector<std::vector<std::vector<Shell>>> candidates_sorted_in_L() {
+        std::vector<std::vector<std::vector<Shell>>> candidates_splitted_in_L() {
             std::vector<std::vector<std::vector<Shell>>> sorted_shells;
             for (auto shells: candidate_shells_) {
                 sorted_shells.push_back(datail::split_by_L(shells));
@@ -200,9 +243,28 @@ namespace libint2 {
             return sorted_shells;
         }
 
+        std::vector<std::vector<Shell>> reduced_shells() {
+            if (reduced_shells_.size() != 0)
+                return reduced_shells_;
+            else {
+                auto candidate_splitted_in_L = candidates_splitted_in_L();
+                for (auto i = 0; i < candidate_splitted_in_L.size(); ++i) {
+                    std::vector<Shell> atom_shells;
+                    for (auto j = 0; j < candidate_splitted_in_L[i].size(); ++j) {
+                        auto reduced_shells = datail::shell_pivoted_cholesky(candidate_splitted_in_L[i][j],
+                                                                             cholesky_threshold_);
+                        atom_shells.insert(atom_shells.end(), reduced_shells.begin(), reduced_shells.end());
+                    }
+                    reduced_shells_.push_back(atom_shells);
+                }
+            }
+            return reduced_shells_;
+        }
+
     private:
         std::string obs_name_;  //name of AO basis set
         std::vector<Atom> atoms_; //vector of atoms
+        double cholesky_threshold_;
         std::vector<std::vector<Shell>> candidate_shells_;  //full set of product functions
         std::vector<std::vector<Shell>> reduced_shells_;    //reduced set of product functions
 
diff --git a/include/libint2/pivoted_cholesky.h b/include/libint2/pivoted_cholesky.h
index 3f1252f00..6eb695f78 100644
--- a/include/libint2/pivoted_cholesky.h
+++ b/include/libint2/pivoted_cholesky.h
@@ -1,25 +1,9 @@
-/*
- *  Copyright (C) 2004-2021 Edward F. Valeev
- *
- *  This file is part of Libint.
- *
- *  Libint is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU Lesser General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  Libint is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU Lesser General Public License for more details.
- *
- *  You should have received a copy of the GNU Lesser General Public License
- *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#ifndef _libint2_src_lib_libint_pivoted_cholesky_h_
-#define _libint2_src_lib_libint_pivoted_cholesky_h_
+//
+// Created by Kshitij Surjuse on 11/15/23.
+//
+
+#ifndef LIBINT_PIVOTED_CHOLESKY_H
+#define LIBINT_PIVOTED_CHOLESKY_H
 
 
 #include <iostream>
@@ -33,7 +17,8 @@ namespace libint2 {
     /// @param tolerance tolerance for the error
     /// @param pivot initial pivot indices
     /// @return pivoted Cholesky decomposition of A
-    std::vector<size_t> pivoted_cholesky(const Eigen::MatrixXd &A, const double &tolerance, std::vector<size_t> pivot) {
+    std::vector<size_t>
+    pivoted_cholesky(const Eigen::MatrixXd &A, const double &tolerance, const std::vector<size_t> &pivot) {
         // number of elements in A
         auto n = A.rows();
         // diagonal elements of A
@@ -114,9 +99,17 @@ namespace libint2 {
         L.transposeInPlace();
         // Drop unnecessary columns
         L.conservativeResize(n, m);
-        return piv;
+
+        // return reduced pivot indices
+        std::vector<size_t> reduced_piv;
+        reduced_piv.reserve(m);
+        for (auto i = 0; i < m; ++i) {
+            reduced_piv.push_back(piv[i]);
+        }
+
+        return reduced_piv;
     }
 
 }
 
-#endif /* _libint2_src_lib_libint_pivoted_cholesky_h_ */
+#endif //LIBINT_PIVOTED_CHOLESKY_H

From c2f1dac8fb13acdc75d53e2823d73405d7ed24a0 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Fri, 17 Nov 2023 14:38:40 -0500
Subject: [PATCH 05/44] DFBasisSetGenerator return a reduced BasisSet, updated
 hartree-fock++.cc btas code

---
 include/libint2/dfbs_generator.h     |  26 +++++
 tests/hartree-fock/hartree-fock++.cc | 168 ++++++++++++++-------------
 2 files changed, 115 insertions(+), 79 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 1a232307c..cdb13908c 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -149,6 +149,10 @@ namespace libint2 {
             return sorted_shells;
         }
 
+        /// @brief computes the reduced set of product functions via pivoted Cholesky decomposition
+        /// @param shells set of shells
+        /// @param cholesky_threshold threshold for choosing a product function via pivoted Cholesky decomposition
+        /// @return reduced set of product functions
         std::vector<Shell> shell_pivoted_cholesky(const std::vector<Shell> shells, const double cholesky_threshold) {
 
             auto n = shells.size(); // number of shells
@@ -234,6 +238,16 @@ namespace libint2 {
             return candidate_shells_;
         }
 
+        /// @brief returns the candidate basis set (full set of product functions)
+        /// @warning generates huge and heavily linearly dependent basis sets
+        BasisSet product_basis(){
+            std::vector<Shell> product_shells;
+            for(auto shells: candidate_shells_){
+                product_shells.insert(product_shells.end(), shells.begin(), shells.end());
+            }
+            return BasisSet(std::move(product_shells));
+        }
+
         /// @brief returns the candidate shells sorted by angular momentum
         std::vector<std::vector<std::vector<Shell>>> candidates_splitted_in_L() {
             std::vector<std::vector<std::vector<Shell>>> sorted_shells;
@@ -243,6 +257,7 @@ namespace libint2 {
             return sorted_shells;
         }
 
+        /// @brief returns the reduced shells (reduced set of product functions) computed via pivoted Cholesky decomposition
         std::vector<std::vector<Shell>> reduced_shells() {
             if (reduced_shells_.size() != 0)
                 return reduced_shells_;
@@ -261,6 +276,17 @@ namespace libint2 {
             return reduced_shells_;
         }
 
+        /// @brief returns the reduced basis set (reduced set of product functions) computed via pivoted Cholesky decomposition
+        BasisSet reduced_basis() {
+            auto reduced_cluster = reduced_shells();
+            std::vector<Shell> reduced_shells;
+            for (auto shells: reduced_cluster) {
+                reduced_shells.insert(reduced_shells.end(), shells.begin(), shells.end());
+            }
+            return BasisSet(std::move(reduced_shells));
+        }
+
+
     private:
         std::string obs_name_;  //name of AO basis set
         std::vector<Atom> atoms_; //vector of atoms
diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc
index 44af2b2f9..edfd9f4d6 100644
--- a/tests/hartree-fock/hartree-fock++.cc
+++ b/tests/hartree-fock/hartree-fock++.cc
@@ -49,6 +49,8 @@
 #include <libint2/chemistry/sto3g_atomic_density.h>
 #include <libint2/lcao/molden.h>
 #include <libint2.hpp>
+#include <libint2/dfbs_generator.h>
+
 #if !LIBINT2_CONSTEXPR_STATICS
 #  include <libint2/statics_definition.h>
 #endif
@@ -57,6 +59,7 @@
 #include <omp.h>
 #endif
 
+
 /// to use precomputed shell pair data must decide on max precision a priori
 const auto max_engine_precision = std::numeric_limits<double>::epsilon() / 1e10;
 
@@ -75,6 +78,8 @@ typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
 typedef Eigen::DiagonalMatrix<double, Eigen::Dynamic, Eigen::Dynamic>
     DiagonalMatrix;
 
+typedef btas::Tensor<double> tensor;
+
 using libint2::Shell;
 using libint2::Atom;
 using libint2::BasisSet;
@@ -166,10 +171,8 @@ struct DFFockEngine {
   const BasisSet& dfbs;
   DFFockEngine(const BasisSet& _obs, const BasisSet& _dfbs)
       : obs(_obs), dfbs(_dfbs) {}
-
-  typedef btas::RangeNd<CblasRowMajor, std::array<long, 3>> Range3d;
-  typedef btas::Tensor<double, Range3d> Tensor3d;
-  Tensor3d xyK;
+  typedef btas::Tensor<double> Tensor;
+  Tensor xyK;
 
   // a DF-based builder, using coefficients of occupied MOs
   Matrix compute_2body_fock_dfC(const Matrix& Cocc);
@@ -216,21 +219,26 @@ int main(int argc, char* argv[]) {
     // filename (.xyz) from the command line
     const auto filename = (argc > 1) ? argv[1] : "h2o.xyz";
     const auto basisname = (argc > 2) ? argv[2] : "aug-cc-pVDZ";
-    bool do_density_fitting = false;
+      bool do_density_fitting = false;
+      double cholesky_threshold = 1e-4;
 #ifdef HAVE_DENSITY_FITTING
-    do_density_fitting = (argc > 3);
-    const auto dfbasisname = do_density_fitting ? argv[3] : "";
+      do_density_fitting = (argc > 3);
+      const auto dfbasisname = do_density_fitting ? argv[3] : "";
+      if ((strcmp(dfbasisname, "autodf") == 0) && argc > 4) {
+          cholesky_threshold = std::stod(argv[4]);
+          std::cout << "New Cholesky threshold for generating df basis set = " << cholesky_threshold << std::endl;
+      }
 #endif
-    std::vector<Atom> atoms = read_geometry(filename);
-
-    // set up thread pool
-    {
-      using libint2::nthreads;
-      auto nthreads_cstr = getenv("LIBINT_NUM_THREADS");
-      nthreads = 1;
-      if (nthreads_cstr && strcmp(nthreads_cstr, "")) {
-        std::istringstream iss(nthreads_cstr);
-        iss >> nthreads;
+      std::vector<Atom> atoms = read_geometry(filename);
+
+      // set up thread pool
+      {
+          using libint2::nthreads;
+          auto nthreads_cstr = getenv("LIBINT_NUM_THREADS");
+          nthreads = 1;
+          if (nthreads_cstr && strcmp(nthreads_cstr, "")) {
+              std::istringstream iss(nthreads_cstr);
+              iss >> nthreads;
         if (nthreads > 1 << 16 || nthreads <= 0) nthreads = 1;
       }
 #if defined(_OPENMP)
@@ -254,33 +262,38 @@ int main(int argc, char* argv[]) {
     // compute the nuclear repulsion energy
     auto enuc = 0.0;
     for (auto i = 0; i < atoms.size(); i++)
-      for (auto j = i + 1; j < atoms.size(); j++) {
-        auto xij = atoms[i].x - atoms[j].x;
-        auto yij = atoms[i].y - atoms[j].y;
-        auto zij = atoms[i].z - atoms[j].z;
-        auto r2 = xij * xij + yij * yij + zij * zij;
-        auto r = sqrt(r2);
-        enuc += atoms[i].atomic_number * atoms[j].atomic_number / r;
-      }
-    cout << "Nuclear repulsion energy = " << std::setprecision(15) << enuc
-         << endl;
+        for (auto j = i + 1; j < atoms.size(); j++) {
+            auto xij = atoms[i].x - atoms[j].x;
+            auto yij = atoms[i].y - atoms[j].y;
+            auto zij = atoms[i].z - atoms[j].z;
+            auto r2 = xij * xij + yij * yij + zij * zij;
+            auto r = sqrt(r2);
+            enuc += atoms[i].atomic_number * atoms[j].atomic_number / r;
+        }
+      cout << "Nuclear repulsion energy = " << std::setprecision(15) << enuc
+           << endl;
 
-    libint2::Shell::do_enforce_unit_normalization(false);
+      libint2::Shell::do_enforce_unit_normalization(false);
 
-    cout << "Atomic Cartesian coordinates (a.u.):" << endl;
-    for (const auto& a : atoms)
-      std::cout << a.atomic_number << " " << a.x << " " << a.y << " " << a.z
-                << std::endl;
+      cout << "Atomic Cartesian coordinates (a.u.):" << endl;
+      for (const auto &a: atoms)
+          std::cout << a.atomic_number << " " << a.x << " " << a.y << " " << a.z
+                    << std::endl;
 
-    BasisSet obs(basisname, atoms);
-    cout << "orbital basis set rank = " << obs.nbf() << endl;
+      BasisSet obs(basisname, atoms);
+      cout << "orbital basis set rank = " << obs.nbf() << endl;
 
 #ifdef HAVE_DENSITY_FITTING
-    BasisSet dfbs;
-    if (do_density_fitting) {
-      dfbs = BasisSet(dfbasisname, atoms);
-      cout << "density-fitting basis set rank = " << dfbs.nbf() << endl;
-    }
+      BasisSet dfbs;
+      if (do_density_fitting) {
+          if (strcmp(dfbasisname, "autodf") == 0) {
+              if (!libint2::initialized()) libint2::initialize();
+              libint2::DFBasisSetGenerator dfbs_generator(basisname, atoms, cholesky_threshold);
+              dfbs = dfbs_generator.reduced_basis();
+          } else
+              dfbs = BasisSet(dfbasisname, atoms);
+          cout << "density-fitting basis set rank = " << dfbs.nbf() << endl;
+      }
 #endif  // HAVE_DENSITY_FITTING
 
     /*** =========================== ***/
@@ -288,34 +301,36 @@ int main(int argc, char* argv[]) {
     /*** =========================== ***/
 
     // initializes the Libint integrals library ... now ready to compute
-    libint2::initialize();
-
-    // compute OBS non-negligible shell-pair list
-    {
-      const auto tstart = std::chrono::high_resolution_clock::now();
-      std::tie(obs_shellpair_list, obs_shellpair_data) = compute_shellpairs(obs);
-      size_t nsp = 0;
-      for (auto& sp : obs_shellpair_list) {
-        nsp += sp.second.size();
+      if (!libint2::initialized())
+          libint2::initialize();
+
+      // compute OBS non-negligible shell-pair list
+      {
+          const auto tstart = std::chrono::high_resolution_clock::now();
+          std::tie(obs_shellpair_list, obs_shellpair_data) = compute_shellpairs(obs);
+          size_t nsp = 0;
+          for (auto &sp: obs_shellpair_list) {
+              nsp += sp.second.size();
+          }
+          const auto tstop = std::chrono::high_resolution_clock::now();
+          const std::chrono::duration<double> time_elapsed = tstop - tstart;
+          std::cout << "computed shell-pair data in " << time_elapsed.count()
+                    << " seconds: # of {all,non-negligible} shell-pairs = {"
+                    << obs.size() * (obs.size() + 1) / 2 << "," << nsp << "}"
+                    << std::endl;
       }
-      const auto tstop = std::chrono::high_resolution_clock::now();
-      const std::chrono::duration<double> time_elapsed = tstop - tstart;
-      std::cout << "computed shell-pair data in " << time_elapsed.count() << " seconds: # of {all,non-negligible} shell-pairs = {"
-                << obs.size() * (obs.size() + 1) / 2 << "," << nsp << "}"
-                << std::endl;
-    }
 
-    // compute one-body integrals
-    auto S = compute_1body_ints<Operator::overlap>(obs)[0];
-    auto T = compute_1body_ints<Operator::kinetic>(obs)[0];
-    auto V = compute_1body_ints<Operator::nuclear>(obs, libint2::make_point_charges(atoms))[0];
-    Matrix H = T + V;
-    T.resize(0, 0);
-    V.resize(0, 0);
-
-    // compute orthogonalizer X such that X.transpose() . S . X = I
-    Matrix X, Xinv;
-    double XtX_condition_number;  // condition number of "re-conditioned"
+      // compute one-body integrals
+      auto S = compute_1body_ints<Operator::overlap>(obs)[0];
+      auto T = compute_1body_ints<Operator::kinetic>(obs)[0];
+      auto V = compute_1body_ints<Operator::nuclear>(obs, libint2::make_point_charges(atoms))[0];
+      Matrix H = T + V;
+      T.resize(0, 0);
+      V.resize(0, 0);
+
+      // compute orthogonalizer X such that X.transpose() . S . X = I
+      Matrix X, Xinv;
+      double XtX_condition_number;  // condition number of "re-conditioned"
                                   // overlap obtained as Xinv.transpose() . Xinv
     // one should think of columns of Xinv as the conditioned basis
     // Re: name ... cond # (Xinv.transpose() . Xinv) = cond # (X.transpose() .
@@ -2162,11 +2177,6 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
   std::vector<libint2::Timers<5>> timers(nthreads);
   for(auto& timer: timers) timer.set_now_overhead(25);
 
-  typedef btas::RangeNd<CblasRowMajor, std::array<long, 1>> Range1d;
-  typedef btas::RangeNd<CblasRowMajor, std::array<long, 2>> Range2d;
-  typedef btas::Tensor<double, Range1d> Tensor1d;
-  typedef btas::Tensor<double, Range2d> Tensor2d;
-
   // using first time? compute 3-center ints and transform to inv sqrt
   // representation
   if (xyK.size() == 0) {
@@ -2192,7 +2202,7 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
     auto shell2bf = obs.shell2bf();
     auto shell2bf_df = dfbs.shell2bf();
 
-    Tensor3d Zxy{ndf, n, n};
+    Tensor Zxy{ndf, n, n};
 
     auto lambda = [&](int thread_id) {
 
@@ -2269,12 +2279,12 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
     //  std::cout << "||V^-1 - L^-1^t L^-1|| = " << (V.inverse() - Linv_t *
     //  Linv_t.transpose()).norm() << std::endl;
 
-    Tensor2d K{ndf, ndf};
+    Tensor K{ndf, ndf};
     std::copy(Linv_t.data(), Linv_t.data() + ndf * ndf, K.begin());
 
-    xyK = Tensor3d{n, n, ndf};
+    xyK = Tensor{n, n, ndf};
     btas::contract(1.0, Zxy, {1, 2, 3}, K, {1, 4}, 0.0, xyK, {2, 3, 4});
-    Zxy = Tensor3d{0, 0, 0};  // release memory
+    Zxy = Tensor{0, 0, 0};  // release memory
 
     timers[0].stop(2);
     std::cout << "time for integrals metric tform = " << timers[0].read(2)
@@ -2285,12 +2295,12 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
   timers[0].start(3);
 
   const auto nocc = Cocc.cols();
-  Tensor2d Co{n, nocc};
+  Tensor Co{n, nocc};
   std::copy(Cocc.data(), Cocc.data() + n * nocc, Co.begin());
-  Tensor3d xiK{n, nocc, ndf};
+  Tensor xiK{n, nocc, ndf};
   btas::contract(1.0, xyK, {1, 2, 3}, Co, {2, 4}, 0.0, xiK, {1, 4, 3});
 
-  Tensor2d G{n, n};
+  Tensor G{n, n};
   btas::contract(1.0, xiK, {1, 2, 3}, xiK, {4, 2, 3}, 0.0, G, {1, 4});
 
   timers[0].stop(3);
@@ -2299,9 +2309,9 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
   // compute Coulomb
   timers[0].start(4);
 
-  Tensor1d Jtmp{ndf};
+  Tensor Jtmp{ndf};
   btas::contract(1.0, xiK, {1, 2, 3}, Co, {1, 2}, 0.0, Jtmp, {3});
-  xiK = Tensor3d{0, 0, 0};
+  xiK = Tensor{0, 0, 0};
   btas::contract(2.0, xyK, {1, 2, 3}, Jtmp, {3}, -1.0, G, {1, 2});
 
   timers[0].stop(4);

From 7d43e26966a7d3977aee5f9b1543e3c09bda91f6 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Fri, 17 Nov 2023 14:56:12 -0500
Subject: [PATCH 06/44] refactored `uncontract()` function from `basis.h.in` to
 `shell.h`

---
 include/libint2/basis.h.in | 19 -------------------
 include/libint2/shell.h    | 19 +++++++++++++++++++
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/include/libint2/basis.h.in b/include/libint2/basis.h.in
index aa37be2a0..d3d156ec9 100644
--- a/include/libint2/basis.h.in
+++ b/include/libint2/basis.h.in
@@ -85,25 +85,6 @@ namespace libint2 {
     return l;
   }
 
-    /// Computes uncontracted shells from a vector of shells
-    /// @param[in] cluster a vector of shells
-    /// @return a vector of uncontracted shells
-    std::vector<Shell> uncontract(
-            const std::vector<Shell> &cluster) {
-        std::vector<Shell> primitive_cluster;
-        for (const auto &contracted_shell: cluster) {
-            for (auto p = 0; p < contracted_shell.nprim(); p++) {
-                const auto prim_shell = contracted_shell.extract_primitive(p, true);
-                // if dealing with generally contracted basis (e.g., cc-pvxz) represented
-                // as a segmented basis need to remove duplicates
-                if (std::find(primitive_cluster.begin(), primitive_cluster.end(),
-                              prim_shell) == primitive_cluster.end())
-                    primitive_cluster.emplace_back(std::move(prim_shell));
-            }
-        }
-        return primitive_cluster;
-    }
-
   /// BasisSet is a slightly decorated \c std::vector of \c libint2::Shell objects.
   class BasisSet : private std::vector<libint2::Shell> {
     public:
diff --git a/include/libint2/shell.h b/include/libint2/shell.h
index 0f7c6d51b..3f1ce9f7b 100644
--- a/include/libint2/shell.h
+++ b/include/libint2/shell.h
@@ -376,6 +376,25 @@ namespace libint2 {
   // clang-format on
 
   namespace detail {
+      /// Computes uncontracted shells from a vector of shells
+      /// @param[in] cluster a vector of shells
+      /// @return a vector of uncontracted shells
+      std::vector <Shell> uncontract(
+              const std::vector <Shell> &cluster) {
+          std::vector <Shell> primitive_cluster;
+          for (const auto &contracted_shell: cluster) {
+              for (auto p = 0; p < contracted_shell.nprim(); p++) {
+                  const auto prim_shell = contracted_shell.extract_primitive(p, true);
+                  // if dealing with generally contracted basis (e.g., cc-pvxz) represented
+                  // as a segmented basis need to remove duplicates
+                  if (std::find(primitive_cluster.begin(), primitive_cluster.end(),
+                                prim_shell) == primitive_cluster.end())
+                      primitive_cluster.emplace_back(std::move(prim_shell));
+              }
+          }
+          return primitive_cluster;
+      }
+
     inline ScreeningMethod& default_screening_method_accessor() {
       static ScreeningMethod default_screening_method = ScreeningMethod::Original;
       return default_screening_method;

From 2cea2c4ae01b56dadfbc97a9ca6e9de544dc529d Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Fri, 17 Nov 2023 15:22:33 -0500
Subject: [PATCH 07/44] increased supported `with-eri3-max-am` to 13

---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index f61eb2515..8056ad680 100644
--- a/configure.ac
+++ b/configure.ac
@@ -524,7 +524,7 @@ AS_HELP_STRING([--with-eri3-max-am=N],[Support 3-center ERIs for Gaussians of an
      else
       ERI3_MAX_AM=$withval
      fi
-     if test $ERI3_MAX_AM -ge 8; then
+     if test $ERI3_MAX_AM -ge 13; then
       AC_MSG_ERROR([Value for --with-eri3-max-am too high ($withval). Are you sure you know what you are doing?])
      fi
      AC_SUBST(ERI3_MAX_AM)

From 56801c3569b260f414a20160f3d58f3f91e3bd97 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Fri, 17 Nov 2023 16:46:00 -0500
Subject: [PATCH 08/44] bugfix: `uncontract()` is in 'libint2::detail'

---
 include/libint2/dfbs_generator.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index cdb13908c..4510a2251 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -209,7 +209,7 @@ namespace libint2 {
             }
             // get primitive shells from AO functions
             for (auto obs_shells: obs_shell_vec) {
-                primitive_cluster.emplace_back(uncontract(obs_shells));
+                primitive_cluster.emplace_back(detail::uncontract(obs_shells));
             }
 
             //compute candidate shells
@@ -223,7 +223,7 @@ namespace libint2 {
         DFBasisSetGenerator(std::vector<std::vector<Shell>> cluster, const double cholesky_thershold = 1e-7) {
             std::vector<std::vector<Shell>> primitive_cluster;
             for (auto i = 0; i < cluster.size(); ++i) {
-                primitive_cluster.emplace_back(uncontract(cluster[i]));
+                primitive_cluster.emplace_back(detail::uncontract(cluster[i]));
             }
             candidate_shells_ = datail::candidate_functions(primitive_cluster);
             cholesky_threshold_ = cholesky_thershold;

From 1473522793e7d5c12d2ff51b697c4e2e011e2f4b Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Fri, 17 Nov 2023 17:11:59 -0500
Subject: [PATCH 09/44] added dox for `DFBasisGenerator`, made
 `reduced_basis()` return const `Basis`

---
 include/libint2/dfbs_generator.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 4510a2251..08d1273b0 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -190,6 +190,9 @@ namespace libint2 {
 
     }// namespace detail
 
+    /// @brief class produces density fitting basis sets from products of AO basis functions
+    /// eliminates linearly dependent functions via pivoted Cholesky decomposition
+    /// see: J. Chem. Theory Comput. 2021, 17, 6886−6900 (Straightforward and Accurate Automatic Auxiliary Basis Set Generation for Molecular Calculations with Atomic Orbital Basis Sets)
     class DFBasisSetGenerator {
     public:
         /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO basis functions
@@ -240,9 +243,9 @@ namespace libint2 {
 
         /// @brief returns the candidate basis set (full set of product functions)
         /// @warning generates huge and heavily linearly dependent basis sets
-        BasisSet product_basis(){
-            std::vector<Shell> product_shells;
-            for(auto shells: candidate_shells_){
+        const BasisSet product_basis() {
+            std::vector <Shell> product_shells;
+            for (auto &&shells: candidate_shells_) {
                 product_shells.insert(product_shells.end(), shells.begin(), shells.end());
             }
             return BasisSet(std::move(product_shells));
@@ -251,7 +254,7 @@ namespace libint2 {
         /// @brief returns the candidate shells sorted by angular momentum
         std::vector<std::vector<std::vector<Shell>>> candidates_splitted_in_L() {
             std::vector<std::vector<std::vector<Shell>>> sorted_shells;
-            for (auto shells: candidate_shells_) {
+            for (auto &&shells: candidate_shells_) {
                 sorted_shells.push_back(datail::split_by_L(shells));
             }
             return sorted_shells;
@@ -263,9 +266,9 @@ namespace libint2 {
                 return reduced_shells_;
             else {
                 auto candidate_splitted_in_L = candidates_splitted_in_L();
-                for (auto i = 0; i < candidate_splitted_in_L.size(); ++i) {
+                for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
                     std::vector<Shell> atom_shells;
-                    for (auto j = 0; j < candidate_splitted_in_L[i].size(); ++j) {
+                    for (size_t j = 0; j < candidate_splitted_in_L[i].size(); ++j) {
                         auto reduced_shells = datail::shell_pivoted_cholesky(candidate_splitted_in_L[i][j],
                                                                              cholesky_threshold_);
                         atom_shells.insert(atom_shells.end(), reduced_shells.begin(), reduced_shells.end());
@@ -277,10 +280,10 @@ namespace libint2 {
         }
 
         /// @brief returns the reduced basis set (reduced set of product functions) computed via pivoted Cholesky decomposition
-        BasisSet reduced_basis() {
+        const BasisSet reduced_basis() {
             auto reduced_cluster = reduced_shells();
-            std::vector<Shell> reduced_shells;
-            for (auto shells: reduced_cluster) {
+            std::vector <Shell> reduced_shells;
+            for (auto &&shells: reduced_cluster) {
                 reduced_shells.insert(reduced_shells.end(), shells.begin(), shells.end());
             }
             return BasisSet(std::move(reduced_shells));

From b62e2facf5656a9fec7d59b19fe477972837b54f Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Fri, 17 Nov 2023 18:38:58 -0500
Subject: [PATCH 10/44] `uncontract()` refactored back to `dfbs_generator.h`
 because its not used anywhere else and is causing symbol duplication

---
 include/libint2/dfbs_generator.h | 31 +++++++++++++++++++++++++------
 include/libint2/shell.h          | 19 -------------------
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 08d1273b0..941835c98 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -24,7 +24,6 @@
 #include <algorithm>
 #include <libint2.h>
 #include <math.h>
-#include <libint2/shell.h>
 #include <libint2/basis.h>
 #include <libint2/atom.h>
 #include <libint2/boys.h>
@@ -34,7 +33,27 @@
 
 namespace libint2 {
 
-    namespace datail {
+    namespace detail {
+
+        /// Computes uncontracted shells from a vector of shells
+        /// @param[in] cluster a vector of shells
+        /// @return a vector of uncontracted shells
+        std::vector <Shell> uncontract(
+                const std::vector <Shell> &cluster) {
+            std::vector <Shell> primitive_cluster;
+            for (const auto &contracted_shell: cluster) {
+                for (auto p = 0; p < contracted_shell.nprim(); p++) {
+                    const auto prim_shell = contracted_shell.extract_primitive(p, true);
+                    // if dealing with generally contracted basis (e.g., cc-pvxz) represented
+                    // as a segmented basis need to remove duplicates
+                    if (std::find(primitive_cluster.begin(), primitive_cluster.end(),
+                                  prim_shell) == primitive_cluster.end())
+                        primitive_cluster.emplace_back(std::move(prim_shell));
+                }
+            }
+            return primitive_cluster;
+        }
+
 
         /// @brief returns \Gamma(x)  of x
         double gamma_function(const double &x) {
@@ -216,7 +235,7 @@ namespace libint2 {
             }
 
             //compute candidate shells
-            candidate_shells_ = datail::candidate_functions(primitive_cluster);
+            candidate_shells_ = detail::candidate_functions(primitive_cluster);
             cholesky_threshold_ = cholesky_thershold;
         }
 
@@ -228,7 +247,7 @@ namespace libint2 {
             for (auto i = 0; i < cluster.size(); ++i) {
                 primitive_cluster.emplace_back(detail::uncontract(cluster[i]));
             }
-            candidate_shells_ = datail::candidate_functions(primitive_cluster);
+            candidate_shells_ = detail::candidate_functions(primitive_cluster);
             cholesky_threshold_ = cholesky_thershold;
         }
 
@@ -255,7 +274,7 @@ namespace libint2 {
         std::vector<std::vector<std::vector<Shell>>> candidates_splitted_in_L() {
             std::vector<std::vector<std::vector<Shell>>> sorted_shells;
             for (auto &&shells: candidate_shells_) {
-                sorted_shells.push_back(datail::split_by_L(shells));
+                sorted_shells.push_back(detail::split_by_L(shells));
             }
             return sorted_shells;
         }
@@ -269,7 +288,7 @@ namespace libint2 {
                 for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
                     std::vector<Shell> atom_shells;
                     for (size_t j = 0; j < candidate_splitted_in_L[i].size(); ++j) {
-                        auto reduced_shells = datail::shell_pivoted_cholesky(candidate_splitted_in_L[i][j],
+                        auto reduced_shells = detail::shell_pivoted_cholesky(candidate_splitted_in_L[i][j],
                                                                              cholesky_threshold_);
                         atom_shells.insert(atom_shells.end(), reduced_shells.begin(), reduced_shells.end());
                     }
diff --git a/include/libint2/shell.h b/include/libint2/shell.h
index 3f1ce9f7b..0f7c6d51b 100644
--- a/include/libint2/shell.h
+++ b/include/libint2/shell.h
@@ -376,25 +376,6 @@ namespace libint2 {
   // clang-format on
 
   namespace detail {
-      /// Computes uncontracted shells from a vector of shells
-      /// @param[in] cluster a vector of shells
-      /// @return a vector of uncontracted shells
-      std::vector <Shell> uncontract(
-              const std::vector <Shell> &cluster) {
-          std::vector <Shell> primitive_cluster;
-          for (const auto &contracted_shell: cluster) {
-              for (auto p = 0; p < contracted_shell.nprim(); p++) {
-                  const auto prim_shell = contracted_shell.extract_primitive(p, true);
-                  // if dealing with generally contracted basis (e.g., cc-pvxz) represented
-                  // as a segmented basis need to remove duplicates
-                  if (std::find(primitive_cluster.begin(), primitive_cluster.end(),
-                                prim_shell) == primitive_cluster.end())
-                      primitive_cluster.emplace_back(std::move(prim_shell));
-              }
-          }
-          return primitive_cluster;
-      }
-
     inline ScreeningMethod& default_screening_method_accessor() {
       static ScreeningMethod default_screening_method = ScreeningMethod::Original;
       return default_screening_method;

From 0d243c6966e202287dbd47e9ce9c20f15616e1c6 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Fri, 17 Nov 2023 20:46:42 -0500
Subject: [PATCH 11/44] bugfix: `hartree-fock++` btas tensor def if btas
 provided

---
 tests/hartree-fock/hartree-fock++.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc
index edfd9f4d6..4df25ee1e 100644
--- a/tests/hartree-fock/hartree-fock++.cc
+++ b/tests/hartree-fock/hartree-fock++.cc
@@ -78,7 +78,9 @@ typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
 typedef Eigen::DiagonalMatrix<double, Eigen::Dynamic, Eigen::Dynamic>
     DiagonalMatrix;
 
+#ifdef LIBINT2_HAVE_BTAS
 typedef btas::Tensor<double> tensor;
+#endif
 
 using libint2::Shell;
 using libint2::Atom;

From 49d1667cc52a040b8fa741c9a71672c62d3bdc74 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Sun, 19 Nov 2023 13:31:46 -0500
Subject: [PATCH 12/44] used && in range based for loop

---
 include/libint2/dfbs_generator.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 941835c98..19ddfd777 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -120,7 +120,7 @@ namespace libint2 {
             result.reserve(shells.size());
 
             size_t n = 0;
-            for (auto shell: shells) {
+            for (auto &&shell: shells) {
                 result.push_back(n);
                 n += shell.size();
             }
@@ -135,7 +135,7 @@ namespace libint2 {
             const auto n = nbf(shells);
             Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
             using libint2::Engine;
-            Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells), 0);
+            Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells));
             engine.set(BraKet::xs_xs);
             engine.set(ScreeningMethod::Conservative);
             auto shell2bf = map_shell_to_basis_function(shells);

From 2d45d6948316f9ef8b4d4378ab904e93bdcbdfed Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Mon, 20 Nov 2023 17:00:40 -0500
Subject: [PATCH 13/44] non-templated functions in headers are now inline

---
 include/libint2/dfbs_generator.h   | 18 +++++++++---------
 include/libint2/pivoted_cholesky.h |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 19ddfd777..1104f5d3e 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -38,7 +38,7 @@ namespace libint2 {
         /// Computes uncontracted shells from a vector of shells
         /// @param[in] cluster a vector of shells
         /// @return a vector of uncontracted shells
-        std::vector <Shell> uncontract(
+        inline std::vector <Shell> uncontract(
                 const std::vector <Shell> &cluster) {
             std::vector <Shell> primitive_cluster;
             for (const auto &contracted_shell: cluster) {
@@ -56,7 +56,7 @@ namespace libint2 {
 
 
         /// @brief returns \Gamma(x)  of x
-        double gamma_function(const double &x) {
+        inline double gamma_function(const double &x) {
             return std::tgamma(x);
         }
 
@@ -65,7 +65,7 @@ namespace libint2 {
         /// @param shell2 second shell
         /// @param L total angular momentum of product function
         /// @return effective exponent of product function
-        double alpha_eff(const Shell &shell1, const Shell &shell2, const int &L) {
+        inline double alpha_eff(const Shell &shell1, const Shell &shell2, const int &L) {
             auto alpha1 = shell1.alpha[0];
             auto alpha2 = shell2.alpha[0];
             auto l1 = shell1.contr[0].l;
@@ -78,7 +78,7 @@ namespace libint2 {
 
         /// @brief creates a set of product functions from a set of primitive shells
         /// @param primitive_shells set of primitive shells
-        std::vector<Shell> product_functions(const std::vector<Shell> &primitive_shells) {
+        inline std::vector<Shell> product_functions(const std::vector<Shell> &primitive_shells) {
             std::vector<Shell> product_functions;
             for (auto i = 0; i < primitive_shells.size(); ++i) {
                 for (auto j = 0; j <= i; ++j) {
@@ -106,7 +106,7 @@ namespace libint2 {
         /// @brief creates a set of candidate product shells from a set of primitive shells
         /// @param primitive_shells set of primitive shells
         /// @return set of candidate product shells
-        std::vector<std::vector<Shell>> candidate_functions(const std::vector<std::vector<Shell>> &primitive_shells) {
+        inline std::vector<std::vector<Shell>> candidate_functions(const std::vector<std::vector<Shell>> &primitive_shells) {
             std::vector<std::vector<Shell>> candidate_functions;
             for (auto i = 0; i < primitive_shells.size(); ++i) {
                 candidate_functions.push_back(product_functions(primitive_shells[i]));
@@ -115,7 +115,7 @@ namespace libint2 {
         }
 
         /// @brief returns a hash map of shell indices to basis function indices
-        std::vector<size_t> map_shell_to_basis_function(const std::vector<libint2::Shell> &shells) {
+        inline std::vector<size_t> map_shell_to_basis_function(const std::vector<libint2::Shell> &shells) {
             std::vector<size_t> result;
             result.reserve(shells.size());
 
@@ -131,7 +131,7 @@ namespace libint2 {
         /// @brief computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
         /// @param shells set of shells
         /// @return Coulomb matrix
-        Eigen::MatrixXd compute_coulomb_matrix(const std::vector<Shell> &shells) {
+        inline Eigen::MatrixXd compute_coulomb_matrix(const std::vector<Shell> &shells) {
             const auto n = nbf(shells);
             Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
             using libint2::Engine;
@@ -157,7 +157,7 @@ namespace libint2 {
         }
 
         /// @brief Sorts a vector of shells by angular momentum
-        std::vector<std::vector<Shell>> split_by_L(const std::vector<Shell> &shells) {
+        inline std::vector<std::vector<Shell>> split_by_L(const std::vector<Shell> &shells) {
             int lmax = max_l(shells);
             std::vector<std::vector<Shell>> sorted_shells;
             sorted_shells.resize(lmax + 1);
@@ -172,7 +172,7 @@ namespace libint2 {
         /// @param shells set of shells
         /// @param cholesky_threshold threshold for choosing a product function via pivoted Cholesky decomposition
         /// @return reduced set of product functions
-        std::vector<Shell> shell_pivoted_cholesky(const std::vector<Shell> shells, const double cholesky_threshold) {
+        inline std::vector<Shell> shell_pivoted_cholesky(const std::vector<Shell> shells, const double cholesky_threshold) {
 
             auto n = shells.size(); // number of shells
             std::vector<size_t> shell_indices; // hash map of basis function indices to shell indices
diff --git a/include/libint2/pivoted_cholesky.h b/include/libint2/pivoted_cholesky.h
index 6eb695f78..6e19d4921 100644
--- a/include/libint2/pivoted_cholesky.h
+++ b/include/libint2/pivoted_cholesky.h
@@ -17,7 +17,7 @@ namespace libint2 {
     /// @param tolerance tolerance for the error
     /// @param pivot initial pivot indices
     /// @return pivoted Cholesky decomposition of A
-    std::vector<size_t>
+    inline std::vector<size_t>
     pivoted_cholesky(const Eigen::MatrixXd &A, const double &tolerance, const std::vector<size_t> &pivot) {
         // number of elements in A
         auto n = A.rows();

From c4886a7609ea14e48a315bb7374c06cecc3aa017 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Tue, 21 Nov 2023 16:37:43 -0500
Subject: [PATCH 14/44] `DFBasisGenerator` generates density fitting basis set
 for one atom not vector of atoms

---
 include/libint2/dfbs_generator.h     | 101 ++++++++-------------------
 tests/hartree-fock/hartree-fock++.cc |   9 ++-
 2 files changed, 36 insertions(+), 74 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 1104f5d3e..66ca502d7 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -39,19 +39,19 @@ namespace libint2 {
         /// @param[in] cluster a vector of shells
         /// @return a vector of uncontracted shells
         inline std::vector <Shell> uncontract(
-                const std::vector <Shell> &cluster) {
-            std::vector <Shell> primitive_cluster;
-            for (const auto &contracted_shell: cluster) {
+                const std::vector <Shell> &shells) {
+            std::vector <Shell> primitive_shells;
+            for (const auto &contracted_shell: shells) {
                 for (auto p = 0; p < contracted_shell.nprim(); p++) {
                     const auto prim_shell = contracted_shell.extract_primitive(p, true);
                     // if dealing with generally contracted basis (e.g., cc-pvxz) represented
                     // as a segmented basis need to remove duplicates
-                    if (std::find(primitive_cluster.begin(), primitive_cluster.end(),
-                                  prim_shell) == primitive_cluster.end())
-                        primitive_cluster.emplace_back(std::move(prim_shell));
+                    if (std::find(primitive_shells.begin(), primitive_shells.end(),
+                                  prim_shell) == primitive_shells.end())
+                        primitive_shells.emplace_back(std::move(prim_shell));
                 }
             }
-            return primitive_cluster;
+            return primitive_shells;
         }
 
 
@@ -106,12 +106,8 @@ namespace libint2 {
         /// @brief creates a set of candidate product shells from a set of primitive shells
         /// @param primitive_shells set of primitive shells
         /// @return set of candidate product shells
-        inline std::vector<std::vector<Shell>> candidate_functions(const std::vector<std::vector<Shell>> &primitive_shells) {
-            std::vector<std::vector<Shell>> candidate_functions;
-            for (auto i = 0; i < primitive_shells.size(); ++i) {
-                candidate_functions.push_back(product_functions(primitive_shells[i]));
-            }
-            return candidate_functions;
+        inline std::vector<Shell> candidate_functions(const std::vector<Shell> &primitive_shells) {
+            return product_functions(primitive_shells);
         }
 
         /// @brief returns a hash map of shell indices to basis function indices
@@ -220,34 +216,23 @@ namespace libint2 {
         /// @param atoms vector of atoms
         /// @param cholesky_threshold threshold for choosing a product functions via pivoted Cholesky decomposition
         DFBasisSetGenerator(std::string obs_name,
-                            const std::vector<Atom> &atoms, const double cholesky_thershold = 1e-7) : obs_name_(
-                std::move(obs_name)), atoms_(std::move(atoms)) {
-            std::vector<std::vector<Shell>> obs_shell_vec;
-            std::vector<std::vector<Shell>> primitive_cluster;
+                            const Atom &atom, const double cholesky_thershold = 1e-7) {
             // get AO basis shells for each atom
-            for (auto atom: atoms) {
-                auto atom_bs = BasisSet(obs_name_, {atom});
-                obs_shell_vec.emplace_back(atom_bs.shells());
-            }
+            auto atom_bs = BasisSet(obs_name, {atom});
+            auto obs_shells = atom_bs.shells();
             // get primitive shells from AO functions
-            for (auto obs_shells: obs_shell_vec) {
-                primitive_cluster.emplace_back(detail::uncontract(obs_shells));
-            }
-
+            auto primitive_shells = detail::uncontract(obs_shells);
             //compute candidate shells
-            candidate_shells_ = detail::candidate_functions(primitive_cluster);
+            candidate_shells_ = detail::candidate_functions(primitive_shells);
             cholesky_threshold_ = cholesky_thershold;
         }
 
         /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO shells provided by user
         /// @param cluster vector of vector of shells for each atom
         /// @param cholesky_threshold threshold for choosing a product functions via pivoted Cholesky decomposition
-        DFBasisSetGenerator(std::vector<std::vector<Shell>> cluster, const double cholesky_thershold = 1e-7) {
-            std::vector<std::vector<Shell>> primitive_cluster;
-            for (auto i = 0; i < cluster.size(); ++i) {
-                primitive_cluster.emplace_back(detail::uncontract(cluster[i]));
-            }
-            candidate_shells_ = detail::candidate_functions(primitive_cluster);
+        DFBasisSetGenerator(std::vector<Shell> shells, const double cholesky_thershold = 1e-7) {
+            auto primitive_shells = detail::uncontract(shells);
+            candidate_shells_ = detail::candidate_functions(primitive_shells);
             cholesky_threshold_ = cholesky_thershold;
         }
 
@@ -256,65 +241,37 @@ namespace libint2 {
         ~DFBasisSetGenerator() = default;
 
         /// @brief returns the candidate shells (full set of product functions)
-        std::vector<std::vector<Shell>> candidate_shells() {
+        std::vector<Shell> candidate_shells() {
             return candidate_shells_;
         }
 
-        /// @brief returns the candidate basis set (full set of product functions)
-        /// @warning generates huge and heavily linearly dependent basis sets
-        const BasisSet product_basis() {
-            std::vector <Shell> product_shells;
-            for (auto &&shells: candidate_shells_) {
-                product_shells.insert(product_shells.end(), shells.begin(), shells.end());
-            }
-            return BasisSet(std::move(product_shells));
-        }
-
-        /// @brief returns the candidate shells sorted by angular momentum
-        std::vector<std::vector<std::vector<Shell>>> candidates_splitted_in_L() {
-            std::vector<std::vector<std::vector<Shell>>> sorted_shells;
-            for (auto &&shells: candidate_shells_) {
-                sorted_shells.push_back(detail::split_by_L(shells));
-            }
-            return sorted_shells;
-        }
-
         /// @brief returns the reduced shells (reduced set of product functions) computed via pivoted Cholesky decomposition
-        std::vector<std::vector<Shell>> reduced_shells() {
-            if (reduced_shells_.size() != 0)
+        std::vector<Shell> reduced_shells() {
+            if (reduced_shells_computed_)
                 return reduced_shells_;
             else {
-                auto candidate_splitted_in_L = candidates_splitted_in_L();
+                auto candidate_splitted_in_L = detail::split_by_L(candidate_shells_);
                 for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
-                    std::vector<Shell> atom_shells;
-                    for (size_t j = 0; j < candidate_splitted_in_L[i].size(); ++j) {
-                        auto reduced_shells = detail::shell_pivoted_cholesky(candidate_splitted_in_L[i][j],
-                                                                             cholesky_threshold_);
-                        atom_shells.insert(atom_shells.end(), reduced_shells.begin(), reduced_shells.end());
-                    }
-                    reduced_shells_.push_back(atom_shells);
+                    auto reduced_shells_L = detail::shell_pivoted_cholesky(candidate_splitted_in_L[i],
+                                                                           cholesky_threshold_);
+                    reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(), reduced_shells_L.end());
                 }
+                reduced_shells_computed_ = true;
             }
             return reduced_shells_;
         }
 
         /// @brief returns the reduced basis set (reduced set of product functions) computed via pivoted Cholesky decomposition
         const BasisSet reduced_basis() {
-            auto reduced_cluster = reduced_shells();
-            std::vector <Shell> reduced_shells;
-            for (auto &&shells: reduced_cluster) {
-                reduced_shells.insert(reduced_shells.end(), shells.begin(), shells.end());
-            }
-            return BasisSet(std::move(reduced_shells));
+            return BasisSet(reduced_shells_);
         }
 
 
     private:
-        std::string obs_name_;  //name of AO basis set
-        std::vector<Atom> atoms_; //vector of atoms
         double cholesky_threshold_;
-        std::vector<std::vector<Shell>> candidate_shells_;  //full set of product functions
-        std::vector<std::vector<Shell>> reduced_shells_;    //reduced set of product functions
+        std::vector<Shell> candidate_shells_;  //full set of product functions
+        std::vector<Shell> reduced_shells_;    //reduced set of product functions
+        bool reduced_shells_computed_ = false;
 
     };
 
diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc
index 4df25ee1e..287410234 100644
--- a/tests/hartree-fock/hartree-fock++.cc
+++ b/tests/hartree-fock/hartree-fock++.cc
@@ -290,8 +290,13 @@ int main(int argc, char* argv[]) {
       if (do_density_fitting) {
           if (strcmp(dfbasisname, "autodf") == 0) {
               if (!libint2::initialized()) libint2::initialize();
-              libint2::DFBasisSetGenerator dfbs_generator(basisname, atoms, cholesky_threshold);
-              dfbs = dfbs_generator.reduced_basis();
+              std::vector<Shell> dfbs_shells;
+              for(auto &&atom:atoms) {
+                  libint2::DFBasisSetGenerator dfbs_generator(basisname, atom, cholesky_threshold);
+                  auto reduced_shells = dfbs_generator.reduced_shells();
+                  dfbs_shells.insert(dfbs_shells.end(), reduced_shells.begin(), reduced_shells.end());
+              }
+              dfbs = BasisSet(std::move(dfbs_shells));
           } else
               dfbs = BasisSet(dfbasisname, atoms);
           cout << "density-fitting basis set rank = " << dfbs.nbf() << endl;

From aeaf5358cde3b6c1f5827bfe3d9769d41cb23211 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Tue, 21 Nov 2023 16:37:43 -0500
Subject: [PATCH 15/44] `DFBasisGenerator` generates density fitting basis set
 for one atom not vector of atoms

---
 include/libint2/dfbs_generator.h     | 101 ++++++++-------------------
 tests/hartree-fock/hartree-fock++.cc |   9 ++-
 2 files changed, 36 insertions(+), 74 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 1104f5d3e..66ca502d7 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -39,19 +39,19 @@ namespace libint2 {
         /// @param[in] cluster a vector of shells
         /// @return a vector of uncontracted shells
         inline std::vector <Shell> uncontract(
-                const std::vector <Shell> &cluster) {
-            std::vector <Shell> primitive_cluster;
-            for (const auto &contracted_shell: cluster) {
+                const std::vector <Shell> &shells) {
+            std::vector <Shell> primitive_shells;
+            for (const auto &contracted_shell: shells) {
                 for (auto p = 0; p < contracted_shell.nprim(); p++) {
                     const auto prim_shell = contracted_shell.extract_primitive(p, true);
                     // if dealing with generally contracted basis (e.g., cc-pvxz) represented
                     // as a segmented basis need to remove duplicates
-                    if (std::find(primitive_cluster.begin(), primitive_cluster.end(),
-                                  prim_shell) == primitive_cluster.end())
-                        primitive_cluster.emplace_back(std::move(prim_shell));
+                    if (std::find(primitive_shells.begin(), primitive_shells.end(),
+                                  prim_shell) == primitive_shells.end())
+                        primitive_shells.emplace_back(std::move(prim_shell));
                 }
             }
-            return primitive_cluster;
+            return primitive_shells;
         }
 
 
@@ -106,12 +106,8 @@ namespace libint2 {
         /// @brief creates a set of candidate product shells from a set of primitive shells
         /// @param primitive_shells set of primitive shells
         /// @return set of candidate product shells
-        inline std::vector<std::vector<Shell>> candidate_functions(const std::vector<std::vector<Shell>> &primitive_shells) {
-            std::vector<std::vector<Shell>> candidate_functions;
-            for (auto i = 0; i < primitive_shells.size(); ++i) {
-                candidate_functions.push_back(product_functions(primitive_shells[i]));
-            }
-            return candidate_functions;
+        inline std::vector<Shell> candidate_functions(const std::vector<Shell> &primitive_shells) {
+            return product_functions(primitive_shells);
         }
 
         /// @brief returns a hash map of shell indices to basis function indices
@@ -220,34 +216,23 @@ namespace libint2 {
         /// @param atoms vector of atoms
         /// @param cholesky_threshold threshold for choosing a product functions via pivoted Cholesky decomposition
         DFBasisSetGenerator(std::string obs_name,
-                            const std::vector<Atom> &atoms, const double cholesky_thershold = 1e-7) : obs_name_(
-                std::move(obs_name)), atoms_(std::move(atoms)) {
-            std::vector<std::vector<Shell>> obs_shell_vec;
-            std::vector<std::vector<Shell>> primitive_cluster;
+                            const Atom &atom, const double cholesky_thershold = 1e-7) {
             // get AO basis shells for each atom
-            for (auto atom: atoms) {
-                auto atom_bs = BasisSet(obs_name_, {atom});
-                obs_shell_vec.emplace_back(atom_bs.shells());
-            }
+            auto atom_bs = BasisSet(obs_name, {atom});
+            auto obs_shells = atom_bs.shells();
             // get primitive shells from AO functions
-            for (auto obs_shells: obs_shell_vec) {
-                primitive_cluster.emplace_back(detail::uncontract(obs_shells));
-            }
-
+            auto primitive_shells = detail::uncontract(obs_shells);
             //compute candidate shells
-            candidate_shells_ = detail::candidate_functions(primitive_cluster);
+            candidate_shells_ = detail::candidate_functions(primitive_shells);
             cholesky_threshold_ = cholesky_thershold;
         }
 
         /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO shells provided by user
         /// @param cluster vector of vector of shells for each atom
         /// @param cholesky_threshold threshold for choosing a product functions via pivoted Cholesky decomposition
-        DFBasisSetGenerator(std::vector<std::vector<Shell>> cluster, const double cholesky_thershold = 1e-7) {
-            std::vector<std::vector<Shell>> primitive_cluster;
-            for (auto i = 0; i < cluster.size(); ++i) {
-                primitive_cluster.emplace_back(detail::uncontract(cluster[i]));
-            }
-            candidate_shells_ = detail::candidate_functions(primitive_cluster);
+        DFBasisSetGenerator(std::vector<Shell> shells, const double cholesky_thershold = 1e-7) {
+            auto primitive_shells = detail::uncontract(shells);
+            candidate_shells_ = detail::candidate_functions(primitive_shells);
             cholesky_threshold_ = cholesky_thershold;
         }
 
@@ -256,65 +241,37 @@ namespace libint2 {
         ~DFBasisSetGenerator() = default;
 
         /// @brief returns the candidate shells (full set of product functions)
-        std::vector<std::vector<Shell>> candidate_shells() {
+        std::vector<Shell> candidate_shells() {
             return candidate_shells_;
         }
 
-        /// @brief returns the candidate basis set (full set of product functions)
-        /// @warning generates huge and heavily linearly dependent basis sets
-        const BasisSet product_basis() {
-            std::vector <Shell> product_shells;
-            for (auto &&shells: candidate_shells_) {
-                product_shells.insert(product_shells.end(), shells.begin(), shells.end());
-            }
-            return BasisSet(std::move(product_shells));
-        }
-
-        /// @brief returns the candidate shells sorted by angular momentum
-        std::vector<std::vector<std::vector<Shell>>> candidates_splitted_in_L() {
-            std::vector<std::vector<std::vector<Shell>>> sorted_shells;
-            for (auto &&shells: candidate_shells_) {
-                sorted_shells.push_back(detail::split_by_L(shells));
-            }
-            return sorted_shells;
-        }
-
         /// @brief returns the reduced shells (reduced set of product functions) computed via pivoted Cholesky decomposition
-        std::vector<std::vector<Shell>> reduced_shells() {
-            if (reduced_shells_.size() != 0)
+        std::vector<Shell> reduced_shells() {
+            if (reduced_shells_computed_)
                 return reduced_shells_;
             else {
-                auto candidate_splitted_in_L = candidates_splitted_in_L();
+                auto candidate_splitted_in_L = detail::split_by_L(candidate_shells_);
                 for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
-                    std::vector<Shell> atom_shells;
-                    for (size_t j = 0; j < candidate_splitted_in_L[i].size(); ++j) {
-                        auto reduced_shells = detail::shell_pivoted_cholesky(candidate_splitted_in_L[i][j],
-                                                                             cholesky_threshold_);
-                        atom_shells.insert(atom_shells.end(), reduced_shells.begin(), reduced_shells.end());
-                    }
-                    reduced_shells_.push_back(atom_shells);
+                    auto reduced_shells_L = detail::shell_pivoted_cholesky(candidate_splitted_in_L[i],
+                                                                           cholesky_threshold_);
+                    reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(), reduced_shells_L.end());
                 }
+                reduced_shells_computed_ = true;
             }
             return reduced_shells_;
         }
 
         /// @brief returns the reduced basis set (reduced set of product functions) computed via pivoted Cholesky decomposition
         const BasisSet reduced_basis() {
-            auto reduced_cluster = reduced_shells();
-            std::vector <Shell> reduced_shells;
-            for (auto &&shells: reduced_cluster) {
-                reduced_shells.insert(reduced_shells.end(), shells.begin(), shells.end());
-            }
-            return BasisSet(std::move(reduced_shells));
+            return BasisSet(reduced_shells_);
         }
 
 
     private:
-        std::string obs_name_;  //name of AO basis set
-        std::vector<Atom> atoms_; //vector of atoms
         double cholesky_threshold_;
-        std::vector<std::vector<Shell>> candidate_shells_;  //full set of product functions
-        std::vector<std::vector<Shell>> reduced_shells_;    //reduced set of product functions
+        std::vector<Shell> candidate_shells_;  //full set of product functions
+        std::vector<Shell> reduced_shells_;    //reduced set of product functions
+        bool reduced_shells_computed_ = false;
 
     };
 
diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc
index 4df25ee1e..287410234 100644
--- a/tests/hartree-fock/hartree-fock++.cc
+++ b/tests/hartree-fock/hartree-fock++.cc
@@ -290,8 +290,13 @@ int main(int argc, char* argv[]) {
       if (do_density_fitting) {
           if (strcmp(dfbasisname, "autodf") == 0) {
               if (!libint2::initialized()) libint2::initialize();
-              libint2::DFBasisSetGenerator dfbs_generator(basisname, atoms, cholesky_threshold);
-              dfbs = dfbs_generator.reduced_basis();
+              std::vector<Shell> dfbs_shells;
+              for(auto &&atom:atoms) {
+                  libint2::DFBasisSetGenerator dfbs_generator(basisname, atom, cholesky_threshold);
+                  auto reduced_shells = dfbs_generator.reduced_shells();
+                  dfbs_shells.insert(dfbs_shells.end(), reduced_shells.begin(), reduced_shells.end());
+              }
+              dfbs = BasisSet(std::move(dfbs_shells));
           } else
               dfbs = BasisSet(dfbasisname, atoms);
           cout << "density-fitting basis set rank = " << dfbs.nbf() << endl;

From 70256262d518d0da75473f56ebfb38519e7c949a Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Wed, 22 Nov 2023 00:47:52 -0500
Subject: [PATCH 16/44] Bugfix: `DFBasisGenerator.reduced_basis()` will not
 return NULL basis

---
 include/libint2/dfbs_generator.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 66ca502d7..23ec18c54 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -157,7 +157,7 @@ namespace libint2 {
             int lmax = max_l(shells);
             std::vector<std::vector<Shell>> sorted_shells;
             sorted_shells.resize(lmax + 1);
-            for (auto shell: shells) {
+            for (auto &&shell: shells) {
                 auto l = shell.contr[0].l;
                 sorted_shells[l].push_back(shell);
             }
@@ -263,7 +263,7 @@ namespace libint2 {
 
         /// @brief returns the reduced basis set (reduced set of product functions) computed via pivoted Cholesky decomposition
         const BasisSet reduced_basis() {
-            return BasisSet(reduced_shells_);
+            return BasisSet(reduced_shells());
         }
 
 

From bdce06c7ae01e5c236023370ef12bc2064bdd64c Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Wed, 22 Nov 2023 11:44:17 -0500
Subject: [PATCH 17/44] switched from deducing size type by `auto` to explicit
 `size_t`

---
 include/libint2/dfbs_generator.h   | 14 +++++++-------
 include/libint2/pivoted_cholesky.h | 18 +++++++++---------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 23ec18c54..e5babccb2 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -80,8 +80,8 @@ namespace libint2 {
         /// @param primitive_shells set of primitive shells
         inline std::vector<Shell> product_functions(const std::vector<Shell> &primitive_shells) {
             std::vector<Shell> product_functions;
-            for (auto i = 0; i < primitive_shells.size(); ++i) {
-                for (auto j = 0; j <= i; ++j) {
+            for (size_t i = 0; i < primitive_shells.size(); ++i) {
+                for (size_t j = 0; j <= i; ++j) {
                     auto li = primitive_shells[i].contr[0].l;
                     auto lj = primitive_shells[j].contr[0].l;
                     for (auto L = std::abs(li - lj); L <= li + lj; L++) {
@@ -136,10 +136,10 @@ namespace libint2 {
             engine.set(ScreeningMethod::Conservative);
             auto shell2bf = map_shell_to_basis_function(shells);
             const auto &buf = engine.results();
-            for (auto s1 = 0; s1 != shells.size(); ++s1) {
+            for (size_t s1 = 0; s1 != shells.size(); ++s1) {
                 auto bf1 = shell2bf[s1];
                 auto n1 = shells[s1].size();
-                for (auto s2 = 0; s2 <= s1; ++s2) {
+                for (size_t s2 = 0; s2 <= s1; ++s2) {
                     auto bf2 = shell2bf[s2];
                     auto n2 = shells[s2].size();
                     engine.compute(shells[s1], shells[s2]);
@@ -174,8 +174,8 @@ namespace libint2 {
             std::vector<size_t> shell_indices; // hash map of basis function indices to shell indices
             auto L = shells[0].contr[0].l; // all shells must have same L
             auto nbf = libint2::nbf(shells);  // total number of basis functions in vector of shells
-            for (auto i = 0; i < n; ++i) {
-                for (auto j = 0; j < 2 * L + 1; ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b integrals
+            for (size_t i = 0; i < n; ++i) {
+                for (size_t j = 0; j < 2 * L + 1; ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b integrals
                     shell_indices.push_back(i);
             }
             assert(shell_indices.size() == nbf);
@@ -194,7 +194,7 @@ namespace libint2 {
             auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
 
             std::vector<Shell> reduced_shells;
-            for (auto i = 0; i < reduced_pivots.size(); ++i) {
+            for (size_t i = 0; i < reduced_pivots.size(); ++i) {
                 // check if the reduced shell is already in reduced shells
                 if (std::find(reduced_shells.begin(), reduced_shells.end(),
                               shells[shell_indices[reduced_pivots[i]]]) == reduced_shells.end())
diff --git a/include/libint2/pivoted_cholesky.h b/include/libint2/pivoted_cholesky.h
index 6e19d4921..1046bc7b9 100644
--- a/include/libint2/pivoted_cholesky.h
+++ b/include/libint2/pivoted_cholesky.h
@@ -25,7 +25,7 @@ namespace libint2 {
         std::vector<double> d(n);
         // initial error
         auto error = A.diagonal()[0];
-        for (auto i = 0; i < n; ++i) {
+        for (size_t i = 0; i < n; ++i) {
             d[i] = A.diagonal()[i];
             error = std::max(d[i], error);
         }
@@ -39,7 +39,7 @@ namespace libint2 {
         // copy input pivot indices
         std::vector<size_t> piv;
         piv.reserve(n);
-        for (auto i = 0; i < n; ++i) {
+        for (size_t i = 0; i < n; ++i) {
             piv.push_back(pivot[i]);
         }
 
@@ -48,24 +48,24 @@ namespace libint2 {
             // update pivot indices
             // Errors in pivoted order
             std::vector<double> err(d.size());
-            for (auto i = 0; i < d.size(); ++i) {
+            for (size_t i = 0; i < d.size(); ++i) {
                 err[i] = d[piv[i]];
             }
             // error vector after mth element
             std::vector<double> err2(err.begin() + m, err.end());
             std::vector<size_t> idx(err2.size());
-            for (auto i = 0; i < idx.size(); ++i) {
+            for (size_t i = 0; i < idx.size(); ++i) {
                 idx[i] = i;
             }
             // sort indices
             std::sort(idx.begin(), idx.end(), [&err2](size_t i1, size_t i2) { return err2[i1] > err2[i2]; });
             // subvector of piv
             std::vector<size_t> piv_subvec(piv.size() - m);
-            for (auto i = 0; i < piv_subvec.size(); ++i) {
+            for (size_t i = 0; i < piv_subvec.size(); ++i) {
                 piv_subvec[i] = piv[i + m];
             }
             // sort piv
-            for (auto i = 0; i < idx.size(); ++i) {
+            for (size_t i = 0; i < idx.size(); ++i) {
                 piv[i + m] = piv_subvec[idx[i]];
             }
 
@@ -77,7 +77,7 @@ namespace libint2 {
             L(m, pim) = std::sqrt(d[pim]);
 
             //off-diagonal elements
-            for (auto i = m + 1; i < n; ++i) {
+            for (size_t i = m + 1; i < n; ++i) {
                 auto pii = piv[i];
                 //compute element
                 L(m, pii) = (m > 0) ? (A(pim, pii) - L.col(pim).head(m).dot(L.col(pii).head(m))) / L(m, pim) :
@@ -88,7 +88,7 @@ namespace libint2 {
             //update error
             if (m + 1 < n) {
                 error = d[piv[m + 1]];
-                for (auto i = m + 1; i < n; ++i) {
+                for (size_t i = m + 1; i < n; ++i) {
                     error = std::max(d[piv[i]], error);
                 }
             }
@@ -103,7 +103,7 @@ namespace libint2 {
         // return reduced pivot indices
         std::vector<size_t> reduced_piv;
         reduced_piv.reserve(m);
-        for (auto i = 0; i < m; ++i) {
+        for (size_t i = 0; i < m; ++i) {
             reduced_piv.push_back(piv[i]);
         }
 

From cb046478095ee97a7e1bf0d2eb516f3b6f74345f Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 23 Nov 2023 18:02:23 -0500
Subject: [PATCH 18/44] reformatted code in `bfbs_generator.h` and
 `pivoted_cholesky.h`

---
 include/libint2/dfbs_generator.h   | 208 ++++++++++++++++-------------
 include/libint2/pivoted_cholesky.h |  73 ++++++----
 2 files changed, 160 insertions(+), 121 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index e5babccb2..b608493d0 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -21,28 +21,27 @@
 #ifndef _libint2_src_lib_libint_dfbs_generator_h_
 #define _libint2_src_lib_libint_dfbs_generator_h_
 
-#include <algorithm>
 #include <libint2.h>
-#include <math.h>
-#include <libint2/basis.h>
 #include <libint2/atom.h>
+#include <libint2/basis.h>
 #include <libint2/boys.h>
 #include <libint2/pivoted_cholesky.h>
+#include <math.h>
 #include <Eigen/Dense>
 #include <Eigen/Eigenvalues>
+#include <algorithm>
 
 namespace libint2 {
 
     namespace detail {
 
-        /// Computes uncontracted shells from a vector of shells
-        /// @param[in] cluster a vector of shells
-        /// @return a vector of uncontracted shells
-        inline std::vector <Shell> uncontract(
-                const std::vector <Shell> &shells) {
-            std::vector <Shell> primitive_shells;
-            for (const auto &contracted_shell: shells) {
-                for (auto p = 0; p < contracted_shell.nprim(); p++) {
+/// Computes uncontracted shells from a vector of shells
+/// @param[in] cluster a vector of shells
+/// @return a vector of uncontracted shells
+        inline std::vector<Shell> uncontract(const std::vector<Shell> &shells) {
+            std::vector<Shell> primitive_shells;
+            for (const auto &contracted_shell : shells) {
+                for (size_t p = 0; p < contracted_shell.nprim(); p++) {
                     const auto prim_shell = contracted_shell.extract_primitive(p, true);
                     // if dealing with generally contracted basis (e.g., cc-pvxz) represented
                     // as a segmented basis need to remove duplicates
@@ -54,42 +53,43 @@ namespace libint2 {
             return primitive_shells;
         }
 
+/// @brief returns \Gamma(x)  of x
+        inline double gamma_function(const double x) { return std::tgamma(x); }
 
-        /// @brief returns \Gamma(x)  of x
-        inline double gamma_function(const double &x) {
-            return std::tgamma(x);
-        }
-
-        /// @brief return effective exponent of product of two primitive shells
-        /// @param shell1 first shell
-        /// @param shell2 second shell
-        /// @param L total angular momentum of product function
-        /// @return effective exponent of product function
-        inline double alpha_eff(const Shell &shell1, const Shell &shell2, const int &L) {
+/// @brief return effective exponent of product of two primitive shells
+/// @param shell1 first shell
+/// @param shell2 second shell
+/// @param L total angular momentum of product function
+/// @return effective exponent of product function
+        inline double alpha_eff(const Shell &shell1, const Shell &shell2,
+                                const int L) {
             auto alpha1 = shell1.alpha[0];
             auto alpha2 = shell2.alpha[0];
             auto l1 = shell1.contr[0].l;
             auto l2 = shell2.contr[0].l;
-            auto prefactor = std::pow((gamma_function(L + 2.) * gamma_function(l1 + l2 + 1.5)) /
-                                      (gamma_function(l1 + l2 + 2.) * gamma_function(L + 1.5)),
-                                      2.);
+            auto prefactor =
+                    std::pow((gamma_function(L + 2.) * gamma_function(l1 + l2 + 1.5)) /
+                             (gamma_function(l1 + l2 + 2.) * gamma_function(L + 1.5)),
+                             2.);
             return prefactor * (alpha1 + alpha2);
         }
 
-        /// @brief creates a set of product functions from a set of primitive shells
-        /// @param primitive_shells set of primitive shells
-        inline std::vector<Shell> product_functions(const std::vector<Shell> &primitive_shells) {
+/// @brief creates a set of product functions from a set of primitive shells
+/// @param primitive_shells set of primitive shells
+        inline std::vector<Shell> product_functions(
+                const std::vector<Shell> &primitive_shells) {
             std::vector<Shell> product_functions;
             for (size_t i = 0; i < primitive_shells.size(); ++i) {
                 for (size_t j = 0; j <= i; ++j) {
                     auto li = primitive_shells[i].contr[0].l;
                     auto lj = primitive_shells[j].contr[0].l;
                     for (auto L = std::abs(li - lj); L <= li + lj; L++) {
-                        auto alpha = libint2::svector<double>({alpha_eff(primitive_shells[i], primitive_shells[j], L)});
+                        auto alpha = libint2::svector<double>(
+                                {alpha_eff(primitive_shells[i], primitive_shells[j], L)});
                         libint2::svector<Shell::Contraction> contr_;
                         Shell::Contraction contr1;
                         contr1.l = L;
-                        contr1.pure = true; // libint2 needs solid harmonics for 2c2b integrals
+                        contr1.pure = true;  // libint2 needs solid harmonics for 2c2b integrals
                         contr1.coeff = {1.0};
                         contr_.push_back(contr1);
                         assert(primitive_shells[i].O == primitive_shells[j].O);
@@ -103,20 +103,23 @@ namespace libint2 {
             return product_functions;
         }
 
-        /// @brief creates a set of candidate product shells from a set of primitive shells
-        /// @param primitive_shells set of primitive shells
-        /// @return set of candidate product shells
-        inline std::vector<Shell> candidate_functions(const std::vector<Shell> &primitive_shells) {
+/// @brief creates a set of candidate product shells from a set of primitive
+/// shells
+/// @param primitive_shells set of primitive shells
+/// @return set of candidate product shells
+        inline std::vector<Shell> candidate_functions(
+                const std::vector<Shell> &primitive_shells) {
             return product_functions(primitive_shells);
         }
 
-        /// @brief returns a hash map of shell indices to basis function indices
-        inline std::vector<size_t> map_shell_to_basis_function(const std::vector<libint2::Shell> &shells) {
+/// @brief returns a hash map of shell indices to basis function indices
+        inline std::vector<size_t> map_shell_to_basis_function(
+                const std::vector<libint2::Shell> &shells) {
             std::vector<size_t> result;
             result.reserve(shells.size());
 
             size_t n = 0;
-            for (auto &&shell: shells) {
+            for (auto &&shell : shells) {
                 result.push_back(n);
                 n += shell.size();
             }
@@ -124,10 +127,11 @@ namespace libint2 {
             return result;
         }
 
-        /// @brief computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
-        /// @param shells set of shells
-        /// @return Coulomb matrix
-        inline Eigen::MatrixXd compute_coulomb_matrix(const std::vector<Shell> &shells) {
+/// @brief computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
+/// @param shells set of shells
+/// @return Coulomb matrix
+        inline Eigen::MatrixXd compute_coulomb_matrix(
+                const std::vector<Shell> &shells) {
             const auto n = nbf(shells);
             Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
             using libint2::Engine;
@@ -145,51 +149,60 @@ namespace libint2 {
                     engine.compute(shells[s1], shells[s2]);
                     Eigen::Map<const Eigen::MatrixXd> buf_mat(buf[0], n1, n2);
                     result.block(bf1, bf2, n1, n2) = buf_mat;
-                    if (s1 != s2)
-                        result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
+                    if (s1 != s2) result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
                 }
             }
             return result;
         }
 
-        /// @brief Sorts a vector of shells by angular momentum
-        inline std::vector<std::vector<Shell>> split_by_L(const std::vector<Shell> &shells) {
+/// @brief Sorts a vector of shells by angular momentum
+        inline std::vector<std::vector<Shell>> split_by_L(
+                const std::vector<Shell> &shells) {
             int lmax = max_l(shells);
             std::vector<std::vector<Shell>> sorted_shells;
             sorted_shells.resize(lmax + 1);
-            for (auto &&shell: shells) {
+            for (auto &&shell : shells) {
                 auto l = shell.contr[0].l;
                 sorted_shells[l].push_back(shell);
             }
             return sorted_shells;
         }
 
-        /// @brief computes the reduced set of product functions via pivoted Cholesky decomposition
-        /// @param shells set of shells
-        /// @param cholesky_threshold threshold for choosing a product function via pivoted Cholesky decomposition
-        /// @return reduced set of product functions
-        inline std::vector<Shell> shell_pivoted_cholesky(const std::vector<Shell> shells, const double cholesky_threshold) {
-
-            auto n = shells.size(); // number of shells
-            std::vector<size_t> shell_indices; // hash map of basis function indices to shell indices
-            auto L = shells[0].contr[0].l; // all shells must have same L
-            auto nbf = libint2::nbf(shells);  // total number of basis functions in vector of shells
+/// @brief computes the reduced set of product functions via pivoted Cholesky
+/// decomposition
+/// @param shells set of shells
+/// @param cholesky_threshold threshold for choosing a product function via
+/// pivoted Cholesky decomposition
+/// @return reduced set of product functions
+        inline std::vector<Shell> shell_pivoted_cholesky(
+                const std::vector<Shell> shells, const double cholesky_threshold) {
+            auto n = shells.size();  // number of shells
+            std::vector<size_t>
+                    shell_indices;  // hash map of basis function indices to shell indices
+            auto L = shells[0].contr[0].l;  // all shells must have same L
+            auto nbf = libint2::nbf(
+                    shells);  // total number of basis functions in vector of shells
             for (size_t i = 0; i < n; ++i) {
-                for (size_t j = 0; j < 2 * L + 1; ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b integrals
+                for (size_t j = 0; j < 2 * L + 1;
+                     ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b
+                    // integrals
                     shell_indices.push_back(i);
             }
             assert(shell_indices.size() == nbf);
             auto C = compute_coulomb_matrix(shells);
             std::vector<size_t> pivot(nbf);
-            for(auto i=0;i<nbf;++i){
+            for (auto i = 0; i < nbf; ++i) {
                 pivot[i] = i;
             }
-            // set pivot indices in ascending order of off diagonal elements of Coulomb matrix
-            // see Phys. Rev. A 101, 032504 (Accurate reproduction of strongly repulsive interatomic potentials)
+            // set pivot indices in ascending order of off diagonal elements of Coulomb
+            // matrix see Phys. Rev. A 101, 032504 (Accurate reproduction of strongly
+            // repulsive interatomic potentials)
             Eigen::MatrixXd C_off_diag = C;
             auto col_sum = C_off_diag.colwise().sum();
             // sort pivot indices in ascending order of column sums
-            std::sort(pivot.begin(), pivot.end(), [&col_sum](size_t i1, size_t i2) { return col_sum[i1] < col_sum[i2]; });
+            std::sort(pivot.begin(), pivot.end(), [&col_sum](size_t i1, size_t i2) {
+                return col_sum[i1] < col_sum[i2];
+            });
             // compute Cholesky decomposition
             auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
 
@@ -197,43 +210,53 @@ namespace libint2 {
             for (size_t i = 0; i < reduced_pivots.size(); ++i) {
                 // check if the reduced shell is already in reduced shells
                 if (std::find(reduced_shells.begin(), reduced_shells.end(),
-                              shells[shell_indices[reduced_pivots[i]]]) == reduced_shells.end())
-                reduced_shells.push_back(shells[shell_indices[reduced_pivots[i]]]);
+                              shells[shell_indices[reduced_pivots[i]]]) ==
+                    reduced_shells.end())
+                    reduced_shells.push_back(shells[shell_indices[reduced_pivots[i]]]);
             }
             return reduced_shells;
         }
 
-    }// namespace detail
+    }  // namespace detail
 
-    /// @brief class produces density fitting basis sets from products of AO basis functions
-    /// eliminates linearly dependent functions via pivoted Cholesky decomposition
-    /// see: J. Chem. Theory Comput. 2021, 17, 6886−6900 (Straightforward and Accurate Automatic Auxiliary Basis Set Generation for Molecular Calculations with Atomic Orbital Basis Sets)
+/// @brief class produces density fitting basis sets from products of AO basis
+/// functions eliminates linearly dependent functions via pivoted Cholesky
+/// decomposition see: J. Chem. Theory Comput. 2021, 17, 6886−6900
+/// (Straightforward and Accurate Automatic Auxiliary Basis Set Generation for
+/// Molecular Calculations with Atomic Orbital Basis Sets)
     class DFBasisSetGenerator {
     public:
-        /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO basis functions
-        /// see: J. Chem. Theory Comput. 2021, 17, 6886−6900 (Straightforward and Accurate Automatic Auxiliary Basis Set Generation for Molecular Calculations with Atomic Orbital Basis Sets)
+        /// @brief constructor for DFBS generator class, generates density fitting
+        /// basis set from products of AO basis functions see: J. Chem. Theory Comput.
+        /// 2021, 17, 6886−6900 (Straightforward and Accurate Automatic Auxiliary
+        /// Basis Set Generation for Molecular Calculations with Atomic Orbital Basis
+        /// Sets)
         /// @param obs_name name of basis set for AO functions
         /// @param atoms vector of atoms
-        /// @param cholesky_threshold threshold for choosing a product functions via pivoted Cholesky decomposition
-        DFBasisSetGenerator(std::string obs_name,
-                            const Atom &atom, const double cholesky_thershold = 1e-7) {
+        /// @param cholesky_threshold threshold for choosing a product functions via
+        /// pivoted Cholesky decomposition
+        DFBasisSetGenerator(std::string obs_name, const Atom &atom,
+                            const double cholesky_threshold = 1e-7) {
             // get AO basis shells for each atom
             auto atom_bs = BasisSet(obs_name, {atom});
             auto obs_shells = atom_bs.shells();
             // get primitive shells from AO functions
             auto primitive_shells = detail::uncontract(obs_shells);
-            //compute candidate shells
+            // compute candidate shells
             candidate_shells_ = detail::candidate_functions(primitive_shells);
-            cholesky_threshold_ = cholesky_thershold;
+            cholesky_threshold_ = cholesky_threshold;
         }
 
-        /// @brief constructor for DFBS generator class, generates density fitting basis set from products of AO shells provided by user
+        /// @brief constructor for DFBS generator class, generates density fitting
+        /// basis set from products of AO shells provided by user
         /// @param cluster vector of vector of shells for each atom
-        /// @param cholesky_threshold threshold for choosing a product functions via pivoted Cholesky decomposition
-        DFBasisSetGenerator(std::vector<Shell> shells, const double cholesky_thershold = 1e-7) {
+        /// @param cholesky_threshold threshold for choosing a product functions via
+        /// pivoted Cholesky decomposition
+        DFBasisSetGenerator(std::vector<Shell> shells,
+                            const double cholesky_threshold = 1e-7) {
             auto primitive_shells = detail::uncontract(shells);
             candidate_shells_ = detail::candidate_functions(primitive_shells);
-            cholesky_threshold_ = cholesky_thershold;
+            cholesky_threshold_ = cholesky_threshold;
         }
 
         DFBasisSetGenerator() = default;
@@ -241,40 +264,37 @@ namespace libint2 {
         ~DFBasisSetGenerator() = default;
 
         /// @brief returns the candidate shells (full set of product functions)
-        std::vector<Shell> candidate_shells() {
-            return candidate_shells_;
-        }
+        std::vector<Shell> candidate_shells() { return candidate_shells_; }
 
-        /// @brief returns the reduced shells (reduced set of product functions) computed via pivoted Cholesky decomposition
+        /// @brief returns the reduced shells (reduced set of product functions)
+        /// computed via pivoted Cholesky decomposition
         std::vector<Shell> reduced_shells() {
             if (reduced_shells_computed_)
                 return reduced_shells_;
             else {
                 auto candidate_splitted_in_L = detail::split_by_L(candidate_shells_);
                 for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
-                    auto reduced_shells_L = detail::shell_pivoted_cholesky(candidate_splitted_in_L[i],
-                                                                           cholesky_threshold_);
-                    reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(), reduced_shells_L.end());
+                    auto reduced_shells_L = detail::shell_pivoted_cholesky(
+                            candidate_splitted_in_L[i], cholesky_threshold_);
+                    reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(),
+                                           reduced_shells_L.end());
                 }
                 reduced_shells_computed_ = true;
             }
             return reduced_shells_;
         }
 
-        /// @brief returns the reduced basis set (reduced set of product functions) computed via pivoted Cholesky decomposition
-        const BasisSet reduced_basis() {
-            return BasisSet(reduced_shells());
-        }
-
+        /// @brief returns the reduced basis set (reduced set of product functions)
+        /// computed via pivoted Cholesky decomposition
+        const BasisSet reduced_basis() { return BasisSet(reduced_shells()); }
 
     private:
         double cholesky_threshold_;
-        std::vector<Shell> candidate_shells_;  //full set of product functions
-        std::vector<Shell> reduced_shells_;    //reduced set of product functions
+        std::vector<Shell> candidate_shells_;  // full set of product functions
+        std::vector<Shell> reduced_shells_;    // reduced set of product functions
         bool reduced_shells_computed_ = false;
-
     };
 
-} // namespace libint2
+}  // namespace libint2
 
 #endif /* _libint2_src_lib_libint_dfbs_generator_h_ */
diff --git a/include/libint2/pivoted_cholesky.h b/include/libint2/pivoted_cholesky.h
index 1046bc7b9..7887d019d 100644
--- a/include/libint2/pivoted_cholesky.h
+++ b/include/libint2/pivoted_cholesky.h
@@ -1,24 +1,41 @@
-//
-// Created by Kshitij Surjuse on 11/15/23.
-//
+/*
+ *  Copyright (C) 2004-2021 Edward F. Valeev
+ *
+ *  This file is part of Libint.
+ *
+ *  Libint is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Libint is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef _libint2_src_lib_libint_pivoted_cholesky_h_
+#define _libint2_src_lib_libint_pivoted_cholesky_h_
 
-#ifndef LIBINT_PIVOTED_CHOLESKY_H
-#define LIBINT_PIVOTED_CHOLESKY_H
-
-
-#include <iostream>
 #include <Eigen/Dense>
 #include <algorithm>
+#include <iostream>
 
 namespace libint2 {
 
-    /// @brief computes the pivoted Cholesky decomposition of a symmetric positive definite matrix
-    /// @param A symmetric positive definite matrix
-    /// @param tolerance tolerance for the error
-    /// @param pivot initial pivot indices
-    /// @return pivoted Cholesky decomposition of A
-    inline std::vector<size_t>
-    pivoted_cholesky(const Eigen::MatrixXd &A, const double &tolerance, const std::vector<size_t> &pivot) {
+/// @brief computes the pivoted Cholesky decomposition of a symmetric positive
+/// definite matrix
+/// @param A symmetric positive definite matrix
+/// @param tolerance tolerance for the error
+/// @param pivot initial pivot indices
+/// @return pivoted Cholesky decomposition of A
+    inline std::vector<size_t> pivoted_cholesky(const Eigen::MatrixXd &A,
+                                                const double tolerance,
+                                                const std::vector<size_t> &pivot) {
         // number of elements in A
         auto n = A.rows();
         // diagonal elements of A
@@ -44,7 +61,6 @@ namespace libint2 {
         }
 
         while (error > tolerance && m < n) {
-
             // update pivot indices
             // Errors in pivoted order
             std::vector<double> err(d.size());
@@ -58,7 +74,8 @@ namespace libint2 {
                 idx[i] = i;
             }
             // sort indices
-            std::sort(idx.begin(), idx.end(), [&err2](size_t i1, size_t i2) { return err2[i1] > err2[i2]; });
+            std::sort(idx.begin(), idx.end(),
+                      [&err2](size_t i1, size_t i2) { return err2[i1] > err2[i2]; });
             // subvector of piv
             std::vector<size_t> piv_subvec(piv.size() - m);
             for (size_t i = 0; i < piv_subvec.size(); ++i) {
@@ -76,26 +93,28 @@ namespace libint2 {
             // compute diagonal element
             L(m, pim) = std::sqrt(d[pim]);
 
-            //off-diagonal elements
+            // off-diagonal elements
             for (size_t i = m + 1; i < n; ++i) {
                 auto pii = piv[i];
-                //compute element
-                L(m, pii) = (m > 0) ? (A(pim, pii) - L.col(pim).head(m).dot(L.col(pii).head(m))) / L(m, pim) :
-                            (A(pim, pii)) / L(m, pim);
-                //update d
+                // compute element
+                L(m, pii) =
+                        (m > 0) ? (A(pim, pii) - L.col(pim).head(m).dot(L.col(pii).head(m))) /
+                                  L(m, pim)
+                                : (A(pim, pii)) / L(m, pim);
+                // update d
                 d[pii] -= L(m, pii) * L(m, pii);
             }
-            //update error
+            // update error
             if (m + 1 < n) {
                 error = d[piv[m + 1]];
                 for (size_t i = m + 1; i < n; ++i) {
                     error = std::max(d[piv[i]], error);
                 }
             }
-            //increase m
+            // increase m
             m++;
         }
-        //Transpose to get Cholesky vectors as columns
+        // Transpose to get Cholesky vectors as columns
         L.transposeInPlace();
         // Drop unnecessary columns
         L.conservativeResize(n, m);
@@ -110,6 +129,6 @@ namespace libint2 {
         return reduced_piv;
     }
 
-}
+}  // namespace libint2
 
-#endif //LIBINT_PIVOTED_CHOLESKY_H
+#endif  //_libint2_src_lib_libint_pivoted_cholesky_h_

From aca6d344c9b43441e864e1b555206734586e2e62 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 23 Nov 2023 18:03:19 -0500
Subject: [PATCH 19/44] libint initialized before computing integrals in
 `hartree-fock++.cc`

---
 tests/hartree-fock/hartree-fock++.cc | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc
index 287410234..b06d88f68 100644
--- a/tests/hartree-fock/hartree-fock++.cc
+++ b/tests/hartree-fock/hartree-fock++.cc
@@ -285,11 +285,13 @@ int main(int argc, char* argv[]) {
       BasisSet obs(basisname, atoms);
       cout << "orbital basis set rank = " << obs.nbf() << endl;
 
+      // initializes the Libint integrals library ... now ready to compute
+      libint2::initialize();
+
 #ifdef HAVE_DENSITY_FITTING
       BasisSet dfbs;
       if (do_density_fitting) {
           if (strcmp(dfbasisname, "autodf") == 0) {
-              if (!libint2::initialized()) libint2::initialize();
               std::vector<Shell> dfbs_shells;
               for(auto &&atom:atoms) {
                   libint2::DFBasisSetGenerator dfbs_generator(basisname, atom, cholesky_threshold);
@@ -307,10 +309,6 @@ int main(int argc, char* argv[]) {
     /*** compute 1-e integrals       ***/
     /*** =========================== ***/
 
-    // initializes the Libint integrals library ... now ready to compute
-      if (!libint2::initialized())
-          libint2::initialize();
-
       // compute OBS non-negligible shell-pair list
       {
           const auto tstart = std::chrono::high_resolution_clock::now();

From 62ceda38f17b9543fd0c01bc62d008c4249f9a0a Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Sat, 25 Nov 2023 18:31:05 -0500
Subject: [PATCH 20/44] reformatted code in `dfbs_generator.h` and
 `pivoted_cholesky.h` with clang-format,

---
 include/libint2/dfbs_generator.h   | 419 +++++++++++++++--------------
 include/libint2/pivoted_cholesky.h | 172 ++++++------
 2 files changed, 296 insertions(+), 295 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index b608493d0..335767079 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -27,146 +27,146 @@
 #include <libint2/boys.h>
 #include <libint2/pivoted_cholesky.h>
 #include <math.h>
+
 #include <Eigen/Dense>
 #include <Eigen/Eigenvalues>
 #include <algorithm>
 
 namespace libint2 {
 
-    namespace detail {
+namespace detail {
 
 /// Computes uncontracted shells from a vector of shells
 /// @param[in] cluster a vector of shells
 /// @return a vector of uncontracted shells
-        inline std::vector<Shell> uncontract(const std::vector<Shell> &shells) {
-            std::vector<Shell> primitive_shells;
-            for (const auto &contracted_shell : shells) {
-                for (size_t p = 0; p < contracted_shell.nprim(); p++) {
-                    const auto prim_shell = contracted_shell.extract_primitive(p, true);
-                    // if dealing with generally contracted basis (e.g., cc-pvxz) represented
-                    // as a segmented basis need to remove duplicates
-                    if (std::find(primitive_shells.begin(), primitive_shells.end(),
-                                  prim_shell) == primitive_shells.end())
-                        primitive_shells.emplace_back(std::move(prim_shell));
-                }
-            }
-            return primitive_shells;
-        }
+inline std::vector<Shell> uncontract(const std::vector<Shell> &shells) {
+  std::vector<Shell> primitive_shells;
+  for (const auto &contracted_shell : shells) {
+    for (size_t p = 0; p < contracted_shell.nprim(); p++) {
+      const auto prim_shell = contracted_shell.extract_primitive(p, true);
+      // if dealing with generally contracted basis (e.g., cc-pvxz) represented
+      // as a segmented basis need to remove duplicates
+      if (std::find(primitive_shells.begin(), primitive_shells.end(),
+                    prim_shell) == primitive_shells.end())
+        primitive_shells.emplace_back(std::move(prim_shell));
+    }
+  }
+  return primitive_shells;
+}
 
 /// @brief returns \Gamma(x)  of x
-        inline double gamma_function(const double x) { return std::tgamma(x); }
+inline double gamma_function(const double x) { return std::tgamma(x); }
 
 /// @brief return effective exponent of product of two primitive shells
 /// @param shell1 first shell
 /// @param shell2 second shell
 /// @param L total angular momentum of product function
 /// @return effective exponent of product function
-        inline double alpha_eff(const Shell &shell1, const Shell &shell2,
-                                const int L) {
-            auto alpha1 = shell1.alpha[0];
-            auto alpha2 = shell2.alpha[0];
-            auto l1 = shell1.contr[0].l;
-            auto l2 = shell2.contr[0].l;
-            auto prefactor =
-                    std::pow((gamma_function(L + 2.) * gamma_function(l1 + l2 + 1.5)) /
-                             (gamma_function(l1 + l2 + 2.) * gamma_function(L + 1.5)),
-                             2.);
-            return prefactor * (alpha1 + alpha2);
-        }
+inline double alpha_eff(const Shell &shell1, const Shell &shell2, const int L) {
+  auto alpha1 = shell1.alpha[0];
+  auto alpha2 = shell2.alpha[0];
+  auto l1 = shell1.contr[0].l;
+  auto l2 = shell2.contr[0].l;
+  auto prefactor =
+      std::pow((gamma_function(L + 2.) * gamma_function(l1 + l2 + 1.5)) /
+                   (gamma_function(l1 + l2 + 2.) * gamma_function(L + 1.5)),
+               2.);
+  return prefactor * (alpha1 + alpha2);
+}
 
 /// @brief creates a set of product functions from a set of primitive shells
 /// @param primitive_shells set of primitive shells
-        inline std::vector<Shell> product_functions(
-                const std::vector<Shell> &primitive_shells) {
-            std::vector<Shell> product_functions;
-            for (size_t i = 0; i < primitive_shells.size(); ++i) {
-                for (size_t j = 0; j <= i; ++j) {
-                    auto li = primitive_shells[i].contr[0].l;
-                    auto lj = primitive_shells[j].contr[0].l;
-                    for (auto L = std::abs(li - lj); L <= li + lj; L++) {
-                        auto alpha = libint2::svector<double>(
-                                {alpha_eff(primitive_shells[i], primitive_shells[j], L)});
-                        libint2::svector<Shell::Contraction> contr_;
-                        Shell::Contraction contr1;
-                        contr1.l = L;
-                        contr1.pure = true;  // libint2 needs solid harmonics for 2c2b integrals
-                        contr1.coeff = {1.0};
-                        contr_.push_back(contr1);
-                        assert(primitive_shells[i].O == primitive_shells[j].O);
-                        auto shell = Shell(alpha, contr_, primitive_shells[i].O);
-                        if (std::find(product_functions.begin(), product_functions.end(),
-                                      shell) == product_functions.end())
-                            product_functions.emplace_back(shell);
-                    }
-                }
-            }
-            return product_functions;
-        }
+inline std::vector<Shell> product_functions(
+    const std::vector<Shell> &primitive_shells) {
+  std::vector<Shell> product_functions;
+  for (size_t i = 0; i < primitive_shells.size(); ++i) {
+    for (size_t j = 0; j <= i; ++j) {
+      auto li = primitive_shells[i].contr[0].l;
+      auto lj = primitive_shells[j].contr[0].l;
+      for (auto L = std::abs(li - lj); L <= li + lj; L++) {
+        auto alpha = libint2::svector<double>(
+            {alpha_eff(primitive_shells[i], primitive_shells[j], L)});
+        libint2::svector<Shell::Contraction> contr_;
+        Shell::Contraction contr1;
+        contr1.l = L;
+        contr1.pure = true;  // libint2 needs solid harmonics for 2c2b integrals
+        contr1.coeff = {1.0};
+        contr_.push_back(contr1);
+        assert(primitive_shells[i].O == primitive_shells[j].O);
+        auto shell = Shell(alpha, contr_, primitive_shells[i].O);
+        if (std::find(product_functions.begin(), product_functions.end(),
+                      shell) == product_functions.end())
+          product_functions.emplace_back(shell);
+      }
+    }
+  }
+  return product_functions;
+}
 
 /// @brief creates a set of candidate product shells from a set of primitive
 /// shells
 /// @param primitive_shells set of primitive shells
 /// @return set of candidate product shells
-        inline std::vector<Shell> candidate_functions(
-                const std::vector<Shell> &primitive_shells) {
-            return product_functions(primitive_shells);
-        }
+inline std::vector<Shell> candidate_functions(
+    const std::vector<Shell> &primitive_shells) {
+  return product_functions(primitive_shells);
+}
 
 /// @brief returns a hash map of shell indices to basis function indices
-        inline std::vector<size_t> map_shell_to_basis_function(
-                const std::vector<libint2::Shell> &shells) {
-            std::vector<size_t> result;
-            result.reserve(shells.size());
+inline std::vector<size_t> map_shell_to_basis_function(
+    const std::vector<libint2::Shell> &shells) {
+  std::vector<size_t> result;
+  result.reserve(shells.size());
 
-            size_t n = 0;
-            for (auto &&shell : shells) {
-                result.push_back(n);
-                n += shell.size();
-            }
+  size_t n = 0;
+  for (auto &&shell : shells) {
+    result.push_back(n);
+    n += shell.size();
+  }
 
-            return result;
-        }
+  return result;
+}
 
 /// @brief computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
 /// @param shells set of shells
 /// @return Coulomb matrix
-        inline Eigen::MatrixXd compute_coulomb_matrix(
-                const std::vector<Shell> &shells) {
-            const auto n = nbf(shells);
-            Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
-            using libint2::Engine;
-            Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells));
-            engine.set(BraKet::xs_xs);
-            engine.set(ScreeningMethod::Conservative);
-            auto shell2bf = map_shell_to_basis_function(shells);
-            const auto &buf = engine.results();
-            for (size_t s1 = 0; s1 != shells.size(); ++s1) {
-                auto bf1 = shell2bf[s1];
-                auto n1 = shells[s1].size();
-                for (size_t s2 = 0; s2 <= s1; ++s2) {
-                    auto bf2 = shell2bf[s2];
-                    auto n2 = shells[s2].size();
-                    engine.compute(shells[s1], shells[s2]);
-                    Eigen::Map<const Eigen::MatrixXd> buf_mat(buf[0], n1, n2);
-                    result.block(bf1, bf2, n1, n2) = buf_mat;
-                    if (s1 != s2) result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
-                }
-            }
-            return result;
-        }
+inline Eigen::MatrixXd compute_coulomb_matrix(
+    const std::vector<Shell> &shells) {
+  const auto n = nbf(shells);
+  Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
+  using libint2::Engine;
+  Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells));
+  engine.set(BraKet::xs_xs);
+  engine.set(ScreeningMethod::Conservative);
+  auto shell2bf = map_shell_to_basis_function(shells);
+  const auto &buf = engine.results();
+  for (size_t s1 = 0; s1 != shells.size(); ++s1) {
+    auto bf1 = shell2bf[s1];
+    auto n1 = shells[s1].size();
+    for (size_t s2 = 0; s2 <= s1; ++s2) {
+      auto bf2 = shell2bf[s2];
+      auto n2 = shells[s2].size();
+      engine.compute(shells[s1], shells[s2]);
+      Eigen::Map<const Eigen::MatrixXd> buf_mat(buf[0], n1, n2);
+      result.block(bf1, bf2, n1, n2) = buf_mat;
+      if (s1 != s2) result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
+    }
+  }
+  return result;
+}
 
 /// @brief Sorts a vector of shells by angular momentum
-        inline std::vector<std::vector<Shell>> split_by_L(
-                const std::vector<Shell> &shells) {
-            int lmax = max_l(shells);
-            std::vector<std::vector<Shell>> sorted_shells;
-            sorted_shells.resize(lmax + 1);
-            for (auto &&shell : shells) {
-                auto l = shell.contr[0].l;
-                sorted_shells[l].push_back(shell);
-            }
-            return sorted_shells;
-        }
+inline std::vector<std::vector<Shell>> split_by_L(
+    const std::vector<Shell> &shells) {
+  int lmax = max_l(shells);
+  std::vector<std::vector<Shell>> sorted_shells;
+  sorted_shells.resize(lmax + 1);
+  for (auto &&shell : shells) {
+    auto l = shell.contr[0].l;
+    sorted_shells[l].push_back(shell);
+  }
+  return sorted_shells;
+}
 
 /// @brief computes the reduced set of product functions via pivoted Cholesky
 /// decomposition
@@ -174,126 +174,127 @@ namespace libint2 {
 /// @param cholesky_threshold threshold for choosing a product function via
 /// pivoted Cholesky decomposition
 /// @return reduced set of product functions
-        inline std::vector<Shell> shell_pivoted_cholesky(
-                const std::vector<Shell> shells, const double cholesky_threshold) {
-            auto n = shells.size();  // number of shells
-            std::vector<size_t>
-                    shell_indices;  // hash map of basis function indices to shell indices
-            auto L = shells[0].contr[0].l;  // all shells must have same L
-            auto nbf = libint2::nbf(
-                    shells);  // total number of basis functions in vector of shells
-            for (size_t i = 0; i < n; ++i) {
-                for (size_t j = 0; j < 2 * L + 1;
-                     ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b
-                    // integrals
-                    shell_indices.push_back(i);
-            }
-            assert(shell_indices.size() == nbf);
-            auto C = compute_coulomb_matrix(shells);
-            std::vector<size_t> pivot(nbf);
-            for (auto i = 0; i < nbf; ++i) {
-                pivot[i] = i;
-            }
-            // set pivot indices in ascending order of off diagonal elements of Coulomb
-            // matrix see Phys. Rev. A 101, 032504 (Accurate reproduction of strongly
-            // repulsive interatomic potentials)
-            Eigen::MatrixXd C_off_diag = C;
-            auto col_sum = C_off_diag.colwise().sum();
-            // sort pivot indices in ascending order of column sums
-            std::sort(pivot.begin(), pivot.end(), [&col_sum](size_t i1, size_t i2) {
-                return col_sum[i1] < col_sum[i2];
-            });
-            // compute Cholesky decomposition
-            auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
+inline std::vector<Shell> shell_pivoted_cholesky(
+    const std::vector<Shell> shells, const double cholesky_threshold) {
+  auto n = shells.size();  // number of shells
+  std::vector<size_t>
+      shell_indices;  // hash map of basis function indices to shell indices
+  auto L = shells[0].contr[0].l;  // all shells must have same L
+  auto nbf = libint2::nbf(
+      shells);  // total number of basis functions in vector of shells
+  for (size_t i = 0; i < n; ++i) {
+    for (size_t j = 0; j < 2 * L + 1;
+         ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b
+      // integrals
+      shell_indices.push_back(i);
+  }
+  assert(shell_indices.size() == nbf);
+  auto C = compute_coulomb_matrix(shells);
+  std::vector<size_t> pivot(nbf);
+  for (auto i = 0; i < nbf; ++i) {
+    pivot[i] = i;
+  }
+  // set pivot indices in ascending order of off diagonal elements of Coulomb
+  // matrix see Phys. Rev. A 101, 032504 (Accurate reproduction of strongly
+  // repulsive interatomic potentials)
+  Eigen::MatrixXd C_off_diag = C;
+  auto col_sum = C_off_diag.colwise().sum();
+  // sort pivot indices in ascending order of column sums
+  std::sort(pivot.begin(), pivot.end(), [&col_sum](size_t i1, size_t i2) {
+    return col_sum[i1] < col_sum[i2];
+  });
+  // compute Cholesky decomposition
+  auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
 
-            std::vector<Shell> reduced_shells;
-            for (size_t i = 0; i < reduced_pivots.size(); ++i) {
-                // check if the reduced shell is already in reduced shells
-                if (std::find(reduced_shells.begin(), reduced_shells.end(),
-                              shells[shell_indices[reduced_pivots[i]]]) ==
-                    reduced_shells.end())
-                    reduced_shells.push_back(shells[shell_indices[reduced_pivots[i]]]);
-            }
-            return reduced_shells;
-        }
+  std::vector<Shell> reduced_shells;
+  for (size_t i = 0; i < reduced_pivots.size(); ++i) {
+    // check if the reduced shell is already in reduced shells
+    if (std::find(reduced_shells.begin(), reduced_shells.end(),
+                  shells[shell_indices[reduced_pivots[i]]]) ==
+        reduced_shells.end())
+      reduced_shells.push_back(shells[shell_indices[reduced_pivots[i]]]);
+  }
+  return reduced_shells;
+}
 
-    }  // namespace detail
+}  // namespace detail
 
 /// @brief class produces density fitting basis sets from products of AO basis
 /// functions eliminates linearly dependent functions via pivoted Cholesky
 /// decomposition see: J. Chem. Theory Comput. 2021, 17, 6886−6900
 /// (Straightforward and Accurate Automatic Auxiliary Basis Set Generation for
 /// Molecular Calculations with Atomic Orbital Basis Sets)
-    class DFBasisSetGenerator {
-    public:
-        /// @brief constructor for DFBS generator class, generates density fitting
-        /// basis set from products of AO basis functions see: J. Chem. Theory Comput.
-        /// 2021, 17, 6886−6900 (Straightforward and Accurate Automatic Auxiliary
-        /// Basis Set Generation for Molecular Calculations with Atomic Orbital Basis
-        /// Sets)
-        /// @param obs_name name of basis set for AO functions
-        /// @param atoms vector of atoms
-        /// @param cholesky_threshold threshold for choosing a product functions via
-        /// pivoted Cholesky decomposition
-        DFBasisSetGenerator(std::string obs_name, const Atom &atom,
-                            const double cholesky_threshold = 1e-7) {
-            // get AO basis shells for each atom
-            auto atom_bs = BasisSet(obs_name, {atom});
-            auto obs_shells = atom_bs.shells();
-            // get primitive shells from AO functions
-            auto primitive_shells = detail::uncontract(obs_shells);
-            // compute candidate shells
-            candidate_shells_ = detail::candidate_functions(primitive_shells);
-            cholesky_threshold_ = cholesky_threshold;
-        }
+class DFBasisSetGenerator {
+ public:
+  /// @brief constructor for DFBS generator class, generates density fitting
+  /// basis set from products of AO basis functions see: J. Chem. Theory Comput.
+  /// 2021, 17, 6886−6900 (Straightforward and Accurate Automatic Auxiliary
+  /// Basis Set Generation for Molecular Calculations with Atomic Orbital Basis
+  /// Sets)
+  /// @param obs_name name of basis set for AO functions
+  /// @param atoms vector of atoms
+  /// @param cholesky_threshold threshold for choosing a product functions via
+  /// pivoted Cholesky decomposition
+  DFBasisSetGenerator(std::string obs_name, const Atom &atom,
+                      const double cholesky_threshold = 1e-7) {
+    // get AO basis shells for each atom
+    auto atom_bs = BasisSet(obs_name, {atom});
+    auto obs_shells = atom_bs.shells();
+    // get primitive shells from AO functions
+    auto primitive_shells = detail::uncontract(obs_shells);
+    // compute candidate shells
+    candidate_shells_ = detail::candidate_functions(primitive_shells);
+    cholesky_threshold_ = cholesky_threshold;
+  }
 
-        /// @brief constructor for DFBS generator class, generates density fitting
-        /// basis set from products of AO shells provided by user
-        /// @param cluster vector of vector of shells for each atom
-        /// @param cholesky_threshold threshold for choosing a product functions via
-        /// pivoted Cholesky decomposition
-        DFBasisSetGenerator(std::vector<Shell> shells,
-                            const double cholesky_threshold = 1e-7) {
-            auto primitive_shells = detail::uncontract(shells);
-            candidate_shells_ = detail::candidate_functions(primitive_shells);
-            cholesky_threshold_ = cholesky_threshold;
-        }
+  /// @brief constructor for DFBS generator class, generates density fitting
+  /// basis set from products of AO shells provided by user
+  /// @param cluster vector of vector of shells for each atom
+  /// @param cholesky_threshold threshold for choosing a product functions via
+  /// pivoted Cholesky decomposition
+  DFBasisSetGenerator(std::vector<Shell> shells,
+                      const double cholesky_threshold = 1e-7) {
+    auto primitive_shells = detail::uncontract(shells);
+    candidate_shells_ = detail::candidate_functions(primitive_shells);
+    cholesky_threshold_ = cholesky_threshold;
+  }
 
-        DFBasisSetGenerator() = default;
+  DFBasisSetGenerator() = default;
 
-        ~DFBasisSetGenerator() = default;
+  ~DFBasisSetGenerator() = default;
 
-        /// @brief returns the candidate shells (full set of product functions)
-        std::vector<Shell> candidate_shells() { return candidate_shells_; }
+  /// @brief returns the candidate shells (full set of product functions)
+  std::vector<Shell> candidate_shells() { return candidate_shells_; }
 
-        /// @brief returns the reduced shells (reduced set of product functions)
-        /// computed via pivoted Cholesky decomposition
-        std::vector<Shell> reduced_shells() {
-            if (reduced_shells_computed_)
-                return reduced_shells_;
-            else {
-                auto candidate_splitted_in_L = detail::split_by_L(candidate_shells_);
-                for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
-                    auto reduced_shells_L = detail::shell_pivoted_cholesky(
-                            candidate_splitted_in_L[i], cholesky_threshold_);
-                    reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(),
-                                           reduced_shells_L.end());
-                }
-                reduced_shells_computed_ = true;
-            }
-            return reduced_shells_;
-        }
+  /// @brief returns the reduced shells (reduced set of product functions)
+  /// computed via pivoted Cholesky decomposition
+  std::vector<Shell> reduced_shells() {
+    if (reduced_shells_computed_)
+      return reduced_shells_;
+    else {
+      auto candidate_splitted_in_L = detail::split_by_L(candidate_shells_);
+      for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
+        auto reduced_shells_L = detail::shell_pivoted_cholesky(
+            candidate_splitted_in_L[i], cholesky_threshold_);
+        reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(),
+                               reduced_shells_L.end());
+      }
+      reduced_shells_computed_ = true;
+    }
+    return reduced_shells_;
+  }
 
-        /// @brief returns the reduced basis set (reduced set of product functions)
-        /// computed via pivoted Cholesky decomposition
-        const BasisSet reduced_basis() { return BasisSet(reduced_shells()); }
+  /// @brief returns the reduced basis set (reduced set of product
+  /// functions)
+  /// computed via pivoted Cholesky decomposition
+  const BasisSet reduced_basis() { return BasisSet(reduced_shells()); }
 
-    private:
-        double cholesky_threshold_;
-        std::vector<Shell> candidate_shells_;  // full set of product functions
-        std::vector<Shell> reduced_shells_;    // reduced set of product functions
-        bool reduced_shells_computed_ = false;
-    };
+ private:
+  double cholesky_threshold_;
+  std::vector<Shell> candidate_shells_;  // full set of product functions
+  std::vector<Shell> reduced_shells_;    // reduced set of product functions
+  bool reduced_shells_computed_ = false;
+};
 
 }  // namespace libint2
 
diff --git a/include/libint2/pivoted_cholesky.h b/include/libint2/pivoted_cholesky.h
index 7887d019d..0ed7d6dbd 100644
--- a/include/libint2/pivoted_cholesky.h
+++ b/include/libint2/pivoted_cholesky.h
@@ -33,101 +33,101 @@ namespace libint2 {
 /// @param tolerance tolerance for the error
 /// @param pivot initial pivot indices
 /// @return pivoted Cholesky decomposition of A
-    inline std::vector<size_t> pivoted_cholesky(const Eigen::MatrixXd &A,
-                                                const double tolerance,
-                                                const std::vector<size_t> &pivot) {
-        // number of elements in A
-        auto n = A.rows();
-        // diagonal elements of A
-        std::vector<double> d(n);
-        // initial error
-        auto error = A.diagonal()[0];
-        for (size_t i = 0; i < n; ++i) {
-            d[i] = A.diagonal()[i];
-            error = std::max(d[i], error);
-        }
+inline std::vector<size_t> pivoted_cholesky(const Eigen::MatrixXd &A,
+                                            const double tolerance,
+                                            const std::vector<size_t> &pivot) {
+  // number of elements in A
+  auto n = A.rows();
+  // diagonal elements of A
+  std::vector<double> d(n);
+  // initial error
+  auto error = A.diagonal()[0];
+  for (size_t i = 0; i < n; ++i) {
+    d[i] = A.diagonal()[i];
+    error = std::max(d[i], error);
+  }
 
-        // Return matrix
-        Eigen::MatrixXd L(n, n);
+  // Return matrix
+  Eigen::MatrixXd L(n, n);
 
-        // loop index
-        size_t m = 0;
+  // loop index
+  size_t m = 0;
 
-        // copy input pivot indices
-        std::vector<size_t> piv;
-        piv.reserve(n);
-        for (size_t i = 0; i < n; ++i) {
-            piv.push_back(pivot[i]);
-        }
+  // copy input pivot indices
+  std::vector<size_t> piv;
+  piv.reserve(n);
+  for (size_t i = 0; i < n; ++i) {
+    piv.push_back(pivot[i]);
+  }
 
-        while (error > tolerance && m < n) {
-            // update pivot indices
-            // Errors in pivoted order
-            std::vector<double> err(d.size());
-            for (size_t i = 0; i < d.size(); ++i) {
-                err[i] = d[piv[i]];
-            }
-            // error vector after mth element
-            std::vector<double> err2(err.begin() + m, err.end());
-            std::vector<size_t> idx(err2.size());
-            for (size_t i = 0; i < idx.size(); ++i) {
-                idx[i] = i;
-            }
-            // sort indices
-            std::sort(idx.begin(), idx.end(),
-                      [&err2](size_t i1, size_t i2) { return err2[i1] > err2[i2]; });
-            // subvector of piv
-            std::vector<size_t> piv_subvec(piv.size() - m);
-            for (size_t i = 0; i < piv_subvec.size(); ++i) {
-                piv_subvec[i] = piv[i + m];
-            }
-            // sort piv
-            for (size_t i = 0; i < idx.size(); ++i) {
-                piv[i + m] = piv_subvec[idx[i]];
-            }
+  while (error > tolerance && m < n) {
+    // update pivot indices
+    // Errors in pivoted order
+    std::vector<double> err(d.size());
+    for (size_t i = 0; i < d.size(); ++i) {
+      err[i] = d[piv[i]];
+    }
+    // error vector after mth element
+    std::vector<double> err2(err.begin() + m, err.end());
+    std::vector<size_t> idx(err2.size());
+    for (size_t i = 0; i < idx.size(); ++i) {
+      idx[i] = i;
+    }
+    // sort indices
+    std::sort(idx.begin(), idx.end(),
+              [&err2](size_t i1, size_t i2) { return err2[i1] > err2[i2]; });
+    // subvector of piv
+    std::vector<size_t> piv_subvec(piv.size() - m);
+    for (size_t i = 0; i < piv_subvec.size(); ++i) {
+      piv_subvec[i] = piv[i + m];
+    }
+    // sort piv
+    for (size_t i = 0; i < idx.size(); ++i) {
+      piv[i + m] = piv_subvec[idx[i]];
+    }
 
-            // TODO: find a better way to update pivot indices
+    // TODO: find a better way to update pivot indices
 
-            // current pivot index
-            size_t pim = piv[m];
-            // compute diagonal element
-            L(m, pim) = std::sqrt(d[pim]);
+    // current pivot index
+    size_t pim = piv[m];
+    // compute diagonal element
+    L(m, pim) = std::sqrt(d[pim]);
 
-            // off-diagonal elements
-            for (size_t i = m + 1; i < n; ++i) {
-                auto pii = piv[i];
-                // compute element
-                L(m, pii) =
-                        (m > 0) ? (A(pim, pii) - L.col(pim).head(m).dot(L.col(pii).head(m))) /
-                                  L(m, pim)
-                                : (A(pim, pii)) / L(m, pim);
-                // update d
-                d[pii] -= L(m, pii) * L(m, pii);
-            }
-            // update error
-            if (m + 1 < n) {
-                error = d[piv[m + 1]];
-                for (size_t i = m + 1; i < n; ++i) {
-                    error = std::max(d[piv[i]], error);
-                }
-            }
-            // increase m
-            m++;
-        }
-        // Transpose to get Cholesky vectors as columns
-        L.transposeInPlace();
-        // Drop unnecessary columns
-        L.conservativeResize(n, m);
+    // off-diagonal elements
+    for (size_t i = m + 1; i < n; ++i) {
+      auto pii = piv[i];
+      // compute element
+      L(m, pii) =
+          (m > 0) ? (A(pim, pii) - L.col(pim).head(m).dot(L.col(pii).head(m))) /
+                        L(m, pim)
+                  : (A(pim, pii)) / L(m, pim);
+      // update d
+      d[pii] -= L(m, pii) * L(m, pii);
+    }
+    // update error
+    if (m + 1 < n) {
+      error = d[piv[m + 1]];
+      for (size_t i = m + 1; i < n; ++i) {
+        error = std::max(d[piv[i]], error);
+      }
+    }
+    // increase m
+    m++;
+  }
+  // Transpose to get Cholesky vectors as columns
+  L.transposeInPlace();
+  // Drop unnecessary columns
+  L.conservativeResize(n, m);
 
-        // return reduced pivot indices
-        std::vector<size_t> reduced_piv;
-        reduced_piv.reserve(m);
-        for (size_t i = 0; i < m; ++i) {
-            reduced_piv.push_back(piv[i]);
-        }
+  // return reduced pivot indices
+  std::vector<size_t> reduced_piv;
+  reduced_piv.reserve(m);
+  for (size_t i = 0; i < m; ++i) {
+    reduced_piv.push_back(piv[i]);
+  }
 
-        return reduced_piv;
-    }
+  return reduced_piv;
+}
 
 }  // namespace libint2
 

From 6640567d631e24bfbbf07d33dd227f19e9409a4b Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Sat, 25 Nov 2023 18:33:43 -0500
Subject: [PATCH 21/44] bugfix: if only one product shell exist for an `L` then
 is included in reduced set

---
 include/libint2/dfbs_generator.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 335767079..2df4eea48 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -274,8 +274,12 @@ class DFBasisSetGenerator {
     else {
       auto candidate_splitted_in_L = detail::split_by_L(candidate_shells_);
       for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
-        auto reduced_shells_L = detail::shell_pivoted_cholesky(
-            candidate_splitted_in_L[i], cholesky_threshold_);
+        std::vector<Shell> reduced_shells_L;
+        if (candidate_splitted_in_L[i].size() > 1)
+          reduced_shells_L = detail::shell_pivoted_cholesky(
+              candidate_splitted_in_L[i], cholesky_threshold_);
+        else
+          reduced_shells_L = candidate_splitted_in_L[i];
         reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(),
                                reduced_shells_L.end());
       }

From 68d4109abb04a3a6badf17b643283121514f2e9e Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Mon, 27 Nov 2023 15:51:42 -0500
Subject: [PATCH 22/44] removed unnecessary empty file, removed passing copies
 as arguments

---
 include/libint2/dfbs_generator.h                  | 4 ++--
 include/libint2/util/generated/pivoted_cholesky.h | 0
 2 files changed, 2 insertions(+), 2 deletions(-)
 delete mode 100644 include/libint2/util/generated/pivoted_cholesky.h

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 2df4eea48..b19931686 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -175,7 +175,7 @@ inline std::vector<std::vector<Shell>> split_by_L(
 /// pivoted Cholesky decomposition
 /// @return reduced set of product functions
 inline std::vector<Shell> shell_pivoted_cholesky(
-    const std::vector<Shell> shells, const double cholesky_threshold) {
+    const std::vector<Shell> &shells, const double cholesky_threshold) {
   auto n = shells.size();  // number of shells
   std::vector<size_t>
       shell_indices;  // hash map of basis function indices to shell indices
@@ -252,7 +252,7 @@ class DFBasisSetGenerator {
   /// @param cluster vector of vector of shells for each atom
   /// @param cholesky_threshold threshold for choosing a product functions via
   /// pivoted Cholesky decomposition
-  DFBasisSetGenerator(std::vector<Shell> shells,
+  DFBasisSetGenerator(std::vector<Shell> &shells,
                       const double cholesky_threshold = 1e-7) {
     auto primitive_shells = detail::uncontract(shells);
     candidate_shells_ = detail::candidate_functions(primitive_shells);
diff --git a/include/libint2/util/generated/pivoted_cholesky.h b/include/libint2/util/generated/pivoted_cholesky.h
deleted file mode 100644
index e69de29bb..000000000

From 0b6a961645e43c1b8c1bb575c4a9be07f140f2ca Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 14 Dec 2023 19:37:33 +0530
Subject: [PATCH 23/44] use `const auto` for `const` variables, and bumped up
 eri2 available `max_am`

---
 configure.ac                         |  3 +-
 include/libint2/dfbs_generator.h     | 43 ++++++++++++++--------------
 include/libint2/pivoted_cholesky.h   |  2 +-
 tests/hartree-fock/hartree-fock++.cc |  1 -
 4 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/configure.ac b/configure.ac
index 95d5ce520..74dd4ec06 100644
--- a/configure.ac
+++ b/configure.ac
@@ -598,7 +598,7 @@ AS_HELP_STRING([--with-eri2-max-am=N],[Support 2-center ERIs for Gaussians of an
      else
       ERI2_MAX_AM=$withval
      fi
-     if test $ERI2_MAX_AM -ge 8; then
+     if test $ERI2_MAX_AM -ge 13; then
       AC_MSG_ERROR([Value for --with-eri2-max-am too high ($withval). Are you sure you know what you are doing?])
      fi
      AC_SUBST(ERI2_MAX_AM)
@@ -1541,4 +1541,3 @@ AC_CONFIG_FILES(src/bin/MakeVars src/lib/MakeVars src/lib/MakeRules src/lib/libi
 
 dnl --------- Done! ---------
 AC_OUTPUT
-
diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index b19931686..da01cde23 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -63,11 +63,11 @@ inline double gamma_function(const double x) { return std::tgamma(x); }
 /// @param L total angular momentum of product function
 /// @return effective exponent of product function
 inline double alpha_eff(const Shell &shell1, const Shell &shell2, const int L) {
-  auto alpha1 = shell1.alpha[0];
-  auto alpha2 = shell2.alpha[0];
-  auto l1 = shell1.contr[0].l;
-  auto l2 = shell2.contr[0].l;
-  auto prefactor =
+  const auto alpha1 = shell1.alpha[0];
+  const auto alpha2 = shell2.alpha[0];
+  const auto l1 = shell1.contr[0].l;
+  const auto l2 = shell2.contr[0].l;
+  const auto prefactor =
       std::pow((gamma_function(L + 2.) * gamma_function(l1 + l2 + 1.5)) /
                    (gamma_function(l1 + l2 + 2.) * gamma_function(L + 1.5)),
                2.);
@@ -81,10 +81,10 @@ inline std::vector<Shell> product_functions(
   std::vector<Shell> product_functions;
   for (size_t i = 0; i < primitive_shells.size(); ++i) {
     for (size_t j = 0; j <= i; ++j) {
-      auto li = primitive_shells[i].contr[0].l;
-      auto lj = primitive_shells[j].contr[0].l;
+      const auto li = primitive_shells[i].contr[0].l;
+      const auto lj = primitive_shells[j].contr[0].l;
       for (auto L = std::abs(li - lj); L <= li + lj; L++) {
-        auto alpha = libint2::svector<double>(
+        const auto alpha = libint2::svector<double>(
             {alpha_eff(primitive_shells[i], primitive_shells[j], L)});
         libint2::svector<Shell::Contraction> contr_;
         Shell::Contraction contr1;
@@ -93,7 +93,7 @@ inline std::vector<Shell> product_functions(
         contr1.coeff = {1.0};
         contr_.push_back(contr1);
         assert(primitive_shells[i].O == primitive_shells[j].O);
-        auto shell = Shell(alpha, contr_, primitive_shells[i].O);
+        const auto shell = Shell(alpha, contr_, primitive_shells[i].O);
         if (std::find(product_functions.begin(), product_functions.end(),
                       shell) == product_functions.end())
           product_functions.emplace_back(shell);
@@ -138,7 +138,7 @@ inline Eigen::MatrixXd compute_coulomb_matrix(
   Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells));
   engine.set(BraKet::xs_xs);
   engine.set(ScreeningMethod::Conservative);
-  auto shell2bf = map_shell_to_basis_function(shells);
+  const auto shell2bf = map_shell_to_basis_function(shells);
   const auto &buf = engine.results();
   for (size_t s1 = 0; s1 != shells.size(); ++s1) {
     auto bf1 = shell2bf[s1];
@@ -176,11 +176,11 @@ inline std::vector<std::vector<Shell>> split_by_L(
 /// @return reduced set of product functions
 inline std::vector<Shell> shell_pivoted_cholesky(
     const std::vector<Shell> &shells, const double cholesky_threshold) {
-  auto n = shells.size();  // number of shells
+  const auto n = shells.size();  // number of shells
   std::vector<size_t>
       shell_indices;  // hash map of basis function indices to shell indices
-  auto L = shells[0].contr[0].l;  // all shells must have same L
-  auto nbf = libint2::nbf(
+  const auto L = shells[0].contr[0].l;  // all shells must have same L
+  const auto nbf = libint2::nbf(
       shells);  // total number of basis functions in vector of shells
   for (size_t i = 0; i < n; ++i) {
     for (size_t j = 0; j < 2 * L + 1;
@@ -189,7 +189,7 @@ inline std::vector<Shell> shell_pivoted_cholesky(
       shell_indices.push_back(i);
   }
   assert(shell_indices.size() == nbf);
-  auto C = compute_coulomb_matrix(shells);
+  const auto C = compute_coulomb_matrix(shells);
   std::vector<size_t> pivot(nbf);
   for (auto i = 0; i < nbf; ++i) {
     pivot[i] = i;
@@ -204,7 +204,7 @@ inline std::vector<Shell> shell_pivoted_cholesky(
     return col_sum[i1] < col_sum[i2];
   });
   // compute Cholesky decomposition
-  auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
+  const auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
 
   std::vector<Shell> reduced_shells;
   for (size_t i = 0; i < reduced_pivots.size(); ++i) {
@@ -238,10 +238,10 @@ class DFBasisSetGenerator {
   DFBasisSetGenerator(std::string obs_name, const Atom &atom,
                       const double cholesky_threshold = 1e-7) {
     // get AO basis shells for each atom
-    auto atom_bs = BasisSet(obs_name, {atom});
-    auto obs_shells = atom_bs.shells();
+    const auto atom_bs = BasisSet(obs_name, {atom});
+    const auto obs_shells = atom_bs.shells();
     // get primitive shells from AO functions
-    auto primitive_shells = detail::uncontract(obs_shells);
+    const auto primitive_shells = detail::uncontract(obs_shells);
     // compute candidate shells
     candidate_shells_ = detail::candidate_functions(primitive_shells);
     cholesky_threshold_ = cholesky_threshold;
@@ -252,9 +252,9 @@ class DFBasisSetGenerator {
   /// @param cluster vector of vector of shells for each atom
   /// @param cholesky_threshold threshold for choosing a product functions via
   /// pivoted Cholesky decomposition
-  DFBasisSetGenerator(std::vector<Shell> &shells,
+  DFBasisSetGenerator(const std::vector<Shell> &shells,
                       const double cholesky_threshold = 1e-7) {
-    auto primitive_shells = detail::uncontract(shells);
+    const auto primitive_shells = detail::uncontract(shells);
     candidate_shells_ = detail::candidate_functions(primitive_shells);
     cholesky_threshold_ = cholesky_threshold;
   }
@@ -272,7 +272,8 @@ class DFBasisSetGenerator {
     if (reduced_shells_computed_)
       return reduced_shells_;
     else {
-      auto candidate_splitted_in_L = detail::split_by_L(candidate_shells_);
+      const auto candidate_splitted_in_L =
+          detail::split_by_L(candidate_shells_);
       for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
         std::vector<Shell> reduced_shells_L;
         if (candidate_splitted_in_L[i].size() > 1)
diff --git a/include/libint2/pivoted_cholesky.h b/include/libint2/pivoted_cholesky.h
index 0ed7d6dbd..db88d60f9 100644
--- a/include/libint2/pivoted_cholesky.h
+++ b/include/libint2/pivoted_cholesky.h
@@ -37,7 +37,7 @@ inline std::vector<size_t> pivoted_cholesky(const Eigen::MatrixXd &A,
                                             const double tolerance,
                                             const std::vector<size_t> &pivot) {
   // number of elements in A
-  auto n = A.rows();
+  const auto n = A.rows();
   // diagonal elements of A
   std::vector<double> d(n);
   // initial error
diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc
index a65a09433..805e249ac 100644
--- a/tests/hartree-fock/hartree-fock++.cc
+++ b/tests/hartree-fock/hartree-fock++.cc
@@ -1528,7 +1528,6 @@ Matrix compute_2body_2index_ints(const BasisSet& bs) {
   }
 
   auto shell2bf = bs.shell2bf();
-  auto unitshell = Shell::unit();
 
   auto compute = [&](int thread_id) {
     auto& engine = engines[thread_id];

From 527be9e5dad890fbb95a9c6a6535e7f1de7ecac0 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Mon, 15 Jan 2024 13:23:30 -0500
Subject: [PATCH 24/44] added unit test for `DFBasisGenerator`

---
 export/cmake/CMakeLists.txt.export |   1 +
 tests/unit/Makefile                |   3 +-
 tests/unit/test-df-autogen.cc      | 184 +++++++++++++++++++++++++++++
 3 files changed, 187 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/test-df-autogen.cc

diff --git a/export/cmake/CMakeLists.txt.export b/export/cmake/CMakeLists.txt.export
index 87fc75cbd..c61866ec5 100644
--- a/export/cmake/CMakeLists.txt.export
+++ b/export/cmake/CMakeLists.txt.export
@@ -410,6 +410,7 @@ if (LIBINT_HAS_CXX_API)
             tests/unit/test-c-api.cc
             tests/unit/test-core.cc
             tests/unit/test-core-ints.cc
+            #tests/unit/test-df-autogen.cc
             tests/unit/test-permute.cc
             tests/unit/test-precision.cc
             tests/unit/test-shell-order.cc
diff --git a/tests/unit/Makefile b/tests/unit/Makefile
index bd0257772..6b8fcc213 100644
--- a/tests/unit/Makefile
+++ b/tests/unit/Makefile
@@ -26,7 +26,8 @@ CXXDEPENDFLAGS = -M
 TEST1 = test
 CXXTEST1SRC = test.cc test-core.cc test-permute.cc \
   test-1body.cc test-core-ints.cc test-c-api.cc test-shell-order.cc \
-  test-util.cc c-api-util.cc test-2body.cc test-precision.cc test-basis.cc
+  test-util.cc c-api-util.cc test-2body.cc test-precision.cc test-basis.cc\
+  test-df-autogen.cc
 CXXTEST1OBJ = $(CXXTEST1SRC:%.cc=%.$(OBJSUF))
 CXXTEST1DEP = $(CXXTEST1SRC:%.cc=%.$(DEPSUF))
 CTEST1SRC = c-api.c
diff --git a/tests/unit/test-df-autogen.cc b/tests/unit/test-df-autogen.cc
new file mode 100644
index 000000000..d8fbe272f
--- /dev/null
+++ b/tests/unit/test-df-autogen.cc
@@ -0,0 +1,184 @@
+#include <libint2/atom.h>
+#include <libint2/basis.h>
+#include <libint2/dfbs_generator.h>
+#include <libint2/engine.h>
+
+#include <Eigen/Cholesky>
+#include <Eigen/Dense>
+#include <unsupported/Eigen/CXX11/Tensor>
+
+#include "catch.hpp"
+#include "fixture.h"
+
+Eigen::Tensor<double, 3> compute_eri3(const BasisSet &obs,
+                                      const BasisSet &dfbs) {
+  const auto nshells = obs.size();
+  const auto nshells_df = dfbs.size();
+  const auto &unitshell = libint2::Shell::unit();
+
+  // construct the 2-electron 3-center repulsion integrals engine
+  // since the code assumes (xx|xs) braket, and Engine/libint only produces
+  // (xs|xx), use 4-center engine
+  libint2::Engine engine(libint2::Operator::coulomb,
+                         std::max(obs.max_nprim(), dfbs.max_nprim()),
+                         std::max(obs.max_l(), dfbs.max_l()), 0);
+  engine.set(BraKet::xs_xx);
+  const auto shell2bf = obs.shell2bf();
+  const auto shell2bf_df = dfbs.shell2bf();
+  const auto nbf = obs.nbf();
+  const auto ndf = dfbs.nbf();
+
+  Eigen::Tensor<double, 3> result(ndf, nbf, nbf);
+  const auto &buf = engine.results();
+  for (auto s1 = 0; s1 != nshells_df; ++s1) {
+    const auto bf1 = shell2bf_df[s1];
+    const auto n1 = dfbs[s1].size();
+    for (auto s2 = 0; s2 != nshells; ++s2) {
+      const auto bf2 = shell2bf[s2];
+      const auto n2 = obs[s2].size();
+      for (auto s3 = 0; s3 != nshells;
+           ++s3) {  // only loop over unique ao shells
+        const auto bf3 = shell2bf[s3];
+        const auto n3 = obs[s3].size();
+        engine.compute2<Operator::coulomb, BraKet::xs_xx, 0>(
+            dfbs[s1], unitshell, obs[s2], obs[s3]);
+        size_t fij = 0;
+        for (auto f = 0; f != n1; ++f) {
+          for (auto i = 0; i != n2; ++i) {
+            for (auto j = 0; j != n3; ++j) {
+              result(bf1 + f, bf2 + i, bf3 + j) = buf[0][fij];
+              ++fij;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return result;
+}
+
+Eigen::MatrixXd compute_eri2(const BasisSet &dfbs) {
+  const auto ndf = dfbs.nbf();
+  Eigen::MatrixXd result(ndf, ndf);
+  result.fill(0.0);
+  libint2::Engine engine(libint2::Operator::coulomb, dfbs.max_nprim(),
+                         dfbs.max_l(), 0);
+  engine.set(BraKet::xs_xs);
+  engine.set(libint2::ScreeningMethod::Conservative);
+  const auto shell2bf_df = dfbs.shell2bf();
+  const auto &buf = engine.results();
+  for (size_t s1 = 0; s1 != dfbs.size(); ++s1) {
+    const size_t bf1 = shell2bf_df[s1];
+    const size_t n1 = dfbs[s1].size();
+    for (size_t s2 = 0; s2 != dfbs.size(); ++s2) {
+      const size_t bf2 = shell2bf_df[s2];
+      const size_t n2 = dfbs[s2].size();
+      engine.compute(dfbs[s1], dfbs[s2]);
+      // "map" buffer to a const Eigen Matrix, and copy it to the corresponding
+      // blocks of the result
+      Eigen::Map<const Eigen::MatrixXd> buf_mat(buf[0], n1, n2);
+      result.block(bf1, bf2, n1, n2) = buf_mat;
+    }
+  }
+  return result;
+}
+
+Eigen::Tensor<double, 4> compute_eri4(const BasisSet &obs) {
+  const auto nshells = obs.size();
+  libint2::Engine engine(libint2::Operator::coulomb,
+                         std::max(obs.max_nprim(), obs.max_nprim()),
+                         std::max(obs.max_l(), obs.max_l()), 0);
+  engine.set(BraKet::xx_xx);
+  const auto shell2bf = obs.shell2bf();
+  const auto nbf = obs.nbf();
+
+  Eigen::Tensor<double, 4> result(nbf, nbf, nbf, nbf);
+  const auto &buf = engine.results();
+  for (auto s1 = 0; s1 != nshells; ++s1) {
+    const auto bf1 = shell2bf[s1];
+    const auto n1 = obs[s1].size();
+    // TODO: loop over all unique quartets: s1 >= s2 >= s3 >= s4
+    for (auto s2 = 0; s2 != nshells; ++s2) {
+      const auto bf2 = shell2bf[s2];
+      const auto n2 = obs[s2].size();
+      for (auto s3 = 0; s3 != nshells; ++s3) {
+        const auto bf3 = shell2bf[s3];
+        const auto n3 = obs[s3].size();
+        for (auto s4 = 0; s4 != nshells; ++s4) {
+          const auto bf4 = shell2bf[s4];
+          const auto n4 = obs[s4].size();
+          engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
+              obs[s1], obs[s2], obs[s3], obs[s4]);
+          size_t ijkl = 0;
+          for (auto i = 0; i != n1; ++i) {
+            for (auto j = 0; j != n2; ++j) {
+              for (auto k = 0; k != n3; ++k) {
+                for (auto l = 0; l != n4; ++l) {
+                  result(bf1 + i, bf2 + j, bf3 + k, bf4 + l) = buf[0][ijkl];
+                  ++ijkl;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return result;
+}
+
+TEST_CASE("DFBS-Generator", "[dfbs-generator]") {
+  // Will use Neon as a test case
+  libint2::Atom atom;
+  atom.atomic_number = 2;
+  atom.x = 0.0;
+  atom.y = 0.0;
+  atom.z = 0.0;
+
+  // Using STO-3G basis set for Neon
+  libint2::BasisSet bs("STO-3G", {atom});
+  const auto nao = bs.nbf();
+
+  // Use automatic DF basis set generator with tight cholesky threshold
+  libint2::DFBasisSetGenerator dfbs_generator(bs.shells(), 1e-4);
+  const auto dfbs = dfbs_generator.reduced_basis();
+  const auto ndf = dfbs.nbf();
+
+  // Compute 2-center integrals
+  const auto eri2 = compute_eri2(dfbs);
+
+  // Compute 3-center integrals
+  const auto eri3 = compute_eri3(bs, dfbs);
+
+  // Compute 4-center integrals
+  const auto eri4 = compute_eri4(bs);
+
+  // Compute L_inv
+  Eigen::LLT<Eigen::MatrixXd> V_LLt(eri2);
+  Eigen::MatrixXd I = Eigen::MatrixXd::Identity(ndf, ndf);
+  auto L = V_LLt.matrixL();
+  Eigen::MatrixXd Linv_t = L.solve(I).transpose();
+  auto eri2_inv = Linv_t * Linv_t.transpose();
+
+  // reconstruct 4-center integrals and compare with original
+  double norm = 0.0;
+  Eigen::Tensor<double, 4> eri4_df(nao, nao, nao, nao);
+  for (size_t i = 0; i < nao; i++) {
+    for (size_t j = 0; j < nao; j++) {
+      for (size_t k = 0; k < nao; k++) {
+        for (size_t l = 0; l < nao; l++) {
+          for (size_t X = 0; X < ndf; X++) {
+            for (size_t Y = 0; Y < ndf; Y++) {
+              eri4_df(i, j, k, l) +=
+                  eri3(X, i, j) * eri2_inv(X, Y) * eri3(Y, k, l);
+            }
+          }
+          norm += std::pow(eri4_df(i, j, k, l) - eri4(i, j, k, l), 2);
+        }
+      }
+    }
+  }
+  REQUIRE_NOTHROW(norm < 1e-10);
+}

From e272283f49fd73b802b5338de4a7617257997cbb Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 18 Jan 2024 10:50:12 -0500
Subject: [PATCH 25/44] `DFBasisGenerator` uses FD occupation number as
 importance factor

---
 include/libint2/dfbs_generator.h | 112 ++++++++++++++++---
 include/libint2/soad_fock.h      | 177 +++++++++++++++++++++++++++++++
 2 files changed, 274 insertions(+), 15 deletions(-)
 create mode 100644 include/libint2/soad_fock.h

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index da01cde23..f5b007452 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -26,6 +26,7 @@
 #include <libint2/basis.h>
 #include <libint2/boys.h>
 #include <libint2/pivoted_cholesky.h>
+#include <libint2/soad_fock.h>
 #include <math.h>
 
 #include <Eigen/Dense>
@@ -127,17 +128,27 @@ inline std::vector<size_t> map_shell_to_basis_function(
   return result;
 }
 
-/// @brief computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
+/// @brief computes 2 indexed integrals for an operator from a set of shells
+/// @param op operator
 /// @param shells set of shells
+/// @param atoms vector of atoms
 /// @return Coulomb matrix
-inline Eigen::MatrixXd compute_coulomb_matrix(
-    const std::vector<Shell> &shells) {
+inline Eigen::MatrixXd compute_2indexed_ints(const Operator &op,
+                                             const std::vector<Shell> &shells,
+                                             const std::vector<Atom> atoms) {
   const auto n = nbf(shells);
   Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
   using libint2::Engine;
-  Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells));
-  engine.set(BraKet::xs_xs);
-  engine.set(ScreeningMethod::Conservative);
+  Engine engine(op, max_nprim(shells), max_l(shells));
+  if (op == libint2::Operator::coulomb) {
+    engine.set(BraKet::xs_xs);
+    engine.set(ScreeningMethod::Conservative);
+  }
+  if (op == libint2::Operator::nuclear) {
+    engine.
+
+        set_params(libint2::make_point_charges(atoms));
+  }
   const auto shell2bf = map_shell_to_basis_function(shells);
   const auto &buf = engine.results();
   for (size_t s1 = 0; s1 != shells.size(); ++s1) {
@@ -175,7 +186,8 @@ inline std::vector<std::vector<Shell>> split_by_L(
 /// pivoted Cholesky decomposition
 /// @return reduced set of product functions
 inline std::vector<Shell> shell_pivoted_cholesky(
-    const std::vector<Shell> &shells, const double cholesky_threshold) {
+    const std::vector<Shell> &shells, const double cholesky_threshold,
+    bool do_fd = false, std::vector<double> fd_occ_vec = {}) {
   const auto n = shells.size();  // number of shells
   std::vector<size_t>
       shell_indices;  // hash map of basis function indices to shell indices
@@ -189,7 +201,18 @@ inline std::vector<Shell> shell_pivoted_cholesky(
       shell_indices.push_back(i);
   }
   assert(shell_indices.size() == nbf);
-  const auto C = compute_coulomb_matrix(shells);
+  Atom dummy_atom;
+  auto C =
+      compute_2indexed_ints(libint2::Operator::coulomb, shells, {dummy_atom});
+
+  if (do_fd) {
+    for (size_t i = 0; i < C.rows(); ++i) {
+      for (size_t j = 0; j < C.cols(); ++j) {
+        C(i, j) *= std::sqrt(fd_occ_vec[i] * fd_occ_vec[j]);
+      }
+    }
+  }
+
   std::vector<size_t> pivot(nbf);
   for (auto i = 0; i < nbf; ++i) {
     pivot[i] = i;
@@ -236,17 +259,65 @@ class DFBasisSetGenerator {
   /// @param cholesky_threshold threshold for choosing a product functions via
   /// pivoted Cholesky decomposition
   DFBasisSetGenerator(std::string obs_name, const Atom &atom,
-                      const double cholesky_threshold = 1e-7) {
+                      const double cholesky_threshold = 1e-7,
+                      bool do_fd = false, std::string minbs_name = "MINI") {
     // get AO basis shells for each atom
     const auto atom_bs = BasisSet(obs_name, {atom});
     const auto obs_shells = atom_bs.shells();
     // get primitive shells from AO functions
     const auto primitive_shells = detail::uncontract(obs_shells);
     // compute candidate shells
-    candidate_shells_ = detail::candidate_functions(primitive_shells);
+    auto candidate_shells_unsplitted =
+        detail::candidate_functions(primitive_shells);
+    // split candidate shells by angular momentum
+    auto candidate_shells_split =
+        detail::split_by_L(candidate_shells_unsplitted);
+
+    for (size_t i = 0; i < candidate_shells_split.size(); ++i) {
+      candidate_shells_.insert(candidate_shells_.end(),
+                               candidate_shells_split[i].begin(),
+                               candidate_shells_split[i].end());
+    }
+
+    std::vector<size_t> L_hash_map;
+    for (auto &&shellvec : candidate_shells_split) {
+      L_hash_map.push_back(nbf(shellvec));
+    }
+
     cholesky_threshold_ = cholesky_threshold;
-  }
+    do_fd_ = do_fd;
+    if (do_fd_) {
+      const auto minbs = BasisSet(minbs_name, {atom});
+      const auto T = detail::compute_2indexed_ints(libint2::Operator::kinetic,
+                                                   candidate_shells_, {atom});
+      const auto V = detail::compute_2indexed_ints(libint2::Operator::nuclear,
+                                                   candidate_shells_, {atom});
+      BasisSet candidate_bs(candidate_shells_);
+      auto H = T + V;
+      auto F_soad = compute_soad_fock(candidate_bs, minbs, {atom});
+      auto F = H + F_soad;
+
+      size_t L = 0;
+      size_t n_L_nfs = 0;
 
+      auto base_val = F(0, 0);
+
+      fd_occ_vec_.resize(L_hash_map.size());
+      for (size_t i = 0; i < F.cols(); ++i) {
+        if (n_L_nfs != L_hash_map[L]) {
+          fd_occ_vec_[L].push_back(1. / (1. + std::exp(F(i, i) / 10000.))
+                                   // kbT = 1.0 this needs to be changed
+          );
+          ++n_L_nfs;
+        } else {
+          ++L;
+          n_L_nfs = 1;
+          fd_occ_vec_[L].push_back(1. / (1. + std::exp(F(i, i) / 10000.)));
+          // kbT = 1.0 this needs to be changed);
+        }
+      }
+    }
+  }
   /// @brief constructor for DFBS generator class, generates density fitting
   /// basis set from products of AO shells provided by user
   /// @param cluster vector of vector of shells for each atom
@@ -276,11 +347,19 @@ class DFBasisSetGenerator {
           detail::split_by_L(candidate_shells_);
       for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
         std::vector<Shell> reduced_shells_L;
-        if (candidate_splitted_in_L[i].size() > 1)
-          reduced_shells_L = detail::shell_pivoted_cholesky(
-              candidate_splitted_in_L[i], cholesky_threshold_);
-        else
+        if (candidate_splitted_in_L[i].size() > 1) {
+          if (do_fd_)
+            reduced_shells_L = detail::shell_pivoted_cholesky(
+                candidate_splitted_in_L[i], cholesky_threshold_, do_fd_,
+                fd_occ_vec_[i]);
+          else
+            reduced_shells_L = detail::shell_pivoted_cholesky(
+                candidate_splitted_in_L[i], cholesky_threshold_);
+        } else
           reduced_shells_L = candidate_splitted_in_L[i];
+
+        std::cout << "Number of shells for L = " << i << " is "
+                  << reduced_shells_L.size() << std::endl;
         reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(),
                                reduced_shells_L.end());
       }
@@ -299,6 +378,9 @@ class DFBasisSetGenerator {
   std::vector<Shell> candidate_shells_;  // full set of product functions
   std::vector<Shell> reduced_shells_;    // reduced set of product functions
   bool reduced_shells_computed_ = false;
+  bool do_fd_;
+  std::vector<std::vector<double>>
+      fd_occ_vec_;  // occupation vector for FD scaling factors separated by L
 };
 
 }  // namespace libint2
diff --git a/include/libint2/soad_fock.h b/include/libint2/soad_fock.h
new file mode 100644
index 000000000..d7a8e7dfa
--- /dev/null
+++ b/include/libint2/soad_fock.h
@@ -0,0 +1,177 @@
+/*
+ *  Copyright (C) 2004-2021 Edward F. Valeev
+ *
+ *  This file is part of Libint.
+ *
+ *  Libint is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  Libint is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef _libint2_src_lib_libint_soad_fock_h_
+#define _libint2_src_lib_libint_soad_fock_h_
+
+#include <libint2/atom.h>
+#include <libint2/basis.h>
+
+#include <Eigen/Dense>
+
+namespace libint2 {
+
+// computes Superposition-Of-Atomic-Densities guess for the molecular density
+// matrix
+// in minimal basis; occupies subshells by smearing electrons evenly over the
+// orbitals
+Eigen::MatrixXd compute_soad(const std::vector<Atom> &atoms) {
+  // compute number of atomic orbitals
+  size_t nao = 0;
+  for (const auto &atom : atoms) {
+    const auto Z = atom.atomic_number;
+    nao += libint2::sto3g_num_ao(Z);
+  }
+
+  // compute the minimal basis density
+  Eigen::MatrixXd D = Eigen::MatrixXd::Zero(nao, nao);
+  size_t ao_offset = 0;  // first AO of this atom
+  for (const auto &atom : atoms) {
+    const auto Z = atom.atomic_number;
+    const auto &occvec = libint2::sto3g_ao_occupation_vector(Z);
+    for (const auto &occ : occvec) {
+      D(ao_offset, ao_offset) = occ;
+      ++ao_offset;
+    }
+  }
+
+  return D * 0.5;  // we use densities normalized to # of electrons/2
+}
+
+Eigen::MatrixXd compute_2body_fock_general(const BasisSet &obs,
+                                           const Eigen::MatrixXd &D,
+                                           const BasisSet &D_bs,
+                                           bool D_is_shelldiagonal,
+                                           double precision) {
+  const auto n = obs.nbf();
+  const auto nshells = obs.size();
+  const auto n_D = D_bs.nbf();
+  assert(D.cols() == D.rows() && D.cols() == n_D);
+
+  Eigen::MatrixXd G = Eigen::MatrixXd ::Zero(n, n);
+
+  // construct the 2-electron repulsion integrals engine
+  libint2::Engine engine(libint2::Operator::coulomb,
+                         std::max(obs.max_nprim(), D_bs.max_nprim()),
+                         std::max(obs.max_l(), D_bs.max_l()), 0);
+  engine.set_precision(precision);
+  auto shell2bf = obs.shell2bf();
+  auto shell2bf_D = D_bs.shell2bf();
+
+  const auto &buf = engine.results();
+
+  // loop over permutationally-unique set of shells
+  for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
+    auto bf1_first = shell2bf[s1];  // first basis function in this shell
+    auto n1 = obs[s1].size();       // number of basis functions in this shell
+
+    for (auto s2 = 0; s2 <= s1; ++s2) {
+      auto bf2_first = shell2bf[s2];
+      auto n2 = obs[s2].size();
+
+      for (auto s3 = 0; s3 < D_bs.size(); ++s3) {
+        auto bf3_first = shell2bf_D[s3];
+        auto n3 = D_bs[s3].size();
+
+        auto s4_begin = D_is_shelldiagonal ? s3 : 0;
+        auto s4_fence = D_is_shelldiagonal ? s3 + 1 : D_bs.size();
+
+        for (auto s4 = s4_begin; s4 != s4_fence; ++s4, ++s1234) {
+          auto bf4_first = shell2bf_D[s4];
+          auto n4 = D_bs[s4].size();
+
+          // compute the permutational degeneracy (i.e. # of equivalents) of
+          // the given shell set
+          auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
+
+          if (s3 >= s4) {
+            auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
+            auto s1234_deg = s12_deg * s34_deg;
+            // auto s1234_deg = s12_deg;
+            engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
+                obs[s1], obs[s2], D_bs[s3], D_bs[s4]);
+            const auto *buf_1234 = buf[0];
+            if (buf_1234 != nullptr) {
+              for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
+                const auto bf1 = f1 + bf1_first;
+                for (auto f2 = 0; f2 != n2; ++f2) {
+                  const auto bf2 = f2 + bf2_first;
+                  for (auto f3 = 0; f3 != n3; ++f3) {
+                    const auto bf3 = f3 + bf3_first;
+                    for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
+                      const auto bf4 = f4 + bf4_first;
+
+                      const auto value = buf_1234[f1234];
+                      const auto value_scal_by_deg = value * s1234_deg;
+                      G(bf1, bf2) += 2.0 * D(bf3, bf4) * value_scal_by_deg;
+                    }
+                  }
+                }
+              }
+            }
+          }
+
+          engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
+              obs[s1], D_bs[s3], obs[s2], D_bs[s4]);
+          const auto *buf_1324 = buf[0];
+          if (buf_1324 == nullptr)
+            continue;  // if all integrals screened out, skip to next quartet
+
+          for (auto f1 = 0, f1324 = 0; f1 != n1; ++f1) {
+            const auto bf1 = f1 + bf1_first;
+            for (auto f3 = 0; f3 != n3; ++f3) {
+              const auto bf3 = f3 + bf3_first;
+              for (auto f2 = 0; f2 != n2; ++f2) {
+                const auto bf2 = f2 + bf2_first;
+                for (auto f4 = 0; f4 != n4; ++f4, ++f1324) {
+                  const auto bf4 = f4 + bf4_first;
+
+                  const auto value = buf_1324[f1324];
+                  const auto value_scal_by_deg = value * s12_deg;
+                  G(bf1, bf2) -= D(bf3, bf4) * value_scal_by_deg;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // symmetrize the result and return
+  return 0.5 * (G + G.transpose());
+}
+
+Eigen::MatrixXd compute_soad_fock(const BasisSet &obs, const BasisSet &minbs,
+                                  const std::vector<Atom> &atoms) {
+  // use SOAD as guess for the density matrix
+  auto D = compute_soad(atoms);
+  // compute fock with guess density
+  auto F = compute_2body_fock_general(
+      obs, D, minbs, true /* SOAD_D_is_shelldiagonal */,
+      std::numeric_limits<double>::epsilon()  // this is cheap, no reason
+                                              // to be cheaper
+  );
+  return F;
+}
+
+}  // namespace libint2
+
+#endif  //_libint2_src_lib_libint_soad_fock_h_

From 78fb30d9552f912a9ace018ae8ba38e5455975b6 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Tue, 23 Jan 2024 16:12:51 -0500
Subject: [PATCH 26/44] generalized weights handeling in `DFBasisGenerator`
 with` std::tuple<Shell,weights>`, uses diagonal elements of Density matrix as
 weights

---
 include/libint2/dfbs_generator.h | 356 ++++++++++++++++++++++---------
 1 file changed, 258 insertions(+), 98 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index f5b007452..5334ffc8c 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -32,11 +32,100 @@
 #include <Eigen/Dense>
 #include <Eigen/Eigenvalues>
 #include <algorithm>
+#include <tuple>
 
 namespace libint2 {
 
 namespace detail {
 
+inline double fd_occupation(double e) { return 1. / (1. + std::exp(e)); }
+
+/// computes a map for contraction vector in a shell
+/// @param[in] shell libint shell of which a map is to be computed
+/// @return vector of lower bound indices to shell contraction vector
+inline std::vector<size_t> shell_hashmap(const libint2::Shell &shell) {
+  std::vector<size_t> shell_map;
+  auto contr = shell.contr;
+  size_t n = 0;
+  for (const auto &a : contr) {
+    shell_map.push_back(n);
+    n += a.size();
+  }
+  return shell_map;
+}
+
+/// computes contraction matrix for an uncontracted shell to a contracted shell
+/// @param[in] unc_shell a uncontracted shell
+/// @param[in] contr_shell a contracted shell
+/// @return a contraction matrix from uncontracted shell to contracted shell
+inline Eigen::MatrixXd shell_contraction_matrix(
+    const libint2::Shell &unc_shell, const libint2::Shell &contr_shell) {
+  Eigen::MatrixXd result(unc_shell.size(), contr_shell.size());
+  result.fill(0.0);
+  // check to see if the shell belongs to same center
+  if (unc_shell.O == contr_shell.O) {
+    auto contr_exps = contr_shell.alpha;
+    auto unc_exp = unc_shell.alpha[0];
+    // check if the uncontracted primitive exponent present in the contracted
+    // shell
+    if (std::find(contr_exps.begin(), contr_exps.end(), unc_exp) !=
+        contr_exps.end()) {
+      // get index of the exponent in contracted shell
+      std::int64_t exponent_index = 0;
+      for (auto i = 0; i < contr_exps.size(); i++) {
+        if (unc_exp == contr_exps[i]) exponent_index = i;
+      }
+      // create hashmaps for all contractions in contracted shell
+      auto unc_hashmap = shell_hashmap(unc_shell);
+      auto contr_hashmap = shell_hashmap(contr_shell);
+      // iterate through contractions in contracted shell
+      for (auto p1 = 0; p1 < unc_shell.contr.size(); p1++) {
+        for (auto p2 = 0; p2 < contr_shell.contr.size(); p2++) {
+          auto unc_contr = unc_shell.contr[p1];
+          auto contr_contr = contr_shell.contr[p2];
+          auto n1 = unc_contr.size();
+          auto n2 = contr_contr.size();
+          // check to see if the contraction belongs to same am (l)
+          if (unc_contr.l == contr_contr.l) {
+            Eigen::MatrixXd block(unc_contr.size(), contr_contr.size());
+            block.fill(0.0);
+            // fill diagonal elements with transformation coefficient
+            for (auto diag = 0; diag < contr_contr.size(); diag++) {
+              block(diag, diag) =
+                  contr_contr.coeff[exponent_index] / unc_contr.coeff[0];
+            }
+            result.block(unc_hashmap[p1], contr_hashmap[p2], n1, n2) = block;
+          }
+        }
+      }
+    }
+    return result;
+  }
+
+  else
+    return result;
+}
+
+inline Eigen::MatrixXd contraction_matrix(const BasisSet &primitive_basis,
+                                          const BasisSet &contracted_basis) {
+  const auto &unc_shell2bf = primitive_basis.shell2bf();
+  const auto &contr_shell2bf = contracted_basis.shell2bf();
+  const auto &unc_shells = primitive_basis.shells();
+  const auto &contr_shells = contracted_basis.shells();
+  Eigen::MatrixXd result(primitive_basis.nbf(), contracted_basis.nbf());
+  result.fill(0.0);
+  for (auto s1 = 0; s1 < unc_shells.size(); s1++) {
+    for (auto s2 = 0; s2 < contr_shells.size(); s2++) {
+      auto n1 = unc_shells[s1].size();
+      auto n2 = contr_shells[s2].size();
+      result.block(unc_shell2bf[s1], contr_shell2bf[s2], n1, n2) =
+          shell_contraction_matrix(unc_shells[s1], contr_shells[s2]);
+    }
+  }
+
+  return result;
+}
+
 /// Computes uncontracted shells from a vector of shells
 /// @param[in] cluster a vector of shells
 /// @return a vector of uncontracted shells
@@ -104,16 +193,37 @@ inline std::vector<Shell> product_functions(
   return product_functions;
 }
 
-/// @brief creates a set of candidate product shells from a set of primitive
-/// shells
-/// @param primitive_shells set of primitive shells
-/// @return set of candidate product shells
-inline std::vector<Shell> candidate_functions(
-    const std::vector<Shell> &primitive_shells) {
-  return product_functions(primitive_shells);
+inline std::vector<std::tuple<Shell, double>> product_functions(
+    const std::vector<std::tuple<Shell, double>> &shells_weights) {
+  std::vector<std::tuple<Shell, double>> product_functions;
+  for (size_t i = 0; i < shells_weights.size(); ++i) {
+    for (size_t j = 0; j <= i; ++j) {
+      const auto si = get<0>(shells_weights[i]);
+      const auto sj = get<0>(shells_weights[j]);
+      const auto li = si.contr[0].l;
+      const auto lj = sj.contr[0].l;
+      for (auto L = std::abs(li - lj); L <= li + lj; L++) {
+        const auto alpha = libint2::svector<double>({alpha_eff(si, sj, L)});
+        libint2::svector<Shell::Contraction> contr_;
+        Shell::Contraction contr1;
+        contr1.l = L;
+        contr1.pure = true;  // libint2 needs solid harmonics for 2c2b integrals
+        contr1.coeff = {1.0};
+        contr_.push_back(contr1);
+        assert(si.O == sj.O);
+        const auto shell = Shell(alpha, contr_, si.O);
+        const auto weight =
+            get<1>(shells_weights[i]) * get<1>(shells_weights[j]);
+        std::tuple<Shell, double> shell_weight{shell, weight};
+        if (std::find(product_functions.begin(), product_functions.end(),
+                      shell_weight) == product_functions.end())
+          product_functions.emplace_back(shell_weight);
+      }
+    }
+  }
+  return product_functions;
 }
 
-/// @brief returns a hash map of shell indices to basis function indices
 inline std::vector<size_t> map_shell_to_basis_function(
     const std::vector<libint2::Shell> &shells) {
   std::vector<size_t> result;
@@ -149,7 +259,7 @@ inline Eigen::MatrixXd compute_2indexed_ints(const Operator &op,
 
         set_params(libint2::make_point_charges(atoms));
   }
-  const auto shell2bf = map_shell_to_basis_function(shells);
+  const auto shell2bf = detail::map_shell_to_basis_function(shells);
   const auto &buf = engine.results();
   for (size_t s1 = 0; s1 != shells.size(); ++s1) {
     auto bf1 = shell2bf[s1];
@@ -179,15 +289,40 @@ inline std::vector<std::vector<Shell>> split_by_L(
   return sorted_shells;
 }
 
+inline std::vector<std::vector<std::tuple<Shell, double>>> split_by_L(
+    const std::vector<std::tuple<Shell, double>> &shells_weights_vec,
+    int lmax) {
+  std::vector<std::vector<std::tuple<Shell, double>>> result;
+  result.resize(lmax + 1);
+  for (auto &&candidate : shells_weights_vec) {
+    auto l = get<0>(candidate).contr[0].l;
+    result[l].push_back(candidate);
+  }
+  return result;
+}
+
 /// @brief computes the reduced set of product functions via pivoted Cholesky
 /// decomposition
-/// @param shells set of shells
+/// @param shells_weights set of tuples of shells and their weights
 /// @param cholesky_threshold threshold for choosing a product function via
 /// pivoted Cholesky decomposition
 /// @return reduced set of product functions
+
 inline std::vector<Shell> shell_pivoted_cholesky(
-    const std::vector<Shell> &shells, const double cholesky_threshold,
-    bool do_fd = false, std::vector<double> fd_occ_vec = {}) {
+    const std::vector<std::tuple<Shell, double>> &shells_weights,
+    const double cholesky_threshold) {
+  std::vector<Shell> shells;
+  std::vector<double> weights;
+
+  for (auto &&shell_weight : shells_weights) {
+    const auto shell = get<0>(shell_weight);
+    shells.push_back(shell);
+    const auto L = shell.contr[0].l;
+    for (size_t l = 0; l < 2 * L + 1; ++l) {
+      weights.push_back(get<1>(shell_weight));
+    }
+  }
+
   const auto n = shells.size();  // number of shells
   std::vector<size_t>
       shell_indices;  // hash map of basis function indices to shell indices
@@ -205,11 +340,9 @@ inline std::vector<Shell> shell_pivoted_cholesky(
   auto C =
       compute_2indexed_ints(libint2::Operator::coulomb, shells, {dummy_atom});
 
-  if (do_fd) {
-    for (size_t i = 0; i < C.rows(); ++i) {
-      for (size_t j = 0; j < C.cols(); ++j) {
-        C(i, j) *= std::sqrt(fd_occ_vec[i] * fd_occ_vec[j]);
-      }
+  for (size_t i = 0; i < C.rows(); ++i) {
+    for (size_t j = 0; j < C.cols(); ++j) {
+      C(i, j) *= std::sqrt(weights[i] * weights[j]);
     }
   }
 
@@ -260,112 +393,136 @@ class DFBasisSetGenerator {
   /// pivoted Cholesky decomposition
   DFBasisSetGenerator(std::string obs_name, const Atom &atom,
                       const double cholesky_threshold = 1e-7,
-                      bool do_fd = false, std::string minbs_name = "MINI") {
+                      bool use_weights = false,
+                      std::string minbs_name = "MINI") {
     // get AO basis shells for each atom
-    const auto atom_bs = BasisSet(obs_name, {atom});
-    const auto obs_shells = atom_bs.shells();
-    // get primitive shells from AO functions
-    const auto primitive_shells = detail::uncontract(obs_shells);
-    // compute candidate shells
-    auto candidate_shells_unsplitted =
-        detail::candidate_functions(primitive_shells);
-    // split candidate shells by angular momentum
-    auto candidate_shells_split =
-        detail::split_by_L(candidate_shells_unsplitted);
-
-    for (size_t i = 0; i < candidate_shells_split.size(); ++i) {
-      candidate_shells_.insert(candidate_shells_.end(),
-                               candidate_shells_split[i].begin(),
-                               candidate_shells_split[i].end());
-    }
-
-    std::vector<size_t> L_hash_map;
-    for (auto &&shellvec : candidate_shells_split) {
-      L_hash_map.push_back(nbf(shellvec));
-    }
-
+    atom_ = atom;
+    obs_ = BasisSet(obs_name, {atom_});
+    minbs_ = BasisSet(minbs_name, {atom_});
     cholesky_threshold_ = cholesky_threshold;
-    do_fd_ = do_fd;
-    if (do_fd_) {
-      const auto minbs = BasisSet(minbs_name, {atom});
-      const auto T = detail::compute_2indexed_ints(libint2::Operator::kinetic,
-                                                   candidate_shells_, {atom});
-      const auto V = detail::compute_2indexed_ints(libint2::Operator::nuclear,
-                                                   candidate_shells_, {atom});
-      BasisSet candidate_bs(candidate_shells_);
-      auto H = T + V;
-      auto F_soad = compute_soad_fock(candidate_bs, minbs, {atom});
-      auto F = H + F_soad;
-
-      size_t L = 0;
-      size_t n_L_nfs = 0;
-
-      auto base_val = F(0, 0);
-
-      fd_occ_vec_.resize(L_hash_map.size());
-      for (size_t i = 0; i < F.cols(); ++i) {
-        if (n_L_nfs != L_hash_map[L]) {
-          fd_occ_vec_[L].push_back(1. / (1. + std::exp(F(i, i) / 10000.))
-                                   // kbT = 1.0 this needs to be changed
-          );
-          ++n_L_nfs;
-        } else {
-          ++L;
-          n_L_nfs = 1;
-          fd_occ_vec_[L].push_back(1. / (1. + std::exp(F(i, i) / 10000.)));
-          // kbT = 1.0 this needs to be changed);
-        }
-      }
-    }
+    use_weights_ = use_weights;
+    if (use_weights_) compute_weights();
+    candidates_ = candidates();
   }
   /// @brief constructor for DFBS generator class, generates density fitting
   /// basis set from products of AO shells provided by user
   /// @param cluster vector of vector of shells for each atom
   /// @param cholesky_threshold threshold for choosing a product functions via
   /// pivoted Cholesky decomposition
-  DFBasisSetGenerator(const std::vector<Shell> &shells,
-                      const double cholesky_threshold = 1e-7) {
-    const auto primitive_shells = detail::uncontract(shells);
-    candidate_shells_ = detail::candidate_functions(primitive_shells);
+  DFBasisSetGenerator(const std::vector<Shell> &shells, const Atom &atom,
+                      const double cholesky_threshold = 1e-7,
+                      bool use_weights = false,
+                      const std::vector<Shell> &mini_shells = {}) {
+    atom_ = atom;
+    obs_ = BasisSet(shells);
+    minbs_ = BasisSet(mini_shells);
     cholesky_threshold_ = cholesky_threshold;
+    use_weights_ = use_weights;
+    if (use_weights_) {
+      compute_weights();
+    }
+    candidates_ = candidates();
   }
 
   DFBasisSetGenerator() = default;
 
   ~DFBasisSetGenerator() = default;
 
-  /// @brief returns the candidate shells (full set of product functions)
-  std::vector<Shell> candidate_shells() { return candidate_shells_; }
-
   /// @brief returns the reduced shells (reduced set of product functions)
   /// computed via pivoted Cholesky decomposition
   std::vector<Shell> reduced_shells() {
     if (reduced_shells_computed_)
       return reduced_shells_;
     else {
-      const auto candidate_splitted_in_L =
-          detail::split_by_L(candidate_shells_);
-      for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
+      const auto candidates_in_L =
+          detail::split_by_L(candidates_, 2 * max_l(obs_.shells()));
+      for (size_t i = 0; i < candidates_in_L.size(); ++i) {
         std::vector<Shell> reduced_shells_L;
-        if (candidate_splitted_in_L[i].size() > 1) {
-          if (do_fd_)
-            reduced_shells_L = detail::shell_pivoted_cholesky(
-                candidate_splitted_in_L[i], cholesky_threshold_, do_fd_,
-                fd_occ_vec_[i]);
-          else
-            reduced_shells_L = detail::shell_pivoted_cholesky(
-                candidate_splitted_in_L[i], cholesky_threshold_);
+        if (candidates_in_L[i].size() > 1) {
+          reduced_shells_L = detail::shell_pivoted_cholesky(
+              candidates_in_L[i], cholesky_threshold_);
         } else
-          reduced_shells_L = candidate_splitted_in_L[i];
+          reduced_shells_L = {get<0>(candidates_in_L[i][0])};
 
-        std::cout << "Number of shells for L = " << i << " is "
+        std::cout << "Number of shells in L = " << i << " is "
                   << reduced_shells_L.size() << std::endl;
         reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(),
                                reduced_shells_L.end());
       }
       reduced_shells_computed_ = true;
+      return reduced_shells_;
+    }
+  }
+
+  /// @brief returns a set of weights of primitive obs shells
+  void compute_weights() {
+    const auto T = detail::compute_2indexed_ints(libint2::Operator::kinetic,
+                                                 obs_.shells(), {atom_});
+    const auto V = detail::compute_2indexed_ints(libint2::Operator::nuclear,
+                                                 obs_.shells(), {atom_});
+    auto H = T + V;
+    auto F_soad = compute_soad_fock(obs_, minbs_, {atom_});
+    auto F = H + F_soad;
+    Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es(F);
+    auto C = es.eigenvectors();
+    auto E = es.eigenvalues();
+    auto primitive_bs = BasisSet(detail::uncontract(obs_.shells()));
+    const auto contraction_matrix =
+        detail::contraction_matrix(primitive_bs, obs_);
+    auto C_unc = contraction_matrix * C;
+
+    const auto nocc = atom_.atomic_number / 2;
+    auto C_occ = C_unc.leftCols(nocc);
+    auto D = C_occ * C_occ.transpose();
+
+    // compute square of coefficients
+    //      Eigen::MatrixXd C_unc_sqr(C_unc.rows(),C_unc.cols());
+    //      for(size_t i=0;i<C_unc.rows();++i){
+    //          for(size_t j=0;j<C_unc.cols();++j){
+    //              C_unc_sqr(i,j) = C_unc(i,j)*C_unc(i,j);
+    //          }
+    //      }
+    //
+    //      std::vector<double> prim_func_weights;
+    //
+    //      for(size_t i=0;i<C_unc.rows();++i){
+    //          double max_weight = 0.0;
+    //          for(size_t j=0;j<C_unc.cols();++j){
+    //              max_weight = std::max(max_weight,detail::fd_occupation(E(j))
+    //              * C_unc_sqr(i,j));
+    //          }
+    //          prim_func_weights.push_back(max_weight);
+    //      }
+
+    auto prim_shells = primitive_bs.shells();
+    size_t nfns = 0;
+    for (size_t i = 0; i < prim_shells.size(); ++i) {
+      double max_weight = 0.0;
+      for (size_t j = 0; j < prim_shells[i].size(); ++j) {
+        // max_weight = std::max(max_weight, prim_func_weights[nfns]);
+        max_weight = std::max(max_weight, D.diagonal()(nfns));
+        nfns++;
+      }
+      prim_obs_weights_.push_back(max_weight);
+    }
+  }
+
+  const std::vector<std::tuple<Shell, double>> candidates() {
+    const auto prim_shells = detail::uncontract(obs_.shells());
+
+    // create tuple of shells and their weights
+    std::vector<std::tuple<Shell, double>> prim_shell_weights;
+    for (size_t i = 0; i < prim_shells.size(); ++i) {
+      std::tuple<Shell, double> shell_weight;
+      get<0>(shell_weight) = prim_shells[i];
+      if (use_weights_)
+        get<1>(shell_weight) = prim_obs_weights_[i];
+      else
+        get<1>(shell_weight) = 1.;
+      prim_shell_weights.push_back(shell_weight);
     }
-    return reduced_shells_;
+    const auto candidates = detail::product_functions(prim_shell_weights);
+    return candidates;  // return candidates once done
   }
 
   /// @brief returns the reduced basis set (reduced set of product
@@ -374,13 +531,16 @@ class DFBasisSetGenerator {
   const BasisSet reduced_basis() { return BasisSet(reduced_shells()); }
 
  private:
+  BasisSet obs_;
+  BasisSet minbs_;
+  Atom atom_;
   double cholesky_threshold_;
-  std::vector<Shell> candidate_shells_;  // full set of product functions
-  std::vector<Shell> reduced_shells_;    // reduced set of product functions
+  std::vector<std::tuple<Shell, double>>
+      candidates_;  // full set of product functions and their weights
+  std::vector<Shell> reduced_shells_;  // reduced set of product functions
   bool reduced_shells_computed_ = false;
-  bool do_fd_;
-  std::vector<std::vector<double>>
-      fd_occ_vec_;  // occupation vector for FD scaling factors separated by L
+  bool use_weights_;
+  std::vector<double> prim_obs_weights_;
 };
 
 }  // namespace libint2

From 434399450446ec96964b4845a8096d2086d680e4 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Wed, 24 Jan 2024 16:31:19 -0500
Subject: [PATCH 27/44] revert `DFBasisGenerator` back to basic cholesky
 implementation

---
 export/cmake/CMakeLists.txt.export   |   2 +-
 include/libint2/dfbs_generator.h     | 347 +++++++--------------------
 include/libint2/soad_fock.h          |   3 +-
 tests/hartree-fock/hartree-fock++.cc |  16 +-
 tests/unit/test-df-autogen.cc        | 103 ++++----
 5 files changed, 152 insertions(+), 319 deletions(-)

diff --git a/export/cmake/CMakeLists.txt.export b/export/cmake/CMakeLists.txt.export
index f28eed1c5..137b37ae7 100644
--- a/export/cmake/CMakeLists.txt.export
+++ b/export/cmake/CMakeLists.txt.export
@@ -424,7 +424,7 @@ if (LIBINT_HAS_CXX_API)
             tests/unit/test-c-api.cc
             tests/unit/test-core.cc
             tests/unit/test-core-ints.cc
-            #tests/unit/test-df-autogen.cc
+            tests/unit/test-df-autogen.cc
             tests/unit/test-permute.cc
             tests/unit/test-precision.cc
             tests/unit/test-shell-order.cc
diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 5334ffc8c..b4ba77d27 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2004-2021 Edward F. Valeev
+ *  Copyright (C) 2004-2024 Edward F. Valeev
  *
  *  This file is part of Libint.
  *
@@ -26,106 +26,16 @@
 #include <libint2/basis.h>
 #include <libint2/boys.h>
 #include <libint2/pivoted_cholesky.h>
-#include <libint2/soad_fock.h>
 #include <math.h>
 
 #include <Eigen/Dense>
 #include <Eigen/Eigenvalues>
 #include <algorithm>
-#include <tuple>
 
 namespace libint2 {
 
 namespace detail {
 
-inline double fd_occupation(double e) { return 1. / (1. + std::exp(e)); }
-
-/// computes a map for contraction vector in a shell
-/// @param[in] shell libint shell of which a map is to be computed
-/// @return vector of lower bound indices to shell contraction vector
-inline std::vector<size_t> shell_hashmap(const libint2::Shell &shell) {
-  std::vector<size_t> shell_map;
-  auto contr = shell.contr;
-  size_t n = 0;
-  for (const auto &a : contr) {
-    shell_map.push_back(n);
-    n += a.size();
-  }
-  return shell_map;
-}
-
-/// computes contraction matrix for an uncontracted shell to a contracted shell
-/// @param[in] unc_shell a uncontracted shell
-/// @param[in] contr_shell a contracted shell
-/// @return a contraction matrix from uncontracted shell to contracted shell
-inline Eigen::MatrixXd shell_contraction_matrix(
-    const libint2::Shell &unc_shell, const libint2::Shell &contr_shell) {
-  Eigen::MatrixXd result(unc_shell.size(), contr_shell.size());
-  result.fill(0.0);
-  // check to see if the shell belongs to same center
-  if (unc_shell.O == contr_shell.O) {
-    auto contr_exps = contr_shell.alpha;
-    auto unc_exp = unc_shell.alpha[0];
-    // check if the uncontracted primitive exponent present in the contracted
-    // shell
-    if (std::find(contr_exps.begin(), contr_exps.end(), unc_exp) !=
-        contr_exps.end()) {
-      // get index of the exponent in contracted shell
-      std::int64_t exponent_index = 0;
-      for (auto i = 0; i < contr_exps.size(); i++) {
-        if (unc_exp == contr_exps[i]) exponent_index = i;
-      }
-      // create hashmaps for all contractions in contracted shell
-      auto unc_hashmap = shell_hashmap(unc_shell);
-      auto contr_hashmap = shell_hashmap(contr_shell);
-      // iterate through contractions in contracted shell
-      for (auto p1 = 0; p1 < unc_shell.contr.size(); p1++) {
-        for (auto p2 = 0; p2 < contr_shell.contr.size(); p2++) {
-          auto unc_contr = unc_shell.contr[p1];
-          auto contr_contr = contr_shell.contr[p2];
-          auto n1 = unc_contr.size();
-          auto n2 = contr_contr.size();
-          // check to see if the contraction belongs to same am (l)
-          if (unc_contr.l == contr_contr.l) {
-            Eigen::MatrixXd block(unc_contr.size(), contr_contr.size());
-            block.fill(0.0);
-            // fill diagonal elements with transformation coefficient
-            for (auto diag = 0; diag < contr_contr.size(); diag++) {
-              block(diag, diag) =
-                  contr_contr.coeff[exponent_index] / unc_contr.coeff[0];
-            }
-            result.block(unc_hashmap[p1], contr_hashmap[p2], n1, n2) = block;
-          }
-        }
-      }
-    }
-    return result;
-  }
-
-  else
-    return result;
-}
-
-inline Eigen::MatrixXd contraction_matrix(const BasisSet &primitive_basis,
-                                          const BasisSet &contracted_basis) {
-  const auto &unc_shell2bf = primitive_basis.shell2bf();
-  const auto &contr_shell2bf = contracted_basis.shell2bf();
-  const auto &unc_shells = primitive_basis.shells();
-  const auto &contr_shells = contracted_basis.shells();
-  Eigen::MatrixXd result(primitive_basis.nbf(), contracted_basis.nbf());
-  result.fill(0.0);
-  for (auto s1 = 0; s1 < unc_shells.size(); s1++) {
-    for (auto s2 = 0; s2 < contr_shells.size(); s2++) {
-      auto n1 = unc_shells[s1].size();
-      auto n2 = contr_shells[s2].size();
-      result.block(unc_shell2bf[s1], contr_shell2bf[s2], n1, n2) =
-          shell_contraction_matrix(unc_shells[s1], contr_shells[s2]);
-    }
-  }
-
-  return result;
-}
-
 /// Computes uncontracted shells from a vector of shells
 /// @param[in] cluster a vector of shells
 /// @return a vector of uncontracted shells
@@ -193,37 +103,16 @@ inline std::vector<Shell> product_functions(
   return product_functions;
 }
 
-inline std::vector<std::tuple<Shell, double>> product_functions(
-    const std::vector<std::tuple<Shell, double>> &shells_weights) {
-  std::vector<std::tuple<Shell, double>> product_functions;
-  for (size_t i = 0; i < shells_weights.size(); ++i) {
-    for (size_t j = 0; j <= i; ++j) {
-      const auto si = get<0>(shells_weights[i]);
-      const auto sj = get<0>(shells_weights[j]);
-      const auto li = si.contr[0].l;
-      const auto lj = sj.contr[0].l;
-      for (auto L = std::abs(li - lj); L <= li + lj; L++) {
-        const auto alpha = libint2::svector<double>({alpha_eff(si, sj, L)});
-        libint2::svector<Shell::Contraction> contr_;
-        Shell::Contraction contr1;
-        contr1.l = L;
-        contr1.pure = true;  // libint2 needs solid harmonics for 2c2b integrals
-        contr1.coeff = {1.0};
-        contr_.push_back(contr1);
-        assert(si.O == sj.O);
-        const auto shell = Shell(alpha, contr_, si.O);
-        const auto weight =
-            get<1>(shells_weights[i]) * get<1>(shells_weights[j]);
-        std::tuple<Shell, double> shell_weight{shell, weight};
-        if (std::find(product_functions.begin(), product_functions.end(),
-                      shell_weight) == product_functions.end())
-          product_functions.emplace_back(shell_weight);
-      }
-    }
-  }
-  return product_functions;
+/// @brief creates a set of candidate product shells from a set of primitive
+/// shells
+/// @param primitive_shells set of primitive shells
+/// @return set of candidate product shells
+inline std::vector<Shell> candidate_functions(
+    const std::vector<Shell> &primitive_shells) {
+  return product_functions(primitive_shells);
 }
 
+/// @brief returns a hash map of shell indices to basis function indices
 inline std::vector<size_t> map_shell_to_basis_function(
     const std::vector<libint2::Shell> &shells) {
   std::vector<size_t> result;
@@ -238,28 +127,18 @@ inline std::vector<size_t> map_shell_to_basis_function(
   return result;
 }
 
-/// @brief computes 2 indexed integrals for an operator from a set of shells
-/// @param op operator
+/// @brief computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
 /// @param shells set of shells
-/// @param atoms vector of atoms
 /// @return Coulomb matrix
-inline Eigen::MatrixXd compute_2indexed_ints(const Operator &op,
-                                             const std::vector<Shell> &shells,
-                                             const std::vector<Atom> atoms) {
+inline Eigen::MatrixXd compute_coulomb_matrix(
+    const std::vector<Shell> &shells) {
   const auto n = nbf(shells);
   Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
   using libint2::Engine;
-  Engine engine(op, max_nprim(shells), max_l(shells));
-  if (op == libint2::Operator::coulomb) {
-    engine.set(BraKet::xs_xs);
-    engine.set(ScreeningMethod::Conservative);
-  }
-  if (op == libint2::Operator::nuclear) {
-    engine.
-
-        set_params(libint2::make_point_charges(atoms));
-  }
-  const auto shell2bf = detail::map_shell_to_basis_function(shells);
+  Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells));
+  engine.set(BraKet::xs_xs);
+  engine.set(ScreeningMethod::Conservative);
+  const auto shell2bf = map_shell_to_basis_function(shells);
   const auto &buf = engine.results();
   for (size_t s1 = 0; s1 != shells.size(); ++s1) {
     auto bf1 = shell2bf[s1];
@@ -289,16 +168,53 @@ inline std::vector<std::vector<Shell>> split_by_L(
   return sorted_shells;
 }
 
-inline std::vector<std::vector<std::tuple<Shell, double>>> split_by_L(
-    const std::vector<std::tuple<Shell, double>> &shells_weights_vec,
-    int lmax) {
-  std::vector<std::vector<std::tuple<Shell, double>>> result;
-  result.resize(lmax + 1);
-  for (auto &&candidate : shells_weights_vec) {
-    auto l = get<0>(candidate).contr[0].l;
-    result[l].push_back(candidate);
+/// @brief computes the reduced set of product functions via pivoted Cholesky
+/// decomposition
+/// @param shells set of shells
+/// @param cholesky_threshold threshold for choosing a product function via
+/// pivoted Cholesky decomposition
+/// @return reduced set of product functions
+inline std::vector<Shell> shell_pivoted_cholesky(
+    const std::vector<Shell> &shells, const double cholesky_threshold) {
+  const auto n = shells.size();  // number of shells
+  std::vector<size_t>
+      shell_indices;  // hash map of basis function indices to shell indices
+  const auto L = shells[0].contr[0].l;  // all shells must have same L
+  const auto nbf = libint2::nbf(
+      shells);  // total number of basis functions in vector of shells
+  for (size_t i = 0; i < n; ++i) {
+    for (size_t j = 0; j < 2 * L + 1;
+         ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b
+      // integrals
+      shell_indices.push_back(i);
+  }
+  assert(shell_indices.size() == nbf);
+  const auto C = compute_coulomb_matrix(shells);
+  std::vector<size_t> pivot(nbf);
+  for (auto i = 0; i < nbf; ++i) {
+    pivot[i] = i;
+  }
+  // set pivot indices in ascending order of off diagonal elements of Coulomb
+  // matrix see Phys. Rev. A 101, 032504 (Accurate reproduction of strongly
+  // repulsive interatomic potentials)
+  Eigen::MatrixXd C_off_diag = C;
+  auto col_sum = C_off_diag.colwise().sum();
+  // sort pivot indices in ascending order of column sums
+  std::sort(pivot.begin(), pivot.end(), [&col_sum](size_t i1, size_t i2) {
+    return col_sum[i1] < col_sum[i2];
+  });
+  // compute Cholesky decomposition
+  const auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
+
+  std::vector<Shell> reduced_shells;
+  for (size_t i = 0; i < reduced_pivots.size(); ++i) {
+    // check if the reduced shell is already in reduced shells
+    if (std::find(reduced_shells.begin(), reduced_shells.end(),
+                  shells[shell_indices[reduced_pivots[i]]]) ==
+        reduced_shells.end())
+      reduced_shells.push_back(shells[shell_indices[reduced_pivots[i]]]);
   }
-  return result;
+  return reduced_shells;
 }
 
 /// @brief computes the reduced set of product functions via pivoted Cholesky
@@ -337,8 +253,7 @@ inline std::vector<Shell> shell_pivoted_cholesky(
   }
   assert(shell_indices.size() == nbf);
   Atom dummy_atom;
-  auto C =
-      compute_2indexed_ints(libint2::Operator::coulomb, shells, {dummy_atom});
+  auto C = compute_coulomb_matrix(shells);
 
   for (size_t i = 0; i < C.rows(); ++i) {
     for (size_t j = 0; j < C.cols(); ++j) {
@@ -392,137 +307,57 @@ class DFBasisSetGenerator {
   /// @param cholesky_threshold threshold for choosing a product functions via
   /// pivoted Cholesky decomposition
   DFBasisSetGenerator(std::string obs_name, const Atom &atom,
-                      const double cholesky_threshold = 1e-7,
-                      bool use_weights = false,
-                      std::string minbs_name = "MINI") {
+                      const double cholesky_threshold = 1e-7) {
     // get AO basis shells for each atom
-    atom_ = atom;
-    obs_ = BasisSet(obs_name, {atom_});
-    minbs_ = BasisSet(minbs_name, {atom_});
+    const auto atom_bs = BasisSet(obs_name, {atom});
+    const auto obs_shells = atom_bs.shells();
+    // get primitive shells from AO functions
+    const auto primitive_shells = detail::uncontract(obs_shells);
+    // compute candidate shells
+    candidate_shells_ = detail::candidate_functions(primitive_shells);
     cholesky_threshold_ = cholesky_threshold;
-    use_weights_ = use_weights;
-    if (use_weights_) compute_weights();
-    candidates_ = candidates();
   }
+
   /// @brief constructor for DFBS generator class, generates density fitting
   /// basis set from products of AO shells provided by user
   /// @param cluster vector of vector of shells for each atom
   /// @param cholesky_threshold threshold for choosing a product functions via
   /// pivoted Cholesky decomposition
-  DFBasisSetGenerator(const std::vector<Shell> &shells, const Atom &atom,
-                      const double cholesky_threshold = 1e-7,
-                      bool use_weights = false,
-                      const std::vector<Shell> &mini_shells = {}) {
-    atom_ = atom;
-    obs_ = BasisSet(shells);
-    minbs_ = BasisSet(mini_shells);
+  DFBasisSetGenerator(const std::vector<Shell> &shells,
+                      const double cholesky_threshold = 1e-7) {
+    const auto primitive_shells = detail::uncontract(shells);
+    candidate_shells_ = detail::candidate_functions(primitive_shells);
     cholesky_threshold_ = cholesky_threshold;
-    use_weights_ = use_weights;
-    if (use_weights_) {
-      compute_weights();
-    }
-    candidates_ = candidates();
   }
 
   DFBasisSetGenerator() = default;
 
   ~DFBasisSetGenerator() = default;
 
+  /// @brief returns the candidate shells (full set of product functions)
+  std::vector<Shell> candidate_shells() { return candidate_shells_; }
+
   /// @brief returns the reduced shells (reduced set of product functions)
   /// computed via pivoted Cholesky decomposition
   std::vector<Shell> reduced_shells() {
     if (reduced_shells_computed_)
       return reduced_shells_;
     else {
-      const auto candidates_in_L =
-          detail::split_by_L(candidates_, 2 * max_l(obs_.shells()));
-      for (size_t i = 0; i < candidates_in_L.size(); ++i) {
+      const auto candidate_splitted_in_L =
+          detail::split_by_L(candidate_shells_);
+      for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
         std::vector<Shell> reduced_shells_L;
-        if (candidates_in_L[i].size() > 1) {
+        if (candidate_splitted_in_L[i].size() > 1)
           reduced_shells_L = detail::shell_pivoted_cholesky(
-              candidates_in_L[i], cholesky_threshold_);
-        } else
-          reduced_shells_L = {get<0>(candidates_in_L[i][0])};
-
-        std::cout << "Number of shells in L = " << i << " is "
-                  << reduced_shells_L.size() << std::endl;
+              candidate_splitted_in_L[i], cholesky_threshold_);
+        else
+          reduced_shells_L = candidate_splitted_in_L[i];
         reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(),
                                reduced_shells_L.end());
       }
       reduced_shells_computed_ = true;
-      return reduced_shells_;
-    }
-  }
-
-  /// @brief returns a set of weights of primitive obs shells
-  void compute_weights() {
-    const auto T = detail::compute_2indexed_ints(libint2::Operator::kinetic,
-                                                 obs_.shells(), {atom_});
-    const auto V = detail::compute_2indexed_ints(libint2::Operator::nuclear,
-                                                 obs_.shells(), {atom_});
-    auto H = T + V;
-    auto F_soad = compute_soad_fock(obs_, minbs_, {atom_});
-    auto F = H + F_soad;
-    Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es(F);
-    auto C = es.eigenvectors();
-    auto E = es.eigenvalues();
-    auto primitive_bs = BasisSet(detail::uncontract(obs_.shells()));
-    const auto contraction_matrix =
-        detail::contraction_matrix(primitive_bs, obs_);
-    auto C_unc = contraction_matrix * C;
-
-    const auto nocc = atom_.atomic_number / 2;
-    auto C_occ = C_unc.leftCols(nocc);
-    auto D = C_occ * C_occ.transpose();
-
-    // compute square of coefficients
-    //      Eigen::MatrixXd C_unc_sqr(C_unc.rows(),C_unc.cols());
-    //      for(size_t i=0;i<C_unc.rows();++i){
-    //          for(size_t j=0;j<C_unc.cols();++j){
-    //              C_unc_sqr(i,j) = C_unc(i,j)*C_unc(i,j);
-    //          }
-    //      }
-    //
-    //      std::vector<double> prim_func_weights;
-    //
-    //      for(size_t i=0;i<C_unc.rows();++i){
-    //          double max_weight = 0.0;
-    //          for(size_t j=0;j<C_unc.cols();++j){
-    //              max_weight = std::max(max_weight,detail::fd_occupation(E(j))
-    //              * C_unc_sqr(i,j));
-    //          }
-    //          prim_func_weights.push_back(max_weight);
-    //      }
-
-    auto prim_shells = primitive_bs.shells();
-    size_t nfns = 0;
-    for (size_t i = 0; i < prim_shells.size(); ++i) {
-      double max_weight = 0.0;
-      for (size_t j = 0; j < prim_shells[i].size(); ++j) {
-        // max_weight = std::max(max_weight, prim_func_weights[nfns]);
-        max_weight = std::max(max_weight, D.diagonal()(nfns));
-        nfns++;
-      }
-      prim_obs_weights_.push_back(max_weight);
-    }
-  }
-
-  const std::vector<std::tuple<Shell, double>> candidates() {
-    const auto prim_shells = detail::uncontract(obs_.shells());
-
-    // create tuple of shells and their weights
-    std::vector<std::tuple<Shell, double>> prim_shell_weights;
-    for (size_t i = 0; i < prim_shells.size(); ++i) {
-      std::tuple<Shell, double> shell_weight;
-      get<0>(shell_weight) = prim_shells[i];
-      if (use_weights_)
-        get<1>(shell_weight) = prim_obs_weights_[i];
-      else
-        get<1>(shell_weight) = 1.;
-      prim_shell_weights.push_back(shell_weight);
     }
-    const auto candidates = detail::product_functions(prim_shell_weights);
-    return candidates;  // return candidates once done
+    return reduced_shells_;
   }
 
   /// @brief returns the reduced basis set (reduced set of product
@@ -531,16 +366,10 @@ class DFBasisSetGenerator {
   const BasisSet reduced_basis() { return BasisSet(reduced_shells()); }
 
  private:
-  BasisSet obs_;
-  BasisSet minbs_;
-  Atom atom_;
   double cholesky_threshold_;
-  std::vector<std::tuple<Shell, double>>
-      candidates_;  // full set of product functions and their weights
-  std::vector<Shell> reduced_shells_;  // reduced set of product functions
+  std::vector<Shell> candidate_shells_;  // full set of product functions
+  std::vector<Shell> reduced_shells_;    // reduced set of product functions
   bool reduced_shells_computed_ = false;
-  bool use_weights_;
-  std::vector<double> prim_obs_weights_;
 };
 
 }  // namespace libint2
diff --git a/include/libint2/soad_fock.h b/include/libint2/soad_fock.h
index d7a8e7dfa..811030c9f 100644
--- a/include/libint2/soad_fock.h
+++ b/include/libint2/soad_fock.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2004-2021 Edward F. Valeev
+ *  Copyright (C) 2004-2024 Edward F. Valeev
  *
  *  This file is part of Libint.
  *
@@ -23,6 +23,7 @@
 
 #include <libint2/atom.h>
 #include <libint2/basis.h>
+#include <libint2/chemistry/sto3g_atomic_density.h>
 
 #include <Eigen/Dense>
 
diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc
index f0b70acd1..ddbdc4718 100644
--- a/tests/hartree-fock/hartree-fock++.cc
+++ b/tests/hartree-fock/hartree-fock++.cc
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2004-2023 Edward F. Valeev
+ *  Copyright (C) 2004-2024 Edward F. Valeev
  *
  *  This file is part of Libint.
  *
@@ -44,10 +44,10 @@
 #endif  // LIBINT2_HAVE_BTAS
 
 // Libint Gaussian integrals library
-#include <libint2/chemistry/sto3g_atomic_density.h>
 #include <libint2/dfbs_generator.h>
 #include <libint2/diis.h>
 #include <libint2/lcao/molden.h>
+#include <libint2/soad_fock.h>
 #include <libint2/util/intpart_iter.h>
 
 #include <libint2.hpp>
@@ -322,7 +322,8 @@ int main(int argc, char* argv[]) {
       }
       const auto tstop = std::chrono::high_resolution_clock::now();
       const std::chrono::duration<double> time_elapsed = tstop - tstart;
-      std::cout << "computed shell-pair data in " << time_elapsed.count() << " seconds: # of {all,non-negligible} shell-pairs = {"
+      std::cout << "computed shell-pair data in " << time_elapsed.count()
+                << " seconds: # of {all,non-negligible} shell-pairs = {"
                 << obs.size() * (obs.size() + 1) / 2 << "," << nsp << "}"
                 << std::endl;
     }
@@ -359,7 +360,8 @@ int main(int argc, char* argv[]) {
     {  // use SOAD as the guess density
       const auto tstart = std::chrono::high_resolution_clock::now();
 
-      auto D_minbs = compute_soad(atoms);  // compute guess in minimal basis
+      auto D_minbs =
+          libint2::compute_soad(atoms);  // compute guess in minimal basis
       BasisSet minbs("STO-3G", atoms);
       if (minbs == obs)
         D = D_minbs;
@@ -707,7 +709,7 @@ int main(int argc, char* argv[]) {
     {  // compute hessian
       const auto ncoords = 3 * atoms.size();
       // # of elems in upper triangle
-      const auto nelem =  ncoords * (ncoords+1) / 2;
+      const auto nelem = ncoords * (ncoords + 1) / 2;
 #if LIBINT2_DERIV_ONEBODY_ORDER > 1
       // compute 1-e hessian
       Matrix H1 = Matrix::Zero(ncoords, ncoords);
@@ -2491,8 +2493,8 @@ void api_basic_compile_test(const BasisSet& obs,
         auto n2 = obs[s2].size();  // number of basis functions in second shell
 
         // loop over derivative shell sets
-        for(auto d=0; d!=6; ++d) {
-          const auto* buf4 = results4[d<3 ? d : d+3];
+        for (auto d = 0; d != 6; ++d) {
+          const auto* buf4 = results4[d < 3 ? d : d + 3];
           const auto* buf2 = results2[d];
 
           // this iterates over integrals in the order they are packed in array
diff --git a/tests/unit/test-df-autogen.cc b/tests/unit/test-df-autogen.cc
index d8fbe272f..71bca8db3 100644
--- a/tests/unit/test-df-autogen.cc
+++ b/tests/unit/test-df-autogen.cc
@@ -12,8 +12,8 @@
 
 Eigen::Tensor<double, 3> compute_eri3(const BasisSet &obs,
                                       const BasisSet &dfbs) {
-  const auto nshells = obs.size();
-  const auto nshells_df = dfbs.size();
+  long nshells = obs.size();
+  long nshells_df = dfbs.size();
   const auto &unitshell = libint2::Shell::unit();
 
   // construct the 2-electron 3-center repulsion integrals engine
@@ -25,27 +25,27 @@ Eigen::Tensor<double, 3> compute_eri3(const BasisSet &obs,
   engine.set(BraKet::xs_xx);
   const auto shell2bf = obs.shell2bf();
   const auto shell2bf_df = dfbs.shell2bf();
-  const auto nbf = obs.nbf();
-  const auto ndf = dfbs.nbf();
+  long nbf = obs.nbf();
+  long ndf = dfbs.nbf();
 
   Eigen::Tensor<double, 3> result(ndf, nbf, nbf);
   const auto &buf = engine.results();
-  for (auto s1 = 0; s1 != nshells_df; ++s1) {
-    const auto bf1 = shell2bf_df[s1];
-    const auto n1 = dfbs[s1].size();
-    for (auto s2 = 0; s2 != nshells; ++s2) {
-      const auto bf2 = shell2bf[s2];
-      const auto n2 = obs[s2].size();
-      for (auto s3 = 0; s3 != nshells;
+  for (long s1 = 0; s1 != nshells_df; ++s1) {
+    long bf1 = shell2bf_df[s1];
+    long n1 = dfbs[s1].size();
+    for (long s2 = 0; s2 != nshells; ++s2) {
+      long bf2 = shell2bf[s2];
+      long n2 = obs[s2].size();
+      for (long s3 = 0; s3 != nshells;
            ++s3) {  // only loop over unique ao shells
-        const auto bf3 = shell2bf[s3];
-        const auto n3 = obs[s3].size();
+        long bf3 = shell2bf[s3];
+        long n3 = obs[s3].size();
         engine.compute2<Operator::coulomb, BraKet::xs_xx, 0>(
             dfbs[s1], unitshell, obs[s2], obs[s3]);
-        size_t fij = 0;
-        for (auto f = 0; f != n1; ++f) {
-          for (auto i = 0; i != n2; ++i) {
-            for (auto j = 0; j != n3; ++j) {
+        long fij = 0;
+        for (long f = 0; f != n1; ++f) {
+          for (long i = 0; i != n2; ++i) {
+            for (long j = 0; j != n3; ++j) {
               result(bf1 + f, bf2 + i, bf3 + j) = buf[0][fij];
               ++fij;
             }
@@ -59,7 +59,7 @@ Eigen::Tensor<double, 3> compute_eri3(const BasisSet &obs,
 }
 
 Eigen::MatrixXd compute_eri2(const BasisSet &dfbs) {
-  const auto ndf = dfbs.nbf();
+  long ndf = dfbs.nbf();
   Eigen::MatrixXd result(ndf, ndf);
   result.fill(0.0);
   libint2::Engine engine(libint2::Operator::coulomb, dfbs.max_nprim(),
@@ -68,12 +68,12 @@ Eigen::MatrixXd compute_eri2(const BasisSet &dfbs) {
   engine.set(libint2::ScreeningMethod::Conservative);
   const auto shell2bf_df = dfbs.shell2bf();
   const auto &buf = engine.results();
-  for (size_t s1 = 0; s1 != dfbs.size(); ++s1) {
-    const size_t bf1 = shell2bf_df[s1];
-    const size_t n1 = dfbs[s1].size();
-    for (size_t s2 = 0; s2 != dfbs.size(); ++s2) {
-      const size_t bf2 = shell2bf_df[s2];
-      const size_t n2 = dfbs[s2].size();
+  for (long s1 = 0; s1 != dfbs.size(); ++s1) {
+    long bf1 = shell2bf_df[s1];
+    long n1 = dfbs[s1].size();
+    for (long s2 = 0; s2 != dfbs.size(); ++s2) {
+      long bf2 = shell2bf_df[s2];
+      long n2 = dfbs[s2].size();
       engine.compute(dfbs[s1], dfbs[s2]);
       // "map" buffer to a const Eigen Matrix, and copy it to the corresponding
       // blocks of the result
@@ -85,36 +85,36 @@ Eigen::MatrixXd compute_eri2(const BasisSet &dfbs) {
 }
 
 Eigen::Tensor<double, 4> compute_eri4(const BasisSet &obs) {
-  const auto nshells = obs.size();
+  long nshells = obs.size();
   libint2::Engine engine(libint2::Operator::coulomb,
                          std::max(obs.max_nprim(), obs.max_nprim()),
                          std::max(obs.max_l(), obs.max_l()), 0);
   engine.set(BraKet::xx_xx);
   const auto shell2bf = obs.shell2bf();
-  const auto nbf = obs.nbf();
+  long nbf = obs.nbf();
 
   Eigen::Tensor<double, 4> result(nbf, nbf, nbf, nbf);
   const auto &buf = engine.results();
-  for (auto s1 = 0; s1 != nshells; ++s1) {
-    const auto bf1 = shell2bf[s1];
-    const auto n1 = obs[s1].size();
+  for (long s1 = 0; s1 != nshells; ++s1) {
+    long bf1 = shell2bf[s1];
+    long n1 = obs[s1].size();
     // TODO: loop over all unique quartets: s1 >= s2 >= s3 >= s4
-    for (auto s2 = 0; s2 != nshells; ++s2) {
-      const auto bf2 = shell2bf[s2];
-      const auto n2 = obs[s2].size();
-      for (auto s3 = 0; s3 != nshells; ++s3) {
-        const auto bf3 = shell2bf[s3];
-        const auto n3 = obs[s3].size();
-        for (auto s4 = 0; s4 != nshells; ++s4) {
-          const auto bf4 = shell2bf[s4];
-          const auto n4 = obs[s4].size();
+    for (long s2 = 0; s2 != nshells; ++s2) {
+      long bf2 = shell2bf[s2];
+      long n2 = obs[s2].size();
+      for (long s3 = 0; s3 != nshells; ++s3) {
+        long bf3 = shell2bf[s3];
+        long n3 = obs[s3].size();
+        for (long s4 = 0; s4 != nshells; ++s4) {
+          long bf4 = shell2bf[s4];
+          long n4 = obs[s4].size();
           engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
               obs[s1], obs[s2], obs[s3], obs[s4]);
-          size_t ijkl = 0;
-          for (auto i = 0; i != n1; ++i) {
-            for (auto j = 0; j != n2; ++j) {
-              for (auto k = 0; k != n3; ++k) {
-                for (auto l = 0; l != n4; ++l) {
+          long ijkl = 0;
+          for (long i = 0; i != n1; ++i) {
+            for (long j = 0; j != n2; ++j) {
+              for (long k = 0; k != n3; ++k) {
+                for (long l = 0; l != n4; ++l) {
                   result(bf1 + i, bf2 + j, bf3 + k, bf4 + l) = buf[0][ijkl];
                   ++ijkl;
                 }
@@ -139,12 +139,12 @@ TEST_CASE("DFBS-Generator", "[dfbs-generator]") {
 
   // Using STO-3G basis set for Neon
   libint2::BasisSet bs("STO-3G", {atom});
-  const auto nao = bs.nbf();
+  long nao = bs.nbf();
 
   // Use automatic DF basis set generator with tight cholesky threshold
   libint2::DFBasisSetGenerator dfbs_generator(bs.shells(), 1e-4);
   const auto dfbs = dfbs_generator.reduced_basis();
-  const auto ndf = dfbs.nbf();
+  long ndf = dfbs.nbf();
 
   // Compute 2-center integrals
   const auto eri2 = compute_eri2(dfbs);
@@ -165,12 +165,12 @@ TEST_CASE("DFBS-Generator", "[dfbs-generator]") {
   // reconstruct 4-center integrals and compare with original
   double norm = 0.0;
   Eigen::Tensor<double, 4> eri4_df(nao, nao, nao, nao);
-  for (size_t i = 0; i < nao; i++) {
-    for (size_t j = 0; j < nao; j++) {
-      for (size_t k = 0; k < nao; k++) {
-        for (size_t l = 0; l < nao; l++) {
-          for (size_t X = 0; X < ndf; X++) {
-            for (size_t Y = 0; Y < ndf; Y++) {
+  for (long i = 0; i < nao; i++) {
+    for (long j = 0; j < nao; j++) {
+      for (long k = 0; k < nao; k++) {
+        for (long l = 0; l < nao; l++) {
+          for (long X = 0; X < ndf; X++) {
+            for (long Y = 0; Y < ndf; Y++) {
               eri4_df(i, j, k, l) +=
                   eri3(X, i, j) * eri2_inv(X, Y) * eri3(Y, k, l);
             }
@@ -182,3 +182,4 @@ TEST_CASE("DFBS-Generator", "[dfbs-generator]") {
   }
   REQUIRE_NOTHROW(norm < 1e-10);
 }
+//

From 48ab8d90e098c538258bf8320f31248ff7097cb4 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Wed, 24 Jan 2024 16:49:15 -0500
Subject: [PATCH 28/44] removed function overload of `shell_pivoted_cholesky`

---
 include/libint2/dfbs_generator.h | 72 --------------------------------
 1 file changed, 72 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index b4ba77d27..946cf2f56 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -216,78 +216,6 @@ inline std::vector<Shell> shell_pivoted_cholesky(
   }
   return reduced_shells;
 }
-
-/// @brief computes the reduced set of product functions via pivoted Cholesky
-/// decomposition
-/// @param shells_weights set of tuples of shells and their weights
-/// @param cholesky_threshold threshold for choosing a product function via
-/// pivoted Cholesky decomposition
-/// @return reduced set of product functions
-
-inline std::vector<Shell> shell_pivoted_cholesky(
-    const std::vector<std::tuple<Shell, double>> &shells_weights,
-    const double cholesky_threshold) {
-  std::vector<Shell> shells;
-  std::vector<double> weights;
-
-  for (auto &&shell_weight : shells_weights) {
-    const auto shell = get<0>(shell_weight);
-    shells.push_back(shell);
-    const auto L = shell.contr[0].l;
-    for (size_t l = 0; l < 2 * L + 1; ++l) {
-      weights.push_back(get<1>(shell_weight));
-    }
-  }
-
-  const auto n = shells.size();  // number of shells
-  std::vector<size_t>
-      shell_indices;  // hash map of basis function indices to shell indices
-  const auto L = shells[0].contr[0].l;  // all shells must have same L
-  const auto nbf = libint2::nbf(
-      shells);  // total number of basis functions in vector of shells
-  for (size_t i = 0; i < n; ++i) {
-    for (size_t j = 0; j < 2 * L + 1;
-         ++j)  // 2L+1 since libint2 strictly uses solid harmonics for 2c2b
-      // integrals
-      shell_indices.push_back(i);
-  }
-  assert(shell_indices.size() == nbf);
-  Atom dummy_atom;
-  auto C = compute_coulomb_matrix(shells);
-
-  for (size_t i = 0; i < C.rows(); ++i) {
-    for (size_t j = 0; j < C.cols(); ++j) {
-      C(i, j) *= std::sqrt(weights[i] * weights[j]);
-    }
-  }
-
-  std::vector<size_t> pivot(nbf);
-  for (auto i = 0; i < nbf; ++i) {
-    pivot[i] = i;
-  }
-  // set pivot indices in ascending order of off diagonal elements of Coulomb
-  // matrix see Phys. Rev. A 101, 032504 (Accurate reproduction of strongly
-  // repulsive interatomic potentials)
-  Eigen::MatrixXd C_off_diag = C;
-  auto col_sum = C_off_diag.colwise().sum();
-  // sort pivot indices in ascending order of column sums
-  std::sort(pivot.begin(), pivot.end(), [&col_sum](size_t i1, size_t i2) {
-    return col_sum[i1] < col_sum[i2];
-  });
-  // compute Cholesky decomposition
-  const auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot);
-
-  std::vector<Shell> reduced_shells;
-  for (size_t i = 0; i < reduced_pivots.size(); ++i) {
-    // check if the reduced shell is already in reduced shells
-    if (std::find(reduced_shells.begin(), reduced_shells.end(),
-                  shells[shell_indices[reduced_pivots[i]]]) ==
-        reduced_shells.end())
-      reduced_shells.push_back(shells[shell_indices[reduced_pivots[i]]]);
-  }
-  return reduced_shells;
-}
-
 }  // namespace detail
 
 /// @brief class produces density fitting basis sets from products of AO basis

From 43e79c0ebf68fc9e2af928d7a1b40064a504bab3 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 25 Jan 2024 09:39:32 -0500
Subject: [PATCH 29/44] enabled '--with-eri2' in CI configuration

---
 .github/workflows/cmake.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index ce187abc9..d5bd00389 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -105,7 +105,7 @@ jobs:
       shell: bash
       working-directory: ${{github.workspace}}/build/compiler
       run: |
-        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,2 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
+        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,2 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
         make -j3
         make check
         cd src/bin/test_eri && ./stdtests.pl && cd ../../..

From a3e73c5903526d7e743bf9c85f472a924c3de9cb Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 25 Jan 2024 09:59:20 -0500
Subject: [PATCH 30/44] to use autoDF `hartree-fock++.cc` uses
 `autodf(chol_tol)` as command line argument

---
 tests/hartree-fock/hartree-fock++.cc | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc
index ddbdc4718..036fd4a7e 100644
--- a/tests/hartree-fock/hartree-fock++.cc
+++ b/tests/hartree-fock/hartree-fock++.cc
@@ -224,9 +224,12 @@ int main(int argc, char* argv[]) {
     double cholesky_threshold = 1e-4;
 #ifdef HAVE_DENSITY_FITTING
     do_density_fitting = (argc > 3);
-    const auto dfbasisname = do_density_fitting ? argv[3] : "";
-    if ((strcmp(dfbasisname, "autodf") == 0) && argc > 4) {
-      cholesky_threshold = std::stod(argv[4]);
+    const std::string dfbasisname = do_density_fitting ? argv[3] : "";
+
+    // if autoDF use e.g. -> autodf(1e-6) as command line argument
+    if (dfbasisname.rfind("autodf", 0) == 0) {
+      cholesky_threshold =
+          std::stod(dfbasisname.substr(7, dfbasisname.length() - 8));
       std::cout << "New Cholesky threshold for generating df basis set = "
                 << cholesky_threshold << std::endl;
     }
@@ -291,7 +294,7 @@ int main(int argc, char* argv[]) {
 #ifdef HAVE_DENSITY_FITTING
     BasisSet dfbs;
     if (do_density_fitting) {
-      if (strcmp(dfbasisname, "autodf") == 0) {
+      if (dfbasisname.rfind("autodf", 0) == 0) {
         std::vector<Shell> dfbs_shells;
         for (auto&& atom : atoms) {
           libint2::DFBasisSetGenerator dfbs_generator(basisname, atom,

From 8f817d49684c2baddb71efc51f9f4683d4c4fc6d Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 25 Jan 2024 10:35:47 -0500
Subject: [PATCH 31/44] CI configures with `--enable-eri2=1`

---
 .github/workflows/cmake.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index d5bd00389..a72d2f51d 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -105,7 +105,7 @@ jobs:
       shell: bash
       working-directory: ${{github.workspace}}/build/compiler
       run: |
-        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,2 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
+        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,2 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-eri2=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
         make -j3
         make check
         cd src/bin/test_eri && ./stdtests.pl && cd ../../..

From 580ac6b654f65b91b587d0b8e16deb0407ab2f70 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Mon, 12 Feb 2024 16:17:43 -0500
Subject: [PATCH 32/44] cleanup and dox fixes in `dfbs_generator.h`

---
 .github/workflows/cmake.yml      |   2 +-
 include/libint2/dfbs_generator.h | 100 +++++++++++++++++--------------
 2 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index a72d2f51d..05835989a 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -105,7 +105,7 @@ jobs:
       shell: bash
       working-directory: ${{github.workspace}}/build/compiler
       run: |
-        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,2 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-eri2=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
+        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,1 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-eri2=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
         make -j3
         make check
         cd src/bin/test_eri && ./stdtests.pl && cd ../../..
diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 946cf2f56..226647bdb 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -36,8 +36,8 @@ namespace libint2 {
 
 namespace detail {
 
-/// Computes uncontracted shells from a vector of shells
-/// @param[in] cluster a vector of shells
+/// @brief Uncontracts a set of shells
+/// @param[in] shells a vector of shells to be uncontracted
 /// @return a vector of uncontracted shells
 inline std::vector<Shell> uncontract(const std::vector<Shell> &shells) {
   std::vector<Shell> primitive_shells;
@@ -54,12 +54,16 @@ inline std::vector<Shell> uncontract(const std::vector<Shell> &shells) {
   return primitive_shells;
 }
 
-/// @brief returns \Gamma(x)  of x
+/// @brief returns \Gamma(x)
 inline double gamma_function(const double x) { return std::tgamma(x); }
 
-/// @brief return effective exponent of product of two primitive shells
-/// @param shell1 first shell
-/// @param shell2 second shell
+/// @brief Computes an effective exponent for a product of two primitive
+/// gaussians as as described in J. Chem. Theory Comput. 2021, 17, 6886−6900.
+/// \alpha_{eff} = \left[ \frac{\Gamma(L+2)\Gamma(l_\mu +l_\nu +
+/// \frac{3}{2})}{\Gamma(L+ \frac{3}{2})\Gamma(l_\mu +l_\nu + 2)} \right]^2
+/// (\alpha_\mu + \alpha_\nu)
+/// @param shell1 first primitive shell
+/// @param shell2 second primitive shell
 /// @param L total angular momentum of product function
 /// @return effective exponent of product function
 inline double alpha_eff(const Shell &shell1, const Shell &shell2, const int L) {
@@ -74,8 +78,13 @@ inline double alpha_eff(const Shell &shell1, const Shell &shell2, const int L) {
   return prefactor * (alpha1 + alpha2);
 }
 
-/// @brief creates a set of product functions from a set of primitive shells
+/// @brief Creates a set of product functions from a set of primitive shells.
+/// Each pair of primitive shells produces a set of product functions with an
+/// effective exponent (\alpha_{eff} as described above) and angular momentum
+/// ranging from |l1-l2| to l1+l2 as described in J. Chem. Theory Comput. 2021,
+/// 17, 6886−6900.
 /// @param primitive_shells set of primitive shells
+/// @return set of product functions
 inline std::vector<Shell> product_functions(
     const std::vector<Shell> &primitive_shells) {
   std::vector<Shell> product_functions;
@@ -103,15 +112,6 @@ inline std::vector<Shell> product_functions(
   return product_functions;
 }
 
-/// @brief creates a set of candidate product shells from a set of primitive
-/// shells
-/// @param primitive_shells set of primitive shells
-/// @return set of candidate product shells
-inline std::vector<Shell> candidate_functions(
-    const std::vector<Shell> &primitive_shells) {
-  return product_functions(primitive_shells);
-}
-
 /// @brief returns a hash map of shell indices to basis function indices
 inline std::vector<size_t> map_shell_to_basis_function(
     const std::vector<libint2::Shell> &shells) {
@@ -127,7 +127,7 @@ inline std::vector<size_t> map_shell_to_basis_function(
   return result;
 }
 
-/// @brief computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
+/// @brief Computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
 /// @param shells set of shells
 /// @return Coulomb matrix
 inline Eigen::MatrixXd compute_coulomb_matrix(
@@ -155,7 +155,10 @@ inline Eigen::MatrixXd compute_coulomb_matrix(
   return result;
 }
 
-/// @brief Sorts a vector of shells by angular momentum
+/// @brief Splits a set of shells by angular momentum
+/// @param shells set of shells
+/// @return vector of vectors of shells split by angular momentum L. The i-th
+///  vector contains all shells with angular momentum i
 inline std::vector<std::vector<Shell>> split_by_L(
     const std::vector<Shell> &shells) {
   int lmax = max_l(shells);
@@ -168,8 +171,13 @@ inline std::vector<std::vector<Shell>> split_by_L(
   return sorted_shells;
 }
 
-/// @brief computes the reduced set of product functions via pivoted Cholesky
-/// decomposition
+/// @brief Computes the reduced set of product functions via pivoted Cholesky
+/// decomposition as described in J. Chem. Theory Comput. 2021, 17, 6886−6900.
+/// Cholesky decomposition is performed on the Coulomb matrix of the product
+/// functions and the pivot indices are sorted in ascending order of column sums
+/// of the Coulomb matrix. The reduced set of product functions is then
+/// constructed by selecting the product functions corresponding to the pivot
+/// indices.
 /// @param shells set of shells
 /// @param cholesky_threshold threshold for choosing a product function via
 /// pivoted Cholesky decomposition
@@ -218,22 +226,23 @@ inline std::vector<Shell> shell_pivoted_cholesky(
 }
 }  // namespace detail
 
-/// @brief class produces density fitting basis sets from products of AO basis
-/// functions eliminates linearly dependent functions via pivoted Cholesky
-/// decomposition see: J. Chem. Theory Comput. 2021, 17, 6886−6900
-/// (Straightforward and Accurate Automatic Auxiliary Basis Set Generation for
-/// Molecular Calculations with Atomic Orbital Basis Sets)
+/// @brief This class produces density fitting basis sets for an atom from
+/// products of AO basis functions and eliminates linearly dependent functions
+/// via pivoted Cholesky decomposition see: J. Chem. Theory Comput. 2021, 17,
+/// 6886−6900 (Straightforward and Accurate Automatic Auxiliary Basis Set
+/// Generation for Molecular Calculations with Atomic Orbital Basis Sets)
 class DFBasisSetGenerator {
  public:
-  /// @brief constructor for DFBS generator class, generates density fitting
-  /// basis set from products of AO basis functions see: J. Chem. Theory Comput.
-  /// 2021, 17, 6886−6900 (Straightforward and Accurate Automatic Auxiliary
-  /// Basis Set Generation for Molecular Calculations with Atomic Orbital Basis
-  /// Sets)
+  /// @brief constructor for DFBasisSetGenerator class, generates density
+  /// fitting basis set from products of AO basis functions of and atom see: J.
+  /// Chem. Theory Comput. 2021, 17, 6886−6900 (Straightforward and Accurate
+  /// Automatic Auxiliary Basis Set Generation for Molecular Calculations with
+  /// Atomic Orbital Basis Sets)
   /// @param obs_name name of basis set for AO functions
   /// @param atoms vector of atoms
-  /// @param cholesky_threshold threshold for choosing a product functions via
-  /// pivoted Cholesky decomposition
+  /// @param cholesky_threshold threshold for threshold for  pivoted Cholesky
+  /// decomposition to be performed on the Coulomb matrix of the product
+  /// functions
   DFBasisSetGenerator(std::string obs_name, const Atom &atom,
                       const double cholesky_threshold = 1e-7) {
     // get AO basis shells for each atom
@@ -242,19 +251,19 @@ class DFBasisSetGenerator {
     // get primitive shells from AO functions
     const auto primitive_shells = detail::uncontract(obs_shells);
     // compute candidate shells
-    candidate_shells_ = detail::candidate_functions(primitive_shells);
+    candidate_shells_ = detail::product_functions(primitive_shells);
     cholesky_threshold_ = cholesky_threshold;
   }
 
-  /// @brief constructor for DFBS generator class, generates density fitting
-  /// basis set from products of AO shells provided by user
-  /// @param cluster vector of vector of shells for each atom
-  /// @param cholesky_threshold threshold for choosing a product functions via
-  /// pivoted Cholesky decomposition
+  /// @brief constructor for DFBasisSetGenerator class, generates density
+  /// fitting basis set from products of AO shells provided by user
+  /// @param shells vector of vector of shells for each atom
+  /// @param cholesky_threshold threshold for  pivoted Cholesky decomposition to
+  /// be performed on the Coulomb matrix of the product functions
   DFBasisSetGenerator(const std::vector<Shell> &shells,
                       const double cholesky_threshold = 1e-7) {
     const auto primitive_shells = detail::uncontract(shells);
-    candidate_shells_ = detail::candidate_functions(primitive_shells);
+    candidate_shells_ = detail::product_functions(primitive_shells);
     cholesky_threshold_ = cholesky_threshold;
   }
 
@@ -265,8 +274,10 @@ class DFBasisSetGenerator {
   /// @brief returns the candidate shells (full set of product functions)
   std::vector<Shell> candidate_shells() { return candidate_shells_; }
 
-  /// @brief returns the reduced shells (reduced set of product functions)
-  /// computed via pivoted Cholesky decomposition
+  /// @brief returns a set of shells reduced via pivoted Cholesky
+  /// decomposition of the Coulomb matrix of the product functions for each
+  /// angular momentum L as described in J. Chem. Theory Comput. 2021, 17,
+  /// 6886−6900.
   std::vector<Shell> reduced_shells() {
     if (reduced_shells_computed_)
       return reduced_shells_;
@@ -288,9 +299,10 @@ class DFBasisSetGenerator {
     return reduced_shells_;
   }
 
-  /// @brief returns the reduced basis set (reduced set of product
-  /// functions)
-  /// computed via pivoted Cholesky decomposition
+  /// @brief returns a BasisSet of shells reduced via pivoted Cholesky
+  /// decomposition of the Coulomb matrix of the product functions for each
+  /// angular momentum L as described in J. Chem. Theory Comput. 2021, 17,
+  /// 6886−6900.
   const BasisSet reduced_basis() { return BasisSet(reduced_shells()); }
 
  private:

From 8665657b4d06fd3de599025df1eb1b8f7e209597 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Mon, 12 Feb 2024 18:09:04 -0500
Subject: [PATCH 33/44] revert changes to ci configuration to compute 1st order
 derivatives of eri2 and eri3

---
 .github/workflows/cmake.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 05835989a..3c131debe 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -105,7 +105,7 @@ jobs:
       shell: bash
       working-directory: ${{github.workspace}}/build/compiler
       run: |
-        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,1 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-eri2=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
+        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,1 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=0 --enable-eri3=0 --enable-eri2=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
         make -j3
         make check
         cd src/bin/test_eri && ./stdtests.pl && cd ../../..

From 1c239c49896659e53d563b413f8794ddc9d2cae2 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Mon, 12 Feb 2024 18:10:41 -0500
Subject: [PATCH 34/44] revert changes to ci configuration to compute 1st order
 derivatives of eri2 and eri3

---
 .github/workflows/cmake.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 05835989a..134967511 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -105,7 +105,7 @@ jobs:
       shell: bash
       working-directory: ${{github.workspace}}/build/compiler
       run: |
-        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,1 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-eri2=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
+        CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,1 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-eri2=0 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2
         make -j3
         make check
         cd src/bin/test_eri && ./stdtests.pl && cd ../../..

From 3f0f729d118b09b3abc38f6d3c4c834ca8382f2c Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Wed, 14 Feb 2024 15:50:26 -0500
Subject: [PATCH 35/44] renamed `shell_pivoted_cholesky()` to
 `pivoted_cholesky_in_L()`

---
 include/libint2/dfbs_generator.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index 226647bdb..b76f626e2 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -182,7 +182,7 @@ inline std::vector<std::vector<Shell>> split_by_L(
 /// @param cholesky_threshold threshold for choosing a product function via
 /// pivoted Cholesky decomposition
 /// @return reduced set of product functions
-inline std::vector<Shell> shell_pivoted_cholesky(
+inline std::vector<Shell> pivoted_cholesky_in_L(
     const std::vector<Shell> &shells, const double cholesky_threshold) {
   const auto n = shells.size();  // number of shells
   std::vector<size_t>
@@ -287,7 +287,7 @@ class DFBasisSetGenerator {
       for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) {
         std::vector<Shell> reduced_shells_L;
         if (candidate_splitted_in_L[i].size() > 1)
-          reduced_shells_L = detail::shell_pivoted_cholesky(
+          reduced_shells_L = detail::pivoted_cholesky_in_L(
               candidate_splitted_in_L[i], cholesky_threshold_);
         else
           reduced_shells_L = candidate_splitted_in_L[i];

From 7f2bbd70c0fd7f6e4345b404711ee4d06ecd83ad Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 22 Feb 2024 00:01:43 -0500
Subject: [PATCH 36/44] use `\f$` to embed LaTeX into dox of
 `DFBasisSetGenerator` and define `Engine` with `Braket` for coulomb integrals

---
 include/libint2/dfbs_generator.h | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index b76f626e2..a639c6dd2 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -54,14 +54,14 @@ inline std::vector<Shell> uncontract(const std::vector<Shell> &shells) {
   return primitive_shells;
 }
 
-/// @brief returns \Gamma(x)
+/// @brief returns \f$ \Gamma(x) \f$
 inline double gamma_function(const double x) { return std::tgamma(x); }
 
 /// @brief Computes an effective exponent for a product of two primitive
 /// gaussians as as described in J. Chem. Theory Comput. 2021, 17, 6886−6900.
-/// \alpha_{eff} = \left[ \frac{\Gamma(L+2)\Gamma(l_\mu +l_\nu +
+/// \f$ \alpha_{eff} = \left[ \frac{\Gamma(L+2)\Gamma(l_\mu +l_\nu +
 /// \frac{3}{2})}{\Gamma(L+ \frac{3}{2})\Gamma(l_\mu +l_\nu + 2)} \right]^2
-/// (\alpha_\mu + \alpha_\nu)
+/// (\alpha_\mu + \alpha_\nu) \f$
 /// @param shell1 first primitive shell
 /// @param shell2 second primitive shell
 /// @param L total angular momentum of product function
@@ -80,9 +80,9 @@ inline double alpha_eff(const Shell &shell1, const Shell &shell2, const int L) {
 
 /// @brief Creates a set of product functions from a set of primitive shells.
 /// Each pair of primitive shells produces a set of product functions with an
-/// effective exponent (\alpha_{eff} as described above) and angular momentum
-/// ranging from |l1-l2| to l1+l2 as described in J. Chem. Theory Comput. 2021,
-/// 17, 6886−6900.
+/// effective exponent (\f$ \alpha_{eff} \f$ as described above) and angular
+/// momentum ranging from \f$ |l1-l2| \leq L \leq l1+l2 \f$ as described in J.
+/// Chem. Theory Comput. 2021, 17, 6886−6900.
 /// @param primitive_shells set of primitive shells
 /// @return set of product functions
 inline std::vector<Shell> product_functions(
@@ -127,7 +127,8 @@ inline std::vector<size_t> map_shell_to_basis_function(
   return result;
 }
 
-/// @brief Computes the Coulomb matrix (\mu|rij^{-1}|\nu)  for a set of shells
+/// @brief Computes the Coulomb matrix \f$ (\mu|rij^{-1}|\nu) \f$  for a set of
+/// shells
 /// @param shells set of shells
 /// @return Coulomb matrix
 inline Eigen::MatrixXd compute_coulomb_matrix(
@@ -135,8 +136,11 @@ inline Eigen::MatrixXd compute_coulomb_matrix(
   const auto n = nbf(shells);
   Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n);
   using libint2::Engine;
-  Engine engine(libint2::Operator::coulomb, max_nprim(shells), max_l(shells));
-  engine.set(BraKet::xs_xs);
+  const auto oper = libint2::Operator::coulomb;
+  const auto braket = libint2::BraKet::xs_xs;
+  Engine engine(oper, max_nprim(shells), max_l(shells), 0,
+                std::numeric_limits<scalar_type>::epsilon(),
+                libint2::default_params(oper), braket);
   engine.set(ScreeningMethod::Conservative);
   const auto shell2bf = map_shell_to_basis_function(shells);
   const auto &buf = engine.results();

From cbe514457b8066eeb9cb165abdcb41b8b104fe84 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Thu, 22 Feb 2024 13:12:10 -0500
Subject: [PATCH 37/44] can configure with `--with-eri3-max-am=12` and
 `--with-eri2-max-am=12`

---
 configure.ac | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index df08efb8b..8ca43e3e3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -528,7 +528,7 @@ AS_HELP_STRING([--with-eri3-max-am=N],[Support 3-center ERIs for Gaussians of an
      else
       ERI3_MAX_AM=$withval
      fi
-     if test $ERI3_MAX_AM -ge $LIBINT_HARD_MAX_AM; then
+     if test $ERI3_MAX_AM -gt $LIBINT_HARD_MAX_AM; then
       AC_MSG_ERROR([Value for --with-eri3-max-am too high ($withval). Are you sure you know what you are doing?])
      fi
      AC_SUBST(ERI3_MAX_AM)
@@ -599,7 +599,7 @@ AS_HELP_STRING([--with-eri2-max-am=N],[Support 2-center ERIs for Gaussians of an
      else
       ERI2_MAX_AM=$withval
      fi
-     if test $ERI2_MAX_AM -ge $LIBINT_HARD_MAX_AM; then
+     if test $ERI2_MAX_AM -gt $LIBINT_HARD_MAX_AM; then
       AC_MSG_ERROR([Value for --with-eri2-max-am too high ($withval). Are you sure you know what you are doing?])
      fi
      AC_SUBST(ERI2_MAX_AM)

From e8b9b0ef5f777cf6e82eea010ac21540909333b5 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Mon, 4 Mar 2024 12:56:41 -0500
Subject: [PATCH 38/44] [skip ci] dox++

---
 include/libint2/soad_fock.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/libint2/soad_fock.h b/include/libint2/soad_fock.h
index 811030c9f..f3796e539 100644
--- a/include/libint2/soad_fock.h
+++ b/include/libint2/soad_fock.h
@@ -29,10 +29,15 @@
 
 namespace libint2 {
 
-// computes Superposition-Of-Atomic-Densities guess for the molecular density
-// matrix
-// in minimal basis; occupies subshells by smearing electrons evenly over the
-// orbitals
+/// @brief computes Superposition-Of-Atomic-Densities guess for the molecular
+/// 1-RDM
+
+/// The atomic densities are represented by the STO-3G AOs;
+/// occupies subshells by smearing electrons (of neutral ground-state atoms)
+/// evenly over the subshell components
+/// @param[in] atoms list of atoms
+/// @return the 1-RDM in the STO-3G AO basis
+/// @return the 1-RDM, normalized to the # of electrons/2
 Eigen::MatrixXd compute_soad(const std::vector<Atom> &atoms) {
   // compute number of atomic orbitals
   size_t nao = 0;

From 8ca894854cce51d2b2420117164d24464a55853f Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Mon, 4 Mar 2024 12:20:29 -0500
Subject: [PATCH 39/44] C -> C++ headers

---
 include/libint2/dfbs_generator.h       | 2 +-
 include/libint2/util/intrinsic_types.h | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h
index a639c6dd2..750f54b69 100644
--- a/include/libint2/dfbs_generator.h
+++ b/include/libint2/dfbs_generator.h
@@ -26,11 +26,11 @@
 #include <libint2/basis.h>
 #include <libint2/boys.h>
 #include <libint2/pivoted_cholesky.h>
-#include <math.h>
 
 #include <Eigen/Dense>
 #include <Eigen/Eigenvalues>
 #include <algorithm>
+#include <cmath>
 
 namespace libint2 {
 
diff --git a/include/libint2/util/intrinsic_types.h b/include/libint2/util/intrinsic_types.h
index fc874d394..8f333f1d8 100644
--- a/include/libint2/util/intrinsic_types.h
+++ b/include/libint2/util/intrinsic_types.h
@@ -22,12 +22,20 @@
 #define _libint2_include_libint2intrinsictypes_h_
 
 #include <libint2/config.h>
+#ifdef __cplusplus
+#include <climits>
+#else
 #include <limits.h>
+#endif
 
 /* determine default LIBINT2 64-bit integer */
 #ifdef HAVE_STDINT_H
 
+#ifdef __cplusplus
+#include <cstdint>
+#else
 #include <stdint.h>
+#endif
 /* because mpz_class does not mesh with long long types, only use those when
  * absolutely necessary */
 #if UINT_LEAST64_MAX != ULONG_MAX

From e9c577dbd104fc5bfbe5867c5308bf02d5a11531 Mon Sep 17 00:00:00 2001
From: Kshitij Surjuse <kshitijsurjuse100@gmail.com>
Date: Fri, 23 Feb 2024 10:30:09 -0500
Subject: [PATCH 40/44] factorials and related quantities are computed in a
 safer manner, with graceful switching to inexact arithmetic for large
 argument values ... this allows to compute SolidHarmonicsCoefficients for
 arbitrary (in practice) L

---
 include/libint2/shell.h          | 635 +++++++++++++++++++++++++++++--
 include/libint2/solidharmonics.h |  40 +-
 tests/unit/test-core.cc          |  14 +
 3 files changed, 650 insertions(+), 39 deletions(-)

diff --git a/include/libint2/shell.h b/include/libint2/shell.h
index 44c705b82..33422a143 100644
--- a/include/libint2/shell.h
+++ b/include/libint2/shell.h
@@ -33,12 +33,14 @@
 #include <cassert>
 #include <cmath>
 #include <iostream>
+#include <type_traits>
 #include <vector>
 
 namespace libint2 {
 
 namespace math {
-/// fac[k] = k!
+
+/// @brief fac[k] = k!, `0<=k<=20`
 static constexpr std::array<int64_t, 21> fac = {{1LL,
                                                  1LL,
                                                  2LL,
@@ -60,8 +62,224 @@ static constexpr std::array<int64_t, 21> fac = {{1LL,
                                                  6402373705728000LL,
                                                  121645100408832000LL,
                                                  2432902008176640000LL}};
-/// df_Kminus1[k] = (k-1)!!
-static constexpr std::array<int64_t, 31> df_Kminus1 = {{1LL,
+
+/// @brief fac_ld[k] = static_cast<long double>(k!), `0<=k<=170`
+/// @note This is an inexact representation for most values, so should only be
+/// used when `fac` does not
+///       suffice, i.e. for `k>=21`.
+/// @note This should be sufficient for computing with `double` reals
+static constexpr std::array<long double, 171> fac_ld = {
+    {1.l,
+     1.l,
+     2.l,
+     6.l,
+     24.l,
+     120.l,
+     720.l,
+     5040.l,
+     40320.l,
+     362880.l,
+     3.6288e6l,
+     3.99168e7l,
+     4.790016e8l,
+     6.2270208e9l,
+     8.71782912e10l,
+     1.307674368e12l,
+     2.0922789888e13l,
+     3.55687428096e14l,
+     6.402373705728e15l,
+     1.21645100408832e17l,
+     2.43290200817664e18l,
+     5.109094217170944e19l,
+     1.12400072777760768e21l,
+     2.585201673888497664e22l,
+     6.2044840173323943936e23l,
+     1.5511210043330985984e25l,
+     4.03291461126605635584e26l,
+     1.0888869450418352160768e28l,
+     3.04888344611713860501504e29l,
+     8.841761993739701954543616e30l,
+     2.6525285981219105863630848e32l,
+     8.22283865417792281772556288e33l,
+     2.6313083693369353016721801216e35l,
+     8.68331761881188649551819440128e36l,
+     2.9523279903960414084761860964352e38l,
+     1.03331479663861449296666513375232e40l,
+     3.719933267899012174679994481508352e41l,
+     1.37637530912263450463159795815809024e43l,
+     5.23022617466601111760007224100074291e44l,
+     2.03978820811974433586402817399028974e46l,
+     8.15915283247897734345611269596115894e47l,
+     3.34525266131638071081700620534407517e49l,
+     1.40500611775287989854314260624451157e51l,
+     6.04152630633738356373551320685139975e52l,
+     2.65827157478844876804362581101461589e54l,
+     1.19622220865480194561963161495657715e56l,
+     5.50262215981208894985030542880025489e57l,
+     2.5862324151116818064296435515361198e59l,
+     1.2413915592536072670862289047373375e61l,
+     6.08281864034267560872252163321295377e62l,
+     3.04140932017133780436126081660647688e64l,
+     1.55111875328738228022424301646930321e66l,
+     8.0658175170943878571660636856403767e67l,
+     4.27488328406002556429801375338939965e69l,
+     2.30843697339241380472092742683027581e71l,
+     1.2696403353658275925965100847566517e73l,
+     7.1099858780486345185404564746372495e74l,
+     4.05269195048772167556806019054323221e76l,
+     2.35056133128287857182947491051507468e78l,
+     1.38683118545689835737939019720389406e80l,
+     8.32098711274139014427634118322336438e81l,
+     5.07580213877224798800856812176625227e83l,
+     3.14699732603879375256531223549507641e85l,
+     1.98260831540444006411614670836189814e87l,
+     1.26886932185884164103433389335161481e89l,
+     8.24765059208247066672317030678549625e90l,
+     5.44344939077443064003729240247842753e92l,
+     3.64711109181886852882498590966054644e94l,
+     2.48003554243683059960099041856917158e96l,
+     1.71122452428141311372468338881272839e98l,
+     1.19785716699698917960727837216890987e100l,
+     8.5047858856786231752116764423992601e101l,
+     6.12344583768860868615240703852746727e103l,
+     4.47011546151268434089125713812505111e105l,
+     3.30788544151938641225953028221253782e107l,
+     2.48091408113953980919464771165940337e109l,
+     1.88549470166605025498793226086114656e111l,
+     1.45183092028285869634070784086308285e113l,
+     1.13242811782062978314575211587320462e115l,
+     8.94618213078297528685144171539831652e116l,
+     7.15694570462638022948115337231865322e118l,
+     5.79712602074736798587973423157810911e120l,
+     4.75364333701284174842138206989404947e122l,
+     3.94552396972065865118974711801206106e124l,
+     3.31424013456535326699938757913013129e126l,
+     2.81710411438055027694947944226061159e128l,
+     2.42270953836727323817655232034412597e130l,
+     2.1077572983795277172136005186993896e132l,
+     1.85482642257398439114796845645546284e134l,
+     1.65079551609084610812169192624536193e136l,
+     1.48571596448176149730952273362082574e138l,
+     1.35200152767840296255166568759495142e140l,
+     1.24384140546413072554753243258735531e142l,
+     1.15677250708164157475920516230624044e144l,
+     1.08736615665674308027365285256786601e146l,
+     1.03299784882390592625997020993947271e148l,
+     9.91677934870949689209571401541893801e149l,
+     9.61927596824821198533284259495636987e151l,
+     9.42689044888324774562618574305724247e153l,
+     9.33262154439441526816992388562667005e155l,
+     9.33262154439441526816992388562667005e157l,
+     9.42594775983835942085162312448293675e159l,
+     9.61446671503512660926865558697259548e161l,
+     9.90290071648618040754671525458177335e163l,
+     1.02990167451456276238485838647650443e166l,
+     1.08139675824029090050410130580032965e168l,
+     1.14628056373470835453434738414834943e170l,
+     1.22652020319613793935175170103873389e172l,
+     1.3246418194518289744998918371218326e174l,
+     1.44385958320249358220488210246279753e176l,
+     1.58824554152274294042537031270907729e178l,
+     1.76295255109024466387216104710707579e180l,
+     1.97450685722107402353682037275992488e182l,
+     2.23119274865981364659660702121871512e184l,
+     2.54355973347218755712013200418933523e186l,
+     2.92509369349301569068815180481773552e188l,
+     3.3931086844518982011982560935885732e190l,
+     3.96993716080872089540195962949863065e192l,
+     4.68452584975429065657431236280838416e194l,
+     5.57458576120760588132343171174197716e196l,
+     6.68950291344912705758811805409037259e198l,
+     8.09429852527344373968162284544935083e200l,
+     9.87504420083360136241157987144820801e202l,
+     1.21463043670253296757662432418812959e205l,
+     1.50614174151114087979501416199328069e207l,
+     1.88267717688892609974376770249160086e209l,
+     2.37217324288004688567714730513941708e211l,
+     3.01266001845765954480997707752705969e213l,
+     3.85620482362580421735677065923463641e215l,
+     4.97450422247728744039023415041268096e217l,
+     6.46685548922047367250730439553648525e219l,
+     8.47158069087882051098456875815279568e221l,
+     1.11824865119600430744996307607616903e224l,
+     1.48727070609068572890845089118130481e226l,
+     1.99294274616151887673732419418294845e228l,
+     2.6904727073180504835953876621469804e230l,
+     3.65904288195254865768972722051989335e232l,
+     5.01288874827499166103492629211225388e234l,
+     6.91778647261948849222819828311491036e236l,
+     9.6157231969410890041971956135297254e238l,
+     1.34620124757175246058760738589416156e241l,
+     1.89814375907617096942852641411076779e243l,
+     2.69536413788816277658850750803729027e245l,
+     3.85437071718007277052156573649332508e247l,
+     5.55029383273930478955105466055038812e249l,
+     8.04792605747199194484902925779806277e251l,
+     1.17499720439091082394795827163851716e254l,
+     1.72724589045463891120349865930862023e256l,
+     2.55632391787286558858117801577675794e258l,
+     3.80892263763056972698595524350736934e260l,
+     5.713383956445854590478932865261054e262l,
+     8.62720977423324043162318862654419154e264l,
+     1.31133588568345254560672467123471711e267l,
+     2.00634390509568239477828874698911719e269l,
+     3.08976961384735088795856467036324047e271l,
+     4.78914290146339387633577523906302272e273l,
+     7.47106292628289444708380937293831545e275l,
+     1.17295687942641442819215807155131553e278l,
+     1.85327186949373479654360975305107853e280l,
+     2.94670227249503832650433950735121486e282l,
+     4.71472363599206132240694321176194378e284l,
+     7.59070505394721872907517857093672949e286l,
+     1.22969421873944943411017892849175018e289l,
+     2.00440157654530257759959165344155279e291l,
+     3.28721858553429622726333031164414657e293l,
+     5.42391066613158877498449501421284184e295l,
+     9.00369170577843736647426172359331746e297l,
+     1.50361651486499904020120170784008402e300l,
+     2.52607574497319838753801886917134115e302l,
+     4.26906800900470527493925188889956654e304l,
+     7.25741561530799896739672821112926311e306l}};
+
+/// @brief computes factorial as an integer
+
+/// @param k non-negative integer
+/// @pre `k<fac.size()`
+/// @return factorial of \p k
+template <typename Int, typename = typename std::enable_if<
+                            std::is_integral<Int>::value>::type>
+constexpr int64_t fac_int(Int k) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || k >= 0);
+  assert(k < fac.size());
+#endif
+  return fac[k];
+}
+
+/// @brief computes factorial as a real number
+
+/// @param k non-negative integer
+/// @pre `k<fac_ld.size()`
+/// @return factorial of \p k , uses exact representation when possible
+template <
+    typename Real, typename Int,
+    typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+constexpr Real fac_real(Int k) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || k >= 0);
+  assert(k < fac.size() || k < fac_ld.size());
+  // for extended-precision types should do better ...
+  assert(k < fac.size() || (std::is_same<Real, double>::value ||
+                            std::is_same<Real, long double>::value));
+#endif
+
+  // use exact representation when available
+  return (k < fac.size()) ? static_cast<Real>(fac[k])
+                          : static_cast<Real>(fac_ld[k]);
+}
+
+/// `df_Kminus1[k] = (k-1)!!`, `0<=k<=34`
+static constexpr std::array<int64_t, 35> df_Kminus1 = {{1LL,
                                                         1LL,
                                                         1LL,
                                                         2LL,
@@ -91,15 +309,391 @@ static constexpr std::array<int64_t, 31> df_Kminus1 = {{1LL,
                                                         51011754393600LL,
                                                         213458046676875LL,
                                                         1428329123020800LL,
-                                                        6190283353629375LL}};
-/// bc(i,j) = binomial coefficient, i! / (j! (i-j)!)
-template <typename Int>
-int64_t bc(Int i, Int j) {
-  assert(i < Int(fac.size()));
-  assert(j < Int(fac.size()));
+                                                        6190283353629375LL,
+                                                        42849873690624000LL,
+                                                        191898783962510625LL,
+                                                        1371195958099968000LL,
+                                                        6332659870762850625LL}};
+
+/// @brief fac_ld[k] = static_cast<long double>(k!), `0<=k<=170`
+/// @note This is an inexact representation for most values, so should only be
+/// used when `fac` does not
+///       suffice, i.e. for `k>=21`.
+/// @note This should be sufficient for computing with `double` reals
+static constexpr std::array<long double, 302> df_Kminus1_ld = {
+    {1.l,
+     1.l,
+     1.l,
+     2.l,
+     3.l,
+     8.l,
+     15.l,
+     48.l,
+     105.l,
+     384.l,
+     945.l,
+     3840.l,
+     10395.l,
+     46080.l,
+     135135.l,
+     645120.l,
+     2.027025e6l,
+     1.032192e7l,
+     3.4459425e7l,
+     1.8579456e8l,
+     6.54729075e8l,
+     3.7158912e9l,
+     1.3749310575e10l,
+     8.17496064e10l,
+     3.16234143225e11l,
+     1.9619905536e12l,
+     7.905853580625e12l,
+     5.10117543936e13l,
+     2.13458046676875e14l,
+     1.4283291230208e15l,
+     6.190283353629375e15l,
+     4.2849873690624e16l,
+     1.91898783962510625e17l,
+     1.371195958099968e18l,
+     6.332659870762850625e18l,
+     4.6620662575398912e19l,
+     2.21643095476699771875e20l,
+     1.678343852714360832e21l,
+     8.200794532637891559375e21l,
+     6.3777066403145711616e22l,
+     3.19830986772877770815625e23l,
+     2.55108265612582846464e24l,
+     1.3113070457687988603440625e25l,
+     1.0714547155728479551488e26l,
+     5.63862029680583509947946875e26l,
+     4.71440074852053100265472e27l,
+     2.5373791335626257947657609375e28l,
+     2.1686243443194442612211712e29l,
+     1.192568192774434123539907640625e30l,
+     1.040939685273333245386162176e31l,
+     5.8435841445947272053455474390625e31l,
+     5.20469842636666622693081088e32l,
+     2.980227913743310874726229193921875e33l,
+     2.7064431817106664380040216576e34l,
+     1.57952079428395476360490147277859375e35l,
+     1.461479318123759876522171695104e36l,
+     8.68736436856175119982695810028226562e36l,
+     8.1842841814930553085241614925824e37l,
+     4.95179769008019818390136611716089141e38l,
+     4.746884825265972078944013665697792e39l,
+     2.92156063714731692850180600912492593e40l,
+     2.8481308951595832473664081994186752e41l,
+     1.78215198865986332638610166556620482e42l,
+     1.76584115499894161336717308363957862e43l,
+     1.12275575285571389562324404930670903e44l,
+     1.13013833919932263255499077352933032e45l,
+     7.29791239356214032155108632049360873e45l,
+     7.45891303871552937486293910529358011e46l,
+     4.88960130368663401543922783473071785e47l,
+     5.07206086632655997490679859159963447e48l,
+     3.37382489954377747065306720596419531e49l,
+     3.55044260642859198243475901411974413e50l,
+     2.39541567867608200416367771623457867e51l,
+     2.55631867662858622735302649016621577e52l,
+     1.74865344543353986303948473285124243e53l,
+     1.89167582070515380824123960272299967e54l,
+     1.31149008407515489727961354963843182e55l,
+     1.43767362373591689426334209806947975e56l,
+     1.0098473647378692709053024332215925e57l,
+     1.12138542651401517752540683649419421e58l,
+     7.97779418142916724015188922245058078e58l,
+     8.97108341211212142020325469195355365e59l,
+     6.46201328695762546452303027018497043e60l,
+     7.35628839793193956456666884740191399e61l,
+     5.36347102817482913555411512425352546e62l,
+     6.17928225426282923423600183181760775e63l,
+     4.55895037394860476522099785561549664e64l,
+     5.31418273866603314144296157536314267e65l,
+     3.96628682533528614574226813438548208e66l,
+     4.67648081002610916446980618631956555e67l,
+     3.52999527454840466971061863960307905e68l,
+     4.20883272902349824802282556768760899e69l,
+     3.21229569983904824943666296203880193e70l,
+     3.87212611070161838818099952227260027e71l,
+     2.9874350008503148719760965546960858e72l,
+     3.63979854405952128489013955093624426e73l,
+     2.83806325080779912837729172696128151e74l,
+     3.49420660229714043349453396889879449e75l,
+     2.75292135328356515452597297515244306e76l,
+     3.4243224702511976248246432895208186e77l,
+     2.72539213975072950298071324540091863e78l,
+     3.4243224702511976248246432895208186e79l,
+     2.75264606114823679801052037785492782e80l,
+     3.49280891965622157732113615531123497e81l,
+     2.83522544298268390195083598919057565e82l,
+     3.63252127644247044041398160152368437e83l,
+     2.97698671513181809704837778865010444e84l,
+     3.85047255302901866683882049761510543e85l,
+     3.18537578519104536384176423385561175e86l,
+     4.15851035727134016018592613742431386e87l,
+     3.4720596058582394465875230149026168e88l,
+     4.57436139299847417620451875116674525e89l,
+     3.85398616250264578571215054654190465e90l,
+     5.12328476015829107734906100130675468e91l,
+     4.35500436362798973785473011759235226e92l,
+     5.84054462658045182817792954148970034e93l,
+     5.0082550181721881985329396352312051e94l,
+     6.77503176683332412068639826812805239e95l,
+     5.85965837126146019228353937322050996e96l,
+     7.99453748486332246240994995639110182e97l,
+     6.97299346180113762881741185413240686e98l,
+     9.59344498183598695489193994766932219e99l,
+     8.4373220887793765308690683435002123e100l,
+     1.17040028778399040849681667361565731e102l,
+     1.03779061691986331329689540625052611e103l,
+     1.45129635685214810653605267528341506e104l,
+     1.29723827114982914162111925781315764e105l,
+     1.82863340963370661423542637085710298e106l,
+     1.6474926043602830098588214574227102e107l,
+     2.34065076433114446622134575469709181e108l,
+     2.12526545962476508271787968007529616e109l,
+     3.04284599363048780608774948110621935e110l,
+     2.78409775210844225836042238089863797e111l,
+     4.01655671159224390403582931506020954e112l,
+     3.7028500103042282036193617665951885e113l,
+     5.38218599353360683140801128218068079e114l,
+     4.99884751391070807488613838490350448e115l,
+     7.31977295120570529071489534376572587e116l,
+     6.84842109405767006259400958731780114e117l,
+     1.01012866726638733011865555743967017e119l,
+     9.51930532074016138700567332637174358e119l,
+     1.41418013417294226216611778041553824e121l,
+     1.34222205022436275556779993901841585e122l,
+     2.0081357905255780122758872481900643e123l,
+     1.91937753182083874046195391279633466e124l,
+     2.89171553835683233767727763739369259e125l,
+     2.78309742114021617366983317355468525e126l,
+     4.22190468600097521300882535059479118e127l,
+     4.09115320907611777529465476512538732e128l,
+     6.24841893528144331525306151888029095e129l,
+     6.09581828152341548518903560003682711e130l,
+     9.37262840292216497287959227832043642e131l,
+     9.20468560510035738263544375605560894e132l,
+     1.42463951724416907587769802630470634e134l,
+     1.40831689758035467954322289467650817e135l,
+     2.19394485655602037685165496050924776e136l,
+     2.18289119124954975329199548674858766e137l,
+     3.4225539762273917878885817383944265e138l,
+     3.42713917026179311266843291419528263e139l,
+     5.40763528243927902486395914666319387e140l,
+     5.44915128071625104914280833357049938e141l,
+     8.6522164519028464397823346346611102e142l,
+     8.773133561953164189119921417048504e143l,
+     1.40165906520826112324473821081509985e145l,
+     1.43002077059836576282654719097890615e146l,
+     2.29872086694154824212137066573676376e147l,
+     2.35953427148730350866380286511519515e148l,
+     3.81587663912297008192147530512302784e149l,
+     3.9404222333837968594685507847423759e150l,
+     6.41067275372658973762807851260668677e151l,
+     6.65931357441861669250185082621461527e152l,
+     1.08981436813352025539677334714313675e154l,
+     1.13874262122558345441781649128269921e155l,
+     1.87448071318965483928245015708619521e156l,
+     1.97002473472025937614282252991906964e157l,
+     3.26159644094999942035146327332997967e158l,
+     3.44754328576045390824993942735837186e159l,
+     5.74040973607199897981857536106076421e160l,
+     6.1021516157960034176023927864243182e161l,
+     1.02179293302081581840770641426881603e163l,
+     1.09228513922748461175082830876995296e164l,
+     1.83922727943746847313387154568386885e165l,
+     1.97703610200174714726899923887361485e166l,
+     3.34739364857619262110364621314464131e167l,
+     3.61797606666319727950226860713871518e168l,
+     6.15920431338019442283070903218614002e169l,
+     6.69325572332691496707919692320662308e170l,
+     1.14561200228871616264651187998662204e172l,
+     1.25163882026213309884380982463963852e173l,
+     2.15375056430278638577544233437484944e174l,
+     2.3655973702954315568148005685689168e175l,
+     4.09212607217529413297334043531221394e176l,
+     4.51829097726427427351626908596663108e177l,
+     7.85688205857656473530881363579945076e178l,
+     8.72030158612004934788639933591559799e179l,
+     1.52423511936385355864990984534509345e181l,
+     1.70045880929340962283784787050354161e182l,
+     2.98750083395315297495382329687638316e183l,
+     3.34990385430801695699056030489197697e184l,
+     5.91525165122724289040857012781523865e185l,
+     6.66630867007295374441121500673503416e186l,
+     1.18305033024544857808171402556304773e188l,
+     1.33992804268466370262665421635374187e189l,
+     2.38976166709580612772506233163735642e190l,
+     2.72005392664986731633210805919809599e191l,
+     4.87511380087544450055912715654020709e192l,
+     5.57611054963222799848082152135609678e193l,
+     1.00427344298034156711518019424728266e195l,
+     1.15425488377387119568553005492071203e196l,
+     2.08888876139911045959957480403434793e197l,
+     2.41239270708739079898275781478428815e198l,
+     4.38666639893813196515910708847213066e199l,
+     5.090148611954394585853618989194848e200l,
+     9.299732765748839766137307027560917e201l,
+     1.08420165434628604678682084469850262e203l,
+     1.99014281187025170995338370389803624e204l,
+     2.33103355684451500059166481610178064e205l,
+     4.29870847363974369349930880041975827e206l,
+     5.05834281835259755128391265094086399e207l,
+     9.37118447253464125182849318491507304e208l,
+     1.10777707721921886373117687055604921e210l,
+     2.06166058395762107540226850068131607e211l,
+     2.44818734065447368884590088392886876e212l,
+     4.57688649638591878739303607151252167e213l,
+     5.45945776965947632612635897116137734e214l,
+     1.02522257519044580837604008001880485e216l,
+     1.2283779981733821733784307685113099e217l,
+     2.31700301993040752692985058084249897e218l,
+     2.78841805585357753356903784452067348e219l,
+     5.28276688544132916140005932432089765e220l,
+     6.38547734790469255187309666395234226e221l,
+     1.21503638365150570712201364459380646e223l,
+     1.47504526736598397948268532937299106e224l,
+     2.81888441007149324052307165545763099e225l,
+     3.43685547296274267219465681743906917e226l,
+     6.59618951956729418282398767377085651e227l,
+     8.07661036146244527965744352098181256e228l,
+     1.55670072661788142714646109100992214e230l,
+     1.91415665566659953127881411447268958e231l,
+     3.70494772935055779660857739660361469e232l,
+     4.57483440704317287975636573358972809e233l,
+     8.89187455044133871186058575184867524e234l,
+     1.10253509209740466402128414179512447e236l,
+     2.15183364120680396827026175194737941e237l,
+     2.67916027379669333357172046456215246e238l,
+     5.25047408454460168257943867475160576e239l,
+     6.56394267080189866725071513817727353e240l,
+     1.29161662479797201391454191398889502e242l,
+     1.62129383968806897081092663912978656e243l,
+     3.20320922949897059450806394669245964e244l,
+     4.03702166082329173731920733143316854e245l,
+     8.0080230737474264862701598667311491e246l,
+     1.0132924368666462260671210401897253e248l,
+     2.01802181458435147454008028641624957e249l,
+     2.56362986527261495194981623168000502e250l,
+     5.12577540904425274533180392749727392e251l,
+     6.53725615644516812747203139078401279e252l,
+     1.31219850471532870280494180543930212e254l,
+     1.68007483220640820876031206743149129e255l,
+     3.38547214216554805323674985803339948e256l,
+     4.35139381541459726068920825464756243e257l,
+     8.80222756963042493841554963088683864e258l,
+     1.1357137858232098850398833544630138e260l,
+     2.30618362324317133386487400329235172e261l,
+     2.98692725671504199765489322223772628e262l,
+     6.08832476536197232140326736869180855e263l,
+     7.91535723029486129378546703892997465e264l,
+     1.61949438758628463749326912007202107e266l,
+     2.11340038048872796544071969939430323e267l,
+     4.34024495873124282848196124179301648e268l,
+     5.68504702351467822703553599137067569e269l,
+     1.17186613885743556369012953528411445e271l,
+     1.54064774337247779952663025366145311e272l,
+     3.1874758976922247332371523359727913e273l,
+     4.205968339406864392707700592495767e274l,
+     8.73368395967669576906979740056544817e275l,
+     1.15664129333688770799461766293633592e277l,
+     2.41049677287076803226326408255606369e278l,
+     3.20389638254317895114509092633365051e279l,
+     6.70118102858073512969187414950585707e280l,
+     8.93887090729546927369480368447088492e281l,
+     1.87633068800260583631372476186163998e283l,
+     2.51182272495002686590823983533631866e284l,
+     5.29125254016734845840470382844982474e285l,
+     7.10845831160857603052031873400178182e286l,
+     1.50271572140752696218693588727975023e288l,
+     2.02591061880844416869829083919050782e289l,
+     4.29776696322552711185463663762008565e290l,
+     5.81436347598023476416409470847675744e291l,
+     1.23775688540895180821413535163458467e293l,
+     1.6803510445582878468434233707497829e294l,
+     3.58949496768596024382099251974029553e295l,
+     4.88982153966461763431436200888186824e296l,
+     1.0481325305643003911957298157641663e298l,
+     1.43271771112173296685410806860238739e299l,
+     3.08150963985904315011544565834664891e300l,
+     4.22651724780911225221961880237704281e301l,
+     9.12126853398276772434171914870608078e302l,
+     1.25527562259930633890922678430598171e304l,
+     2.71813802312686478185383230631441207e305l,
+     3.75327411157192595333858808507488533e306l,
+     8.15441406938059434556149691894323621e307l}};
+
+/// @brief computes double factorial as an integer
+
+/// @param k non-negative integer
+/// @pre `k<df_Kminus1.size()`
+/// @return double factorial of `k-1`; N.B.: `-1!! = 1`
+template <typename Int, typename = typename std::enable_if<
+                            std::is_integral<Int>::value>::type>
+constexpr int64_t df_Kminus1_int(Int k) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || k >= 0);
+  assert(k < df_Kminus1.size());
+#endif
+  return df_Kminus1[k];
+}
+
+/// @brief computes double factorial as a real number
+
+/// @param k non-negative integer
+/// @pre `k<df_Kminus1.size()`
+/// @return double factorial of `k-1` (note that `-1!!=1`), uses exact
+/// representation when possible
+template <
+    typename Real, typename Int,
+    typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+constexpr Real df_Kminus1_real(Int k) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || k >= 0);
+  assert(k < df_Kminus1.size() || k < df_Kminus1_ld.size());
+  // for extended-precision types should do better ...
+  assert(k < df_Kminus1.size() || (std::is_same<Real, double>::value ||
+                                   std::is_same<Real, long double>::value));
+#endif
+  // use exact representation when available
+  return (k < df_Kminus1.size()) ? static_cast<Real>(df_Kminus1[k])
+                                 : static_cast<Real>(df_Kminus1_ld[k]);
+}
+
+/// @brief computes binomial coefficient as an integer
+
+/// @param i non-negative integer
+/// @param j non-negative integer
+/// @pre `i>=j`
+/// return `i! / (j! * (i-j)!)`
+template <typename Int, typename = typename std::enable_if<
+                            std::is_integral<Int>::value>::type>
+constexpr int64_t bc_int(Int i, Int j) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
   assert(i >= j);
-  return fac[i] / (fac[j] * fac[i - j]);
+#endif
+  return fac_int(i) / (fac_int(j) * fac_int(i - j));
 }
+
+/// @brief computes binomial coefficient as a real number
+
+/// @param i non-negative integer
+/// @param j non-negative integer
+/// @pre `i>=j`
+/// @return binomial coefficient, (\p i , \p j), uses exact representation when
+/// possible
+template <
+    typename Real, typename Int,
+    typename = typename std::enable_if<std::is_integral<Int>::value>::type>
+constexpr Real bc_real(Int i, Int j) {
+#if LIBINT2_CPLUSPLUS_STD >= 2014
+  assert(!std::is_signed<Int>::value || (i >= 0 && j >= 0));
+  assert(i >= j);
+#endif
+  return fac_real<Real>(i) / (fac_real<Real>(j) * fac_real<Real>(i - j));
+}
+
 }  // namespace math
 
 /// generally-contracted Solid-Harmonic/Cartesion Gaussian Shell
@@ -319,15 +913,14 @@ struct Shell {
     const auto alpha = this->alpha.at(p);
     assert(alpha >= 0.0);
     const auto l = contr.at(c).l;
-    assert(l <= 15);  // due to df_Kminus1[] a 64-bit integer type; kinda
-                      // ridiculous restriction anyway
 
-    using libint2::math::df_Kminus1;
+    using libint2::math::df_Kminus1_real;
     const auto sqrt_Pi_cubed = real_t{5.56832799683170784528481798212};
     const auto two_alpha = 2 * alpha;
     const auto two_alpha_to_am32 = pow(two_alpha, l + 1) * sqrt(two_alpha);
-    const auto one_over_N = sqrt((sqrt_Pi_cubed * df_Kminus1[2 * l]) /
-                                 (pow(2, l) * two_alpha_to_am32));
+    const auto one_over_N =
+        sqrt((sqrt_Pi_cubed * df_Kminus1_real<real_t>(2 * l)) /
+             (pow(2, l) * two_alpha_to_am32));
     return contr.at(c).coeff[p] * one_over_N;
   }
 
@@ -360,13 +953,11 @@ struct Shell {
   /// before computing integrals. \warning Must be done only once. \note this is
   /// now private
   void renorm() {
-    using libint2::math::df_Kminus1;
+    using libint2::math::df_Kminus1_real;
     using std::pow;
     const auto sqrt_Pi_cubed = real_t{5.56832799683170784528481798212};
     const auto np = nprim();
     for (auto& c : contr) {
-      assert(c.l <= 15);  // due to df_Kminus1[] a 64-bit integer type; kinda
-                          // ridiculous restriction anyway
       for (auto p = 0ul; p != np; ++p) {
         assert(alpha[p] >= 0);
         if (alpha[p] != 0) {
@@ -375,7 +966,7 @@ struct Shell {
               pow(two_alpha, c.l + 1) * sqrt(two_alpha);
           const auto normalization_factor =
               sqrt(pow(2, c.l) * two_alpha_to_am32 /
-                   (sqrt_Pi_cubed * df_Kminus1[2 * c.l]));
+                   (sqrt_Pi_cubed * df_Kminus1_real<real_t>(2 * c.l)));
 
           c.coeff[p] *= normalization_factor;
         }
@@ -389,8 +980,8 @@ struct Shell {
         for (auto p = 0ul; p != np; ++p) {
           for (decltype(p) q = 0ul; q <= p; ++q) {
             auto gamma = alpha[p] + alpha[q];
-            norm += (p == q ? 1 : 2) * df_Kminus1[2 * c.l] * sqrt_Pi_cubed *
-                    c.coeff[p] * c.coeff[q] /
+            norm += (p == q ? 1 : 2) * df_Kminus1_real<real_t>(2 * c.l) *
+                    sqrt_Pi_cubed * c.coeff[p] * c.coeff[q] /
                     (pow(2, c.l) * pow(gamma, c.l + 1) * sqrt(gamma));
           }
         }
@@ -621,7 +1212,7 @@ struct ShellPair {
                        std::abs(P[2] - B[2])),
               max_l2);
           const auto fac_l1_fac_l2_oogamma_to_l =
-              math::fac[max_l1] * math::fac[max_l2] *
+              math::fac_real<Real>(max_l1) * math::fac_real<Real>(max_l2) *
               std::pow(oogamma, max_l1 + max_l2);
           nonspherical_scr_factor =
               std::max(maxabs_PA_i_to_l1 * maxabs_PB_i_to_l2,
diff --git a/include/libint2/solidharmonics.h b/include/libint2/solidharmonics.h
index f97dc7632..d09705f67 100644
--- a/include/libint2/solidharmonics.h
+++ b/include/libint2/solidharmonics.h
@@ -83,8 +83,7 @@ class SolidHarmonicsCoefficients {
   static const SolidHarmonicsCoefficients& instance(unsigned int l) {
     static std::vector<SolidHarmonicsCoefficients> shg_coefs(
         SolidHarmonicsCoefficients::CtorHelperIter(0),
-        SolidHarmonicsCoefficients::CtorHelperIter(11));
-    assert(l <= 10);  // see coeff() for explanation of the upper limit on l
+        SolidHarmonicsCoefficients::CtorHelperIter(13));
     return shg_coefs[l];
   }
 
@@ -108,9 +107,9 @@ class SolidHarmonicsCoefficients {
    harmonic Ylm is requested.
    ---------------------------------------------------------------------------------------------*/
   static Real coeff(int l, int m, int lx, int ly, int lz) {
-    using libint2::math::bc;
-    using libint2::math::df_Kminus1;
-    using libint2::math::fac;
+    using libint2::math::bc_real;
+    using libint2::math::df_Kminus1_real;
+    using libint2::math::fac_real;
 
     auto abs_m = std::abs(m);
     if ((lx + ly - abs_m) % 2) return 0.0;
@@ -127,13 +126,17 @@ class SolidHarmonicsCoefficients {
     auto i = abs_m - lx;
     if (comp != parity(abs(i))) return 0.0;
 
-    assert(l <= 10);  // libint2::math::fac[] is only defined up to 20
-    Real pfac =
-        sqrt(((Real(fac[2 * lx]) * Real(fac[2 * ly]) * Real(fac[2 * lz])) /
-              fac[2 * l]) *
-             ((Real(fac[l - abs_m])) / (fac[l])) * (Real(1) / fac[l + abs_m]) *
-             (Real(1) / (fac[lx] * fac[ly] * fac[lz])));
-    /*  pfac = sqrt(fac[l-abs_m]/(fac[l]*fac[l]*fac[l+abs_m]));*/
+    Real pfac;
+    pfac = sqrt(((fac_real<Real>(2 * lx) * fac_real<Real>(2 * ly) *
+                  fac_real<Real>(2 * lz)) /
+                 fac_real<Real>(2 * l)) *
+                ((fac_real<Real>(l - abs_m)) / (fac_real<Real>(l))) *
+                (Real(1) / fac_real<Real>(l + abs_m)) *
+                (Real(1) / (fac_real<Real>(lx) * fac_real<Real>(ly) *
+                            fac_real<Real>(lz))));
+
+    /*  pfac =
+     * sqrt(fac_real<Real>(l-abs_m)/(fac_real<Real>(l)*fac_real<Real>(l)*fac[l+abs_m]));*/
     pfac /= (1L << l);
     if (m < 0)
       pfac *= parity((i - 1) / 2);
@@ -144,19 +147,22 @@ class SolidHarmonicsCoefficients {
     auto i_max = (l - abs_m) / 2;
     Real sum = 0;
     for (auto i = i_min; i <= i_max; i++) {
-      Real pfac1 = bc(l, i) * bc(i, j);
-      pfac1 *= (Real(parity(i) * fac[2 * (l - i)]) / fac[l - abs_m - 2 * i]);
+      Real pfac1 = bc_real<Real>(l, i) * bc_real<Real>(i, j);
+      pfac1 *= (Real(parity(i) * fac_real<Real>(2 * (l - i))) /
+                fac_real<Real>(l - abs_m - 2 * i));
       Real sum1 = 0.0;
       const int k_min = std::max((lx - abs_m) / 2, 0);
       const int k_max = std::min(j, lx / 2);
       for (int k = k_min; k <= k_max; k++) {
         if (lx - 2 * k <= abs_m)
-          sum1 += bc(j, k) * bc(abs_m, lx - 2 * k) * parity(k);
+          sum1 += bc_real<Real>(j, k) * bc_real<Real>(abs_m, lx - 2 * k) *
+                  parity(k);
       }
       sum += pfac1 * sum1;
     }
-    sum *= sqrt(Real(df_Kminus1[2 * l]) /
-                (df_Kminus1[2 * lx] * df_Kminus1[2 * ly] * df_Kminus1[2 * lz]));
+    sum *= sqrt(df_Kminus1_real<Real>(2 * l) /
+                (df_Kminus1_real<Real>(2 * lx) * df_Kminus1_real<Real>(2 * ly) *
+                 df_Kminus1_real<Real>(2 * lz)));
 
     Real result = (m == 0) ? pfac * sum : M_SQRT2 * pfac * sum;
     return result;
diff --git a/tests/unit/test-core.cc b/tests/unit/test-core.cc
index dd7e0d6f0..a3bcbd19c 100644
--- a/tests/unit/test-core.cc
+++ b/tests/unit/test-core.cc
@@ -18,9 +18,23 @@
  *
  */
 
+#include <libint2/solidharmonics.h>
+
 #include "catch.hpp"
 #include "fixture.h"
 
+TEST_CASE("SolidHarmonicsCoefficients ctor", "[shell]") {
+  using libint2::solidharmonics::SolidHarmonicsCoefficients;
+  REQUIRE_NOTHROW(SolidHarmonicsCoefficients<long double>::instance(12));
+  auto sh12 = SolidHarmonicsCoefficients<long double>::instance(12);
+  CHECK_NOTHROW(sh12.coeff(12, 0, 0, 0, 12));
+  CHECK(sh12.coeff(12, 0, 0, 0, 12) == Approx(1));
+  CHECK_NOTHROW(SolidHarmonicsCoefficients<long double>::instance(25).coeff(
+      25, 0, 0, 0, 25));
+  CHECK(SolidHarmonicsCoefficients<long double>::instance(25).coeff(
+            25, 0, 0, 0, 25) == Approx(1));
+}
+
 TEST_CASE("Shell ctor", "[shell]") {
   REQUIRE_NOTHROW(Shell{});
   auto s0 = Shell{};

From c02797c9e108391a627633d62075619455f0806e Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Mon, 4 Mar 2024 17:00:03 -0500
Subject: [PATCH 41/44] LIBINT_HARD_MAX_AM appears in config.h

---
 configure.ac                     | 1 +
 include/libint2/config.h.in      | 3 +++
 include/libint2/solidharmonics.h | 3 ++-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 8ca43e3e3..aded39d1f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -341,6 +341,7 @@ esac
 )
 
 LIBINT_HARD_MAX_AM=12
+AC_DEFINE_UNQUOTED(LIBINT_HARD_MAX_AM,$LIBINT_HARD_MAX_AM)
 LIBINT_MAX_AM=4
 AC_ARG_WITH(max-am,
 AS_HELP_STRING([--with-max-am=N],[Support Gaussians of angular momentum up to N. Can specify values for each derivative levels as a list N0,N1,N2...]),
diff --git a/include/libint2/config.h.in b/include/libint2/config.h.in
index bd42b26e2..8c807f5f1 100644
--- a/include/libint2/config.h.in
+++ b/include/libint2/config.h.in
@@ -47,6 +47,9 @@
 /* Prefix for all names in API */
 #undef LIBINT_API_PREFIX
 
+/* Max AM supported by Libint *in principle* */
+#undef LIBINT_HARD_MAX_AM
+
 /* Max AM (same for all derivatives; if not defined see LIBINT_MAX_AM_LIST) */
 #undef LIBINT_MAX_AM
 
diff --git a/include/libint2/solidharmonics.h b/include/libint2/solidharmonics.h
index d09705f67..166bc23e8 100644
--- a/include/libint2/solidharmonics.h
+++ b/include/libint2/solidharmonics.h
@@ -83,7 +83,8 @@ class SolidHarmonicsCoefficients {
   static const SolidHarmonicsCoefficients& instance(unsigned int l) {
     static std::vector<SolidHarmonicsCoefficients> shg_coefs(
         SolidHarmonicsCoefficients::CtorHelperIter(0),
-        SolidHarmonicsCoefficients::CtorHelperIter(13));
+        SolidHarmonicsCoefficients::CtorHelperIter(LIBINT_HARD_MAX_AM + 1));
+    assert(l < shg_coefs.size());
     return shg_coefs[l];
   }
 

From c6776f6a84c37e1137a5fa9e1e3451af9e931adc Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Mon, 4 Mar 2024 19:08:47 -0500
Subject: [PATCH 42/44] to allow SolidHarmonicsCoefficients support higher
 angular momenta get rid of chars and shorts

---
 include/libint2/solidharmonics.h | 35 ++++++++++++++++----------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/include/libint2/solidharmonics.h b/include/libint2/solidharmonics.h
index 166bc23e8..b34c983dd 100644
--- a/include/libint2/solidharmonics.h
+++ b/include/libint2/solidharmonics.h
@@ -93,11 +93,11 @@ class SolidHarmonicsCoefficients {
     return &values_[0] + row_offset_[r];
   }
   /// returns ptr to row indices
-  const unsigned char* row_idx(size_t r) const {
+  const unsigned int* row_idx(size_t r) const {
     return &colidx_[0] + row_offset_[r];
   }
   /// number of nonzero elements in row \c r
-  unsigned char nnz(size_t r) const {
+  unsigned int nnz(size_t r) const {
     return row_offset_[r + 1] - row_offset_[r];
   }
 
@@ -171,25 +171,24 @@ class SolidHarmonicsCoefficients {
 
  private:
   std::vector<Real> values_;  // elements
-  std::vector<unsigned short>
-      row_offset_;                     // "pointer" to the beginning of each row
-  std::vector<unsigned char> colidx_;  // column indices
-  signed char l_;                      // the angular momentum quantum number
+  std::vector<unsigned int>
+      row_offset_;                    // "pointer" to the beginning of each row
+  std::vector<unsigned int> colidx_;  // column indices
+  int l_;                             // the angular momentum quantum number
 
   void init() {
-    const unsigned short npure = 2 * l_ + 1;
-    const unsigned short ncart = (l_ + 1) * (l_ + 2) / 2;
+    const unsigned int npure = 2 * l_ + 1;
+    const unsigned int ncart = (l_ + 1) * (l_ + 2) / 2;
     std::vector<Real> full_coeff(npure * ncart);
 
     std::vector<int> shg_indices;
     if (libint2::solid_harmonics_ordering() ==
         libint2::SHGShellOrdering_Standard) {
-      for (signed char pure_idx = 0, m = -l_; pure_idx != npure;
-           ++pure_idx, ++m)
+      for (signed int pure_idx = 0, m = -l_; pure_idx != npure; ++pure_idx, ++m)
         shg_indices.push_back(m);
     } else if (libint2::solid_harmonics_ordering() ==
                libint2::SHGShellOrdering_Gaussian) {
-      for (signed char pure_idx = 0, m = 0; pure_idx != npure;
+      for (signed int pure_idx = 0, m = 0; pure_idx != npure;
            ++pure_idx, m = (m > 0 ? -m : 1 - m))
         shg_indices.push_back(m);
     } else {
@@ -197,10 +196,10 @@ class SolidHarmonicsCoefficients {
           "libint2::solid_harmonics_ordering() value not recognized."));
     }
 
-    for (signed char pure_idx = 0; pure_idx != npure; ++pure_idx) {
+    for (signed int pure_idx = 0; pure_idx != npure; ++pure_idx) {
       int m = shg_indices[pure_idx];
-      signed char cart_idx = 0;
-      signed char lx, ly, lz;
+      int cart_idx = 0;
+      int lx, ly, lz;
       FOR_CART(lx, ly, lz, l_)
       full_coeff[pure_idx * ncart + cart_idx] = coeff(l_, m, lx, ly, lz);
       // std::cout << "Solid(" << (int)l_ << "," << (int)m << ") += Cartesian("
@@ -221,11 +220,11 @@ class SolidHarmonicsCoefficients {
     row_offset_.resize(npure + 1);
     // 3) copy
     {
-      unsigned short pc = 0;
-      unsigned short cnt = 0;
-      for (unsigned short p = 0; p != npure; ++p) {
+      unsigned int pc = 0;
+      unsigned int cnt = 0;
+      for (unsigned int p = 0; p != npure; ++p) {
         row_offset_[p] = cnt;
-        for (unsigned short c = 0; c != ncart; ++c, ++pc) {
+        for (unsigned int c = 0; c != ncart; ++c, ++pc) {
           if (full_coeff[pc] != 0.0) {
             values_[cnt] = full_coeff[pc];
             colidx_[cnt] = c;

From 01a94ffc6102bbb1513cce976f2ec9ca5870ade0 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Mon, 4 Mar 2024 19:09:07 -0500
Subject: [PATCH 43/44] introduced SolidHarmonicsCoefficients::make_instance
 that does not cache the result

---
 include/libint2/solidharmonics.h | 4 ++++
 tests/unit/test-core.cc          | 7 ++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/libint2/solidharmonics.h b/include/libint2/solidharmonics.h
index b34c983dd..ddf06443e 100644
--- a/include/libint2/solidharmonics.h
+++ b/include/libint2/solidharmonics.h
@@ -88,6 +88,10 @@ class SolidHarmonicsCoefficients {
     return shg_coefs[l];
   }
 
+  static SolidHarmonicsCoefficients make_instance(unsigned int l) {
+    return SolidHarmonicsCoefficients(l);
+  }
+
   /// returns ptr to row values
   const Real* row_values(size_t r) const {
     return &values_[0] + row_offset_[r];
diff --git a/tests/unit/test-core.cc b/tests/unit/test-core.cc
index a3bcbd19c..5b60fc7c2 100644
--- a/tests/unit/test-core.cc
+++ b/tests/unit/test-core.cc
@@ -29,9 +29,10 @@ TEST_CASE("SolidHarmonicsCoefficients ctor", "[shell]") {
   auto sh12 = SolidHarmonicsCoefficients<long double>::instance(12);
   CHECK_NOTHROW(sh12.coeff(12, 0, 0, 0, 12));
   CHECK(sh12.coeff(12, 0, 0, 0, 12) == Approx(1));
-  CHECK_NOTHROW(SolidHarmonicsCoefficients<long double>::instance(25).coeff(
-      25, 0, 0, 0, 25));
-  CHECK(SolidHarmonicsCoefficients<long double>::instance(25).coeff(
+  CHECK_NOTHROW(
+      SolidHarmonicsCoefficients<long double>::make_instance(25).coeff(25, 0, 0,
+                                                                       0, 25));
+  CHECK(SolidHarmonicsCoefficients<long double>::make_instance(25).coeff(
             25, 0, 0, 0, 25) == Approx(1));
 }
 

From 0d1a1a5439ea56414f4d3a6c9c51cc77af57f175 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev <eduard@valeyev.net>
Date: Mon, 4 Mar 2024 19:13:06 -0500
Subject: [PATCH 44/44] DF autogen unit test only enabled if ERI, ERI3, and
 ERI2 are all supported

---
 tests/unit/test-df-autogen.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/unit/test-df-autogen.cc b/tests/unit/test-df-autogen.cc
index 71bca8db3..d6021cb17 100644
--- a/tests/unit/test-df-autogen.cc
+++ b/tests/unit/test-df-autogen.cc
@@ -130,6 +130,8 @@ Eigen::Tensor<double, 4> compute_eri4(const BasisSet &obs) {
 }
 
 TEST_CASE("DFBS-Generator", "[dfbs-generator]") {
+#if defined(LIBINT2_SUPPORT_ERI) && defined(LIBINT2_SUPPORT_ERI3) && \
+    defined(LIBINT2_SUPPORT_ERI2)
   // Will use Neon as a test case
   libint2::Atom atom;
   atom.atomic_number = 2;
@@ -181,5 +183,6 @@ TEST_CASE("DFBS-Generator", "[dfbs-generator]") {
     }
   }
   REQUIRE_NOTHROW(norm < 1e-10);
+#endif  // LIBINT2_SUPPORT_ERI && LIBINT2_SUPPORT_ERI3 && LIBINT2_SUPPORT_ERI2
 }
 //