materialsproject · janosh · Oct 14, 2023 · Sep 29, 2023 · Sep 29, 2023 · Sep 29, 2023
@@ -16,6 +16,7 @@
     Fatband,
     Grosspop,
     Icohplist,
+    LobsterMatrices,
     Lobsterout,
     MadelungEnergies,
     NciCobiList,

@@ -1688,3 +1688,148 @@ def get_orb_from_str(orbs):
     orbitals = [(int(orb[0]), Orbital(orb_labs.index(orb[1:]))) for orb in orbs]
     orb_label = f"{orbitals[0][0]}{orbitals[0][1].name}-{orbitals[1][0]}{orbitals[1][1].name}"  # type: ignore
     return orb_label, orbitals
+
+
class LobsterMatrices:
    """
    Class to read Matrices file generated by LOBSTER (e.g. hamiltonMatrices.lobster).

    Which set of attributes is populated depends on the substring found in
    ``filename`` ("hamilton", "coefficient", "transfer" or "overlap"). If none
    of these substrings is present, no parsed attributes are set.

    Attributes:
        for filename == "hamiltonMatrices.lobster"

        onsite_energies (list[np.ndarray]): Real part of the onsite (diagonal) energies of the matrix at
                                        each k-point, shifted by ``-e_fermi``.
        average_onsite_energies (dict): Average onsite energies over all parsed matrices, keyed by the
                                        basis functions used in the LOBSTER computation (real part only).
        hamilton_matrices (dict): Complex hamilton matrix at each k-point,
                                        keyed by k-point number (str) and then by spin.

        for filename == "coefficientMatrices.lobster"

        onsite_coefficients (list[np.ndarray]): Real part of the onsite coefficients of the matrix at each k-point.
        average_onsite_coefficient (dict): Average onsite coefficients over all parsed matrices, keyed by the
                                        basis functions used in the LOBSTER computation (real part only).
        coefficient_matrices (dict): Complex coefficient matrix at each k-point,
                                        keyed by k-point number (str) and then by spin.

        for filename == "transferMatrices.lobster"

        onsite_transfer (list[np.ndarray]): Real part of the onsite transfer coefficients of the matrix at each
                                        k-point.
        average_onsite_transfer (dict): Average onsite transfer coefficients over all parsed matrices, keyed by
                                        the basis functions used in the LOBSTER computation (real part only).
        transfer_matrices (dict): Complex transfer matrix at each k-point,
                                        keyed by k-point number (str) and then by spin.

        for filename == "overlapMatrices.lobster"

        onsite_overlaps (list[np.ndarray]): Real part of the onsite overlaps of the matrix at each k-point.
        average_onsite_overlaps (dict): Average onsite overlaps over all parsed matrices, keyed by the
                                        basis functions used in the LOBSTER computation (real part only).
        overlap_matrices (dict): Complex overlap matrix at each k-point,
                                        keyed by k-point number (str).
    """

    def __init__(self, e_fermi=None, filename: str = "hamiltonMatrices.lobster"):
        """
        Args:
            e_fermi: Fermi level in eV for the structure. Only relevant (and then required)
                if the input file contains hamilton matrices data.
            filename: path of the matrices file, typically "hamiltonMatrices.lobster".

        Raises:
            OSError: if the file contains no lines.
            ValueError: if a hamilton matrices file is given without ``e_fermi``.
        """
        self._filename = filename
        with zopen(self._filename, "rt") as f:
            file_data = f.readlines()
        if not file_data:
            raise OSError("Please check provided input file, it seems to be empty")

        pattern_coeff_hamil_trans = r"(\d+)\s+kpoint\s+(\d+)"  # regex pattern to extract spin and k-point number
        pattern_overlap = r"kpoint\s+(\d+)"  # regex pattern to extract k-point number

        if "hamilton" in self._filename:
            if e_fermi is None:
                raise ValueError("Please provide the fermi energy in eV ")
            self.onsite_energies, self.average_onsite_energies, self.hamilton_matrices = self._parse_matrix(
                file_data=file_data, pattern=pattern_coeff_hamil_trans, e_fermi=e_fermi
            )

        elif "coefficient" in self._filename:
            # no energy reference applies here, hence e_fermi=0 (no shift of the diagonal)
            self.onsite_coefficients, self.average_onsite_coefficient, self.coefficient_matrices = self._parse_matrix(
                file_data=file_data, pattern=pattern_coeff_hamil_trans, e_fermi=0
            )

        elif "transfer" in self._filename:
            self.onsite_transfer, self.average_onsite_transfer, self.transfer_matrices = self._parse_matrix(
                file_data=file_data, pattern=pattern_coeff_hamil_trans, e_fermi=0
            )

        elif "overlap" in self._filename:
            # overlap files carry no spin index, so only the k-point number is extracted
            self.onsite_overlaps, self.average_onsite_overlaps, self.overlap_matrices = self._parse_matrix(
                file_data=file_data, pattern=pattern_overlap, e_fermi=0
            )

    @staticmethod
    def _parse_matrix(file_data, pattern, e_fermi):
        """
        Parse the alternating "Real parts" / "Imag parts" matrix blocks of a LOBSTER matrices file.

        Args:
            file_data: lines of the matrices file.
            pattern: regex applied to the line directly above each "Real parts" marker. With two
                capture groups they are read as (spin, k-point); with one group as (k-point).
            e_fermi: value subtracted from the real diagonal of every matrix.

        Returns:
            tuple of
            - list with the real diagonal (minus ``e_fermi``) of each parsed matrix,
            - dict mapping basis function label to the diagonal value averaged over all parsed matrices,
            - dict with the complex matrices: ``{k_point: {spin: matrix}}`` for a two-group pattern,
              ``{k_point: matrix}`` for a one-group pattern.

        Raises:
            ValueError: if no complete real/imaginary matrix block is found in ``file_data``.
        """
        complex_matrices = {}
        matrix_diagonal_values = []
        start_inxs_real = []
        end_inxs_real = []
        start_inxs_imag = []
        end_inxs_imag = []
        last_inx = len(file_data) - 1

        # get indices of real and imaginary part of matrix for each k point
        for i, line in enumerate(file_data):
            line = line.strip()
            if "Real parts" in line:
                start_inxs_real.append(i + 1)
                # a "Real parts" marker closes the previous imaginary block, except for the
                # very first marker (files start with a real matrix)
                if i != 1:
                    end_inxs_imag.append(i - 1)
                matches = re.search(pattern, file_data[i - 1])
                if matches and len(matches.groups()) == 2:
                    # spin resolved file: prepare the per k-point dict that will hold one matrix per spin
                    complex_matrices[matches.group(2)] = {}
            if "Imag parts" in line:
                # the line directly above the marker is a header line and not part of the real block
                end_inxs_real.append(i - 1)
                start_inxs_imag.append(i + 1)
            # explicitly add the last line as files end with imaginary matrix
            if i == last_inx:
                end_inxs_imag.append(len(file_data))

        # extract matrix data and store diagonal elements
        matrix_real = []
        for start_inx_real, end_inx_real, start_inx_imag, end_inx_imag in zip(
            start_inxs_real, end_inxs_real, start_inxs_imag, end_inxs_imag
        ):
            # matrix with text headers
            matrix_real = file_data[start_inx_real:end_inx_real]
            matrix_imag = file_data[start_inx_imag:end_inx_imag]

            # drop the header row and the leading basis function label of every row,
            # then convert the remaining numerical data to numpy arrays
            matrix_array_real = np.array([line.split()[1:] for line in matrix_real[1:]], dtype=float)
            matrix_array_imag = np.array([line.split()[1:] for line in matrix_imag[1:]], dtype=float)

            # combine real and imaginary parts to create a complex matrix
            # (the imaginary block must be multiplied by 1j, not added to the real part)
            comp_matrix = matrix_array_real + 1j * matrix_array_imag

            matches = re.search(pattern, file_data[start_inx_real - 2])
            if matches and len(matches.groups()) == 2:
                spin = Spin.up if matches.group(1) == "1" else Spin.down
                k_point = matches.group(2)
                complex_matrices[k_point].update({spin: comp_matrix})
            elif matches and len(matches.groups()) == 1:
                k_point = matches.group(1)
                complex_matrices.update({k_point: comp_matrix})
            matrix_diagonal_values.append(comp_matrix.real.diagonal() - e_fermi)

        if not matrix_diagonal_values:
            raise ValueError("No real/imaginary matrix blocks could be parsed from the provided file data")

        # extract elements basis functions as list (row labels of the last parsed real block)
        elements_basis_functions = [
            line.split()[0] for line in matrix_real if line.split()[0] != "basisfunction"
        ]

        # average each diagonal element over all parsed matrices
        average_matrix_diagonal_values = np.array(matrix_diagonal_values, dtype=float).mean(axis=0)

        # get a dict with basis functions as keys and average values as values
        average_average_matrix_diag_dict = dict(zip(elements_basis_functions, average_matrix_diagonal_values))

        return matrix_diagonal_values, average_average_matrix_diag_dict, complex_matrices
@@ -1677,7 +1677,7 @@
     "tests/io/lobster/test_inputs.py::TestBandoverlaps::test_attributes": 0.025641332962550223,
     "tests/io/lobster/test_inputs.py::TestBandoverlaps::test_has_good_quality": 0.029428708949126303,
     "tests/io/lobster/test_inputs.py::TestCharge::test_get_structure_with_charges": 0.0030279159545898438,
-    "tests/io/lobster/test_inputs.py::TestCharge::testattributes": 0.0022161250235512853,
+    "tests/io/lobster/test_inputs.py::TestCharge::test_attributes": 0.0022161250235512853,
     "tests/io/lobster/test_inputs.py::TestCohpcar::test_attributes": 0.07801850006217137,
     "tests/io/lobster/test_inputs.py::TestCohpcar::test_cohp_data": 0.08135204098653048,
     "tests/io/lobster/test_inputs.py::TestCohpcar::test_energies": 0.07727941608754918,
@@ -1693,7 +1693,7 @@
     "tests/io/lobster/test_inputs.py::TestFatband::test_get_bandstructure": 3.241345082933549,
     "tests/io/lobster/test_inputs.py::TestFatband::test_raises": 2.288895126024727,
     "tests/io/lobster/test_inputs.py::TestGrosspop::test_structure_with_grosspop": 0.0011580409482121468,
-    "tests/io/lobster/test_inputs.py::TestGrosspop::testattributes": 0.00040716701187193394,
+    "tests/io/lobster/test_inputs.py::TestGrosspop::test_attributes": 0.00040716701187193394,
     "tests/io/lobster/test_inputs.py::TestIcohplist::test_attributes": 0.0028339590062387288,
     "tests/io/lobster/test_inputs.py::TestIcohplist::test_values": 0.001649416983127594,
     "tests/io/lobster/test_inputs.py::TestLobsterin::test_msonable_implementation": 0.0035085839335806668,
@@ -1711,7 +1711,7 @@
     "tests/io/lobster/test_inputs.py::TestLobsterin::test_write_kpoints": 0.037688249023631215,
     "tests/io/lobster/test_inputs.py::TestLobsterin::test_write_lobsterin": 0.030836959020234644,
     "tests/io/lobster/test_inputs.py::TestLobsterout::test_get_doc": 0.004303499008528888,
-    "tests/io/lobster/test_inputs.py::TestLobsterout::testattributes": 0.003141166002023965,
+    "tests/io/lobster/test_inputs.py::TestLobsterout::test_attributes": 0.003141166002023965,
     "tests/io/lobster/test_inputs.py::TestMadelungEnergies::test_attributes": 0.0023860419751144946,
     "tests/io/lobster/test_inputs.py::TestSitePotentials::test_attributes": 0.002964083047118038,
     "tests/io/lobster/test_inputs.py::TestSitePotentials::test_get_structure": 0.0025429160450585186,