fixed lots of small problems in the ham file type

- Added typing. - Allowed lattice to be an alias for the cell block. - Added check for polarized hamiltonian, still not capable of writing anything but un-polarized. - significantly speeded up the processing step before writing the data. There was a lot of cleaning the data, which isn't necessary. - Fixed a bug that could omit the diagonal component if the Hamiltonian value was 0. - streamlined the write process so it is always the same supercell connections that gets written. The order is still not stringent. Also added tests for this. Signed-off-by: Nick Papior <[email protected]>
zerothi · Jan 27, 2025 · d0c2fcb · d0c2fcb
1 parent e995da9
commit d0c2fcb
Show file tree

Hide file tree

Showing 3 changed files with 178 additions and 103 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -35,6 +35,10 @@ we hit release version 1.0.0.
   The old values are still respected.
 
 ### Fixed
+- `hamiltonianSile` can now handle skewed lattices with 6 input parameters
+  for the cell block
+- `hamiltonianSile` wrote wrong overlap and supercell connections for
+  more than 1 supercell direction, #887
 - `projection` arguments of several functions has been streamlined
 - `orbitals` arguments with slices without ends returned up to `geometry.na`,
   now it correctly returns up to the maximum number of orbitals.

diff --git a/src/sisl/io/ham.py b/src/sisl/io/ham.py
@@ -25,7 +25,7 @@ class hamiltonianSile(Sile):
     """Hamiltonian file object"""
 
     @sile_fh_open()
-    def read_geometry(self):
+    def read_geometry(self) -> Geometry:
         """Reading a geometry in regular Hamiltonian format"""
 
         cell = np.zeros([3, 3], np.float64)
@@ -44,30 +44,37 @@ def Z2no(i, no):
                 return int(j[0]), int(j[1])
 
         # The format of the geometry file is
-        keys = ["atoms", "cell", "supercell", "nsc"]
+        keys = ("atoms", "cell", "lattice", "supercell", "nsc")
         for _ in range(len(keys)):
-            _, l = self.step_to(keys, case=False)
-            l = l.strip()
-            if "supercell" in l.lower() or "nsc" in l.lower():
+            _, L = self.step_to(keys, case=False)
+            L = L.strip()
+            l = L.lower()
+            if "supercell" in l or "nsc" in l:
                 # We have everything in one line
                 l = l.split()[1:]
                 for i in range(3):
                     nsc[i] = int(l[i])
-            elif "cell" in l.lower():
-                if "begin" in l.lower():
+            elif "cell" in l or "lattice" in l:
+                if "begin" in l:
                     for i in range(3):
                         l = self.readline().split()
                         cell[i, 0] = float(l[0])
                         cell[i, 1] = float(l[1])
                         cell[i, 2] = float(l[2])
                     self.readline()  # step past the block
                 else:
-                    # We have everything in one line
+                    # We have the diagonal in one line
                     l = l.split()[1:]
-                    for i in range(3):
-                        cell[i, i] = float(l[i])
-                    # TODO incorporate rotations
-            elif "atoms" in l.lower():
+                    if len(l) in (3, 6):
+                        cell = np.zeros([len(l)], np.float64)
+                    else:
+                        raise NotImplementedError(
+                            "# of lattice components different "
+                            "from 3 or 6 values is not implemented"
+                        )
+                    for i in range(cell.size):
+                        cell[i] = float(l[i])
+            elif "atoms" in l:
                 l = self.readline()
                 while not l.startswith("end"):
                     ls = l.split()
@@ -83,22 +90,23 @@ def Z2no(i, no):
                 xyz.shape = (-1, 3)
                 self.readline()  # step past the block
 
-        # Create geometry with associated supercell and atoms
-        geom = Geometry(xyz, atoms=Atom[Z], lattice=Lattice(cell, nsc))
+        # Create geometry with associated lattice and atoms
+        geom = Geometry(xyz, atoms=Atom[Z], lattice=Lattice(cell, nsc=nsc))
 
         return geom
 
-    @sile_fh_open()
-    def read_hamiltonian(self, hermitian=True, dtype=np.float64, **kwargs):
+    @sile_fh_open(True)
+    def read_hamiltonian(
+        self, hermitian: bool = True, dtype=np.float64, **kwargs
+    ) -> Hamiltonian:
         """Reads a Hamiltonian (including the geometry)
 
         Reads the Hamiltonian model
         """
         # Read the geometry in this file
         geom = self.read_geometry()
 
-        # Rewind to ensure we can read the entire matrix structure
-        self.fh.seek(0)
+        # TODO parsing geom.no is wrong for non-collinear spin
 
         # With the geometry in place we can read in the entire matrix
         # Create a new sparse matrix
@@ -121,7 +129,8 @@ def i2o(geom, i):
             if not found:
                 break
 
-            # Get supercell
+            # Get supercell specification it the block
+            #   begin matrix <supercell>
             ls = l.split()
             try:
                 isc = np.array([int(ls[i]) for i in range(2, 5)], np.int32)
@@ -138,7 +147,7 @@ def i2o(geom, i):
                 h = float(ls[2])
                 try:
                     s = float(ls[3])
-                except Exception:
+                except IndexError:
                     s = 0.0
                 H[jo, io + off1] = h
                 S[jo, io + off1] = s
@@ -147,16 +156,18 @@ def i2o(geom, i):
                     S[io, jo + off2] = s
                 l = self.readline()
 
+        if np.abs(S).sum() == geom.no:
+            S = None
+
         return Hamiltonian.fromsp(geom, H, S)
 
     @sile_fh_open()
-    def write_geometry(self, geometry, fmt=".8f", **kwargs):
-        """
-        Writes the geometry to the output file
+    def write_geometry(self, geometry: Geometry, fmt: str = ".8f", **kwargs) -> None:
+        """Writes the geometry to the output file
 
         Parameters
         ----------
-        geometry : Geometry
+        geometry :
               The geometry we wish to write
         """
 
@@ -195,31 +206,45 @@ def write_geometry(self, geometry, fmt=".8f", **kwargs):
 
     @wrap_filterwarnings("ignore", category=SparseEfficiencyWarning)
     @sile_fh_open()
-    def write_hamiltonian(self, H, hermitian=True, **kwargs):
+    def write_hamiltonian(
+        self, H: Hamiltonian, hermitian: bool = True, **kwargs
+    ) -> None:
         """Writes the Hamiltonian model to the file
 
         Writes a Hamiltonian model to the intrinsic Hamiltonian file format.
-        The file can be constructed by the implict force of Hermiticity,
+        The file can be constructed by the implicit force of Hermiticity,
         or without.
 
         Utilizing the Hermiticity we reduce the file-size by approximately
         50%.
 
         Parameters
         ----------
-        H : `Hamiltonian` model
-        hermitian : boolean=True
-            whether the stored data is halved using the Hermitian property
+        H :
+        hermitian :
+            whether the stored data is halved using the Hermitian property of the
+            Hamiltonian.
+
+        Notes
+        -----
+        This file format can only be used to write unpolarized spin configurations.
         """
         # We use the upper-triangular form of the Hamiltonian
         # and the overlap matrix for hermitian problems
 
         geom = H.geometry
+        is_orthogonal = H.orthogonal
+
+        if not H.spin.is_unpolarized:
+            raise NotImplementedError(
+                f"{self!r}.write_hamiltonian can only write "
+                "un-polarized Hamiltonians."
+            )
 
         # First write the geometry
         self.write_geometry(geom, **kwargs)
 
-        # We default to the advanced layuot if we have more than one
+        # We default to the advanced layout if we have more than one
         # orbital on any one atom
         advanced = kwargs.get(
             "advanced", np.any(np.array([a.no for a in geom.atoms.atom], np.int32) > 1)
@@ -238,106 +263,131 @@ def write_hamiltonian(self, H, hermitian=True, **kwargs):
         # We currently force the model to be finalized
         # before we can write it
         # This should be easily circumvented
+        # TODO more spin configurations
+        N = len(H)
         h = H.tocsr(0)
-        if not H.orthogonal:
+        if not is_orthogonal:
             S = H.tocsr(H.S_idx)
 
         # If the model is Hermitian we can
         # do with writing out half the entries
         if hermitian:
             herm_acc = kwargs.get("herm_acc", 1e-6)
-            # We check whether it is Hermitian (not S)
-            for i, isc in enumerate(geom.lattice.sc_off):
-                oi = i * geom.no
-                oj = geom.sc_index(-isc) * geom.no
-                # get the difference between the ^\dagger elements
-                diff = h[:, oi : oi + geom.no] - h[:, oj : oj + geom.no].transpose()
-                diff.eliminate_zeros()
-                if np.any(np.abs(diff.data) > herm_acc):
-                    amax = np.amax(np.abs(diff.data))
-                    warn(
-                        f"The model could not be asserted to be Hermitian "
-                        "within the accuracy required ({amax})."
-                    )
-                    hermitian = False
-                del diff
+
+            h_ht = H - H.transpose(hermitian=True)
+            amax = np.abs(h_ht._csr._D).max()
+            if amax > herm_acc:
+                warn(
+                    f"{self!r}.write_hamiltonian could not assert the matrix to be hermitian "
+                    "within the accuracy required ({amax})."
+                )
+                hermitian = False
+            del h_ht
+
+        # numpy arrays are not pickable, so we convert to a tuple of ints
+        # to ensure we can use hash ops on the set.
+        def conv2int_tuple(sc_off):
+            return tuple(map(int, sc_off))
+
+        # This small set determines which supercell connections
+        # we write. In case we are writing the Hermitian values
+        # We'll only write half + 1 of the supercells!
+        write_isc = set(map(conv2int_tuple, geom.lattice.sc_off))
 
         if hermitian:
-            # Remove half of the supercell connections (d matrices)
-            d = len(geom.lattice.sc_off[1:]) // 2
-            for i, isc in enumerate(geom.lattice.sc_off[1 : d + 1]):
+
+            # Remove half of the supercell connections
+            # We simply retain a list of connections we wish to write
+            write_isc = set()
+
+            # binary priority
+            priority = np.array([4, 2, 1])
+
+            def choice(isc):
+                nonlocal priority
+                return sum(priority[isc > 0])
+
+            for i, isc in enumerate(geom.lattice.sc_off):
+                # Select the isc which has the most positive numbers
+                priority_i = choice(isc)
+                priority_j = choice(-isc)
+
                 # We have ^\dagger element, remove it
-                o = (i + 1) * geom.no
-                # Ensure that we remove all nullified quantities
-                # (setting elements to zero will add them internally
-                #  :(, hence this actually constructs the full matrix
-                # Therefore we do it on a row basis, to limit memory
-                # requirements
-                for j in range(geom.no):
-                    h[j, o : o + geom.no] = 0.0
-                    h.eliminate_zeros()
-                    if not H.orthogonal:
-                        S[j, o : o + geom.no] = 0.0
-                        S.eliminate_zeros()
-            o = geom.sc_index(np.zeros([3], np.int32))
-            # Get upper-triangular matrix of the unit-cell H and S
-            ut = triu(h[:, o : o + geom.no], k=0).tocsr()
-            for j in range(geom.no):
-                h[j, o : o + geom.no] = 0.0
-                h[j, o : o + geom.no] = ut[j, :]
-                h.eliminate_zeros()
-            if not H.orthogonal:
-                ut = triu(S[:, o : o + geom.no], k=0).tocsr()
-                for j in range(geom.no):
-                    S[j, o : o + geom.no] = 0.0
-                    S[j, o : o + geom.no] = ut[j, :]
-                    S.eliminate_zeros()
-
-                # Ensure that S and H have the same sparsity pattern
-                for jo, io in ispmatrix(S):
-                    h[jo, io] = h[jo, io]
-
-            del ut
+                jsc = conv2int_tuple(-isc)
+                isc = conv2int_tuple(isc)
+
+                # a set won't double add anything
+                # so no need to check
+                if priority_i > priority_j:
+                    write_isc.add(isc)
+                else:
+                    write_isc.add(jsc)
 
         # Start writing of the model
         # We loop on all super-cells
         for i, isc in enumerate(geom.lattice.sc_off):
+
+            # check if we should write this sc_off
+            if conv2int_tuple(isc) not in write_isc:
+                continue
+
+            # check if the connections belong in the primary unit-cell
+            is_primary = np.all(isc == 0)
+
             # Check that we have any contributions in this
-            # sub-section
-            Hsub = h[:, i * geom.no : (i + 1) * geom.no]
-            if not H.orthogonal:
-                Ssub = S[:, i * geom.no : (i + 1) * geom.no]
+            # sub-section, and immediately remove zeros
+            Hsub = h[:, i * N : (i + 1) * N]
+            Hsub.eliminate_zeros()
+            if not is_orthogonal:
+                Ssub = S[:, i * N : (i + 1) * N]
+                Ssub.eliminate_zeros()
+
+            if hermitian and is_primary:
+                # only write upper right of primary unit-cell connections
+                Hsub = triu(Hsub)
+                if not is_orthogonal:
+                    Ssub = triu(Ssub)
+
+            # Ensure that when h[i,i] == 0, we still write something
+            # Since S != 0.
+            if is_primary:
+                Hsub.setdiag(Hsub.diagonal())
+
             if Hsub.getnnz() == 0:
+                # Skip writing this isc
                 continue
+
             # We have a contribution, write out the information
             self._write("\nbegin matrix {:d} {:d} {:d}\n".format(*isc))
             if advanced:
-                for jo, io, hh in ispmatrixd(Hsub):
-                    o = np.array([jo, io], np.int32)
+                for io, jo, hh in ispmatrixd(Hsub):
+                    o = np.array([io, jo], np.int32)
                     a = geom.o2a(o)
                     o = o - geom.a2o(a)
-                    if not H.orthogonal:
-                        s = Ssub[jo, io]
-                    elif jo == io and i == 0:
-                        s = 1.0
-                    else:
-                        s = 0.0
+
+                    s = 0.0
+                    if not is_orthogonal:
+                        s = Ssub[io, jo]
+                    elif is_primary:
+                        if io == jo:
+                            s = 1.0
                     if s == 0.0:
                         self._write(fmt1_str.format(a[0], o[0], a[1], o[1], hh))
                     else:
                         self._write(fmt2_str.format(a[0], o[0], a[1], o[1], hh, s))
             else:
-                for jo, io, hh in ispmatrixd(Hsub):
-                    if not H.orthogonal:
-                        s = Ssub[jo, io]
-                    elif jo == io and i == 0:
-                        s = 1.0
-                    else:
-                        s = 0.0
+                for io, jo, hh in ispmatrixd(Hsub):
+                    s = 0.0
+                    if not is_orthogonal:
+                        s = Ssub[io, jo]
+                    elif is_primary:
+                        if io == jo:
+                            s = 1.0
                     if s == 0.0:
-                        self._write(fmt1_str.format(jo, io, hh))
+                        self._write(fmt1_str.format(io, jo, hh))
                     else:
-                        self._write(fmt2_str.format(jo, io, hh, s))
+                        self._write(fmt2_str.format(io, jo, hh, s))
+
             self._write("end matrix {:d} {:d} {:d}\n".format(*isc))