ORCA/NBO parsing update.

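The identified program is now a property backed by an internal list. Assigning a program appends it to the list, and assigning None pops the most recent entry when we leave that program, so the property always reports the program currently being parsed. This lets us handle output in which one program's section appears inside another's. Parsers now return a dict or None, so the driver can pass a non-None result directly to `setattributes`.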
shivupa · Apr 1, 2024 · 42a15f2 · 42a15f2
1 parent c4fbc71
commit 42a15f2
Show file tree

Hide file tree

Showing 20 changed files with 333 additions and 133 deletions.
diff --git a/cclib/collection/collection.py b/cclib/collection/collection.py
@@ -9,7 +9,7 @@
 from collections import namedtuple
 from typing import Any, Dict, List, Mapping, Optional
 
-from cclib.parser import ccData
+from cclib.parser_properties import ccData
 
 import numpy
 
@@ -35,3 +35,7 @@ def __init__(self, combinator=None, tree=None) -> None:
         # ]
         # if self._combinator != None:
         #    assert len(self._combinator.job_list) == 1
+
+    @property
+    def parsed_data(self):
+        return self._parsed_data
diff --git a/cclib/combinator/combinator.py b/cclib/combinator/combinator.py
@@ -16,6 +16,7 @@ class combinator:
 DEFAULT_PARSERS = [
     cprops.scfenergies,
     cprops.atomcoords,
+    cprops.atomcharges,
     cprops.atomnos,
     cprops.atomnos,
     cprops.charge,

diff --git a/cclib/driver/ccdriver.py b/cclib/driver/ccdriver.py
@@ -79,7 +79,7 @@
     # todo     (MOPAC,     ["MOPAC20"],                                        True),
     # todo     (NBO,       ["N A T U R A L   A T O M I C   O R B I T A L   A N D"],                  True),
     # todo     (NWChem,    ["Northwest Computational Chemistry Package"],      True),
-    # todo     (ORCA,      ["O   R   C   A"],                                  True),
+    ("ORCA", ["****ORCA TERMINATED NORMALLY****"], True),
     # todo     (Psi3,      ["PSI3: An Open-Source Ab Initio Electronic Structure Package"],          True),
     ("psi4", ["Psi4 exiting successfully. Buy a developer a beer!"], True),
     # todo     (QChem,     ["A Quantum Leap Into The Future Of Chemistry"],    True),
@@ -444,11 +444,11 @@ def __init__(
             self._tree.add_root()
 
         if self._combinator is None:
-            self._combinator = auto_combinator(tree)
+            self._combinator = auto_combinator(self._tree)
         # TODO pass graph here
         self._ccCollection = ccCollection(self._combinator, self._tree)
         self._fileHandler = source
-        self.identified_program = None
+        self._identified_program = []
 
     @property
     def cccollection(self):
@@ -466,6 +466,21 @@ def combinator(self):
     def tree(self):
         return self._tree
 
+    @property
+    def identified_program(self):
+        if not self._identified_program:
+            return None
+        else:
+            return self._identified_program[-1]
+
+    @identified_program.setter
+    def identified_program(self, in_prog):
+        if in_prog is None:
+            if self._identified_program:
+                self._identified_program.pop()
+        else:
+            self._identified_program.append(in_prog)
+
     def process_combinator(self):
         """Process the combinator and populate the ccData object in the ccCollection"""
         self.identified_program = None
@@ -482,6 +497,9 @@ def process_combinator(self):
                     else:
                         # if a program is within a program this might mean things are ok but we proceed to a child node.. think about how to handle this?
                         current_idx = self._tree.get_next_idx()
+                        self.identified_program = program
+                        if do_break:
+                            break
             for program, phrases, do_break in triggers_off:
                 if all([line.lower().find(p.lower()) >= 0 for p in phrases]):
                     self.identified_program = None
@@ -496,11 +514,8 @@ def process_combinator(self):
                 parsed_data = subparser.parse(
                     self._fileHandler,
                     self.identified_program,
-                    self._ccCollection._parsed_data[current_idx],
+                    self._ccCollection.parsed_data[current_idx],
                 )
-                print(parsed_data)
                 if parsed_data is not None:
-                    parsed_attribute_name = subparser.__name__
-                    self._ccCollection._parsed_data[current_idx].__setattr__(
-                        parsed_attribute_name, parsed_data
-                    )
+                    self._ccCollection.parsed_data[current_idx].setattributes(parsed_data)
+        return self._ccCollection
diff --git a/cclib/parser/__init__.py b/cclib/parser/__init__.py
@@ -12,7 +12,7 @@
 # they can use:
 #         from cclib.parser import Gaussian
 
-from cclib.parser.data import ccData
+# from cclib.parser.data import ccData
 
 # This allows users to type:
 #         from cclib.parser import ccopen
diff --git a/cclib/parser_properties/__init__.py b/cclib/parser_properties/__init__.py
@@ -4,11 +4,13 @@
 # the terms of the BSD 3-Clause License.
 from cclib.parser_properties import utils
 from cclib.parser_properties.atombasis import atombasis
+from cclib.parser_properties.atomcharges import atomcharges
 from cclib.parser_properties.atomcoords import atomcoords
 from cclib.parser_properties.atommasses import atommasses
 from cclib.parser_properties.atomnos import atomnos
 from cclib.parser_properties.base_parser import base_parser
 from cclib.parser_properties.charge import charge
+from cclib.parser_properties.data import ccData
 from cclib.parser_properties.gbasis import gbasis
 from cclib.parser_properties.mocoeffs import mocoeffs
 from cclib.parser_properties.mosyms import mosyms

diff --git a/cclib/parser_properties/atombasis.py b/cclib/parser_properties/atombasis.py
@@ -16,7 +16,7 @@ class atombasis(base_parser):
     known_codes = ["gaussian", "psi4"]
 
     @staticmethod
-    def gaussian(file_handler, ccdata) -> list | None:
+    def gaussian(file_handler, ccdata) -> dict | None:
         # ccdata is "const" here and we don't need to modify it yet. The driver will set the attr
         dependency_list = ["nmo", "nbasis"]
         line = file_handler.last_line
@@ -26,7 +26,7 @@ def gaussian(file_handler, ccdata) -> list | None:
             or line[5:41] == "Alpha Molecular Orbital Coefficients"
             or line[5:40] == "Beta Molecular Orbital Coefficients"
         ):
-            constructed_data = []
+            constructed_atombasis = []
             if not base_parser.check_dependencies(dependency_list, ccdata, "atombasis"):
                 return None
             beta = False
@@ -36,7 +36,7 @@ def gaussian(file_handler, ccdata) -> list | None:
             symmetries = file_handler.virtual_next()
             eigenvalues = file_handler.virtual_next()
             base = 0
-            atombasis = []
+            curr_atombasis = []
             for base in range(0, ccdata.nmo, 5):
                 for i in range(ccdata.nbasis):
                     line = file_handler.virtual_next()
@@ -47,22 +47,23 @@ def gaussian(file_handler, ccdata) -> list | None:
                         parts = line[:start_of_basis_fn_name].split()
                         if len(parts) > 1:  # New atom
                             if i > 0:
-                                constructed_data.append(atombasis)
-                            atombasis = []
-                        atombasis.append(i)
-                    atombasis.append(i)
+                                constructed_atombasis.append(curr_atombasis)
+                            curr_atombasis = []
+                        curr_atombasis.append(i)
+                    curr_atombasis.append(i)
+            constructed_data = {atombasis.__name__: constructed_atombasis}
             return constructed_data
         return None
 
     @staticmethod
-    def psi4(file_handler, ccdata) -> list | None:
+    def psi4(file_handler, ccdata) -> dict | None:
         dependency_list = ["nmo", "nbasis"]
         if getattr(ccdata, "atombasis") == None:
             line = file_handler.last_line
             if line.strip() == "-Contraction Scheme:":
                 file_handler.skip_lines(["headers", "d"], virtual=True)
                 line = file_handler.virtual_next()
-                constructed_data = []
+                constructed_atombasis = []
                 atombasis_pos = 0
                 while line.strip():
                     ao_count = 0
@@ -73,18 +74,20 @@ def psi4(file_handler, ccdata) -> list | None:
                         ao_count += multiplier * int(count)
                     if len(constructed_data) > 0:
                         atombasis_pos = constructed_data[-1][-1] + 1
-                    constructed_data.append(list(range(atombasis_pos, atombasis_pos + ao_count)))
+                    constructed_atombasis.append(
+                        list(range(atombasis_pos, atombasis_pos + ao_count))
+                    )
                     line = file_handler.virtual_next()
+                constructed_data = {atombasis.__name__: constructed_atombasis}
                 return constructed_data
         return None
 
     @staticmethod
-    def parse(file_handler, program: str, ccdata) -> list | None:
+    def parse(file_handler, program: str, ccdata) -> dict | None:
         constructed_data = None
         if program in atombasis.known_codes:
             file_handler.virtual_set()
             program_parser = getattr(atombasis, program)
             constructed_data = program_parser(file_handler, ccdata)
             file_handler.virtual_reset()
-
         return constructed_data
diff --git a/cclib/parser_properties/atomcharges.py b/cclib/parser_properties/atomcharges.py
@@ -0,0 +1,193 @@
+# Copyright (c) 2024, the cclib development team
+#
+# This file is part of cclib (http://cclib.github.io) and is distributed under
+# the terms of the BSD 3-Clause License.
+from cclib.parser_properties import utils
+from cclib.parser_properties.base_parser import base_parser
+
+import numpy as np
+
+
+def orca_parse_charge_section(file_handler, chargestype):
+    """Parse a charge section
+
+    Parameters
+    ----------
+    file_handler :
+      generates lines
+    chargestype : str
+      what type of charge we're dealing with, must be one of
+      'mulliken', 'lowdin', 'chelpg' or 'hirshfeld'
+    """
+    atomcharges = dict()
+    atomspins = dict()
+    line = file_handler.last_line
+    has_spins = "AND SPIN POPULATIONS" in line
+
+    file_handler.skip_lines(["dashes"], virtual=True)
+
+    # depending on chargestype, decide when to stop parsing lines
+    # start, stop - indices for slicing lines and grabbing values
+    # should_stop: when to stop parsing
+    if chargestype == "mulliken":
+        should_stop = lambda x: x.startswith("Sum of atomic charges")
+        start, stop = 8, 20
+    elif chargestype == "lowdin":
+        should_stop = lambda x: not bool(x.strip())
+        start, stop = 8, 20
+    elif chargestype == "chelpg":
+        should_stop = lambda x: x.startswith("---")
+        start, stop = 11, 26
+    elif chargestype == "hirshfeld":
+        should_stop = lambda x: not bool(x.strip())
+        start, stop = 9, 18
+        file_handler.skip_lines(
+            ["d", "b", "Total integrated alpha density", "Total integrated beta density", "header"],
+            virtual=True,
+        )
+    else:
+        raise RuntimeError(f"unknown chargestype: {chargestype}")
+
+    charges = []
+    spins = []
+
+    line = file_handler.virtual_next()
+    while not should_stop(line):
+        # Don't add point charges or embedding potentials.
+        if "Q :" not in line:
+            charges.append(float(line[start:stop]))
+            if has_spins:
+                spins.append(float(line[stop:]))
+        line = file_handler.virtual_next()
+
+    atomcharges[chargestype] = charges
+    if has_spins:
+        atomspins[chargestype] = spins
+    return atomcharges, atomspins
+
+
+class atomcharges(base_parser):
+    """
+    Docstring? Units?
+    """
+
+    known_codes = ["ORCA", "NBO"]
+
+    @staticmethod
+    def ORCA(file_handler, ccdata) -> list | None:
+        # ccdata is "const" here and we don't need to modify it yet. The driver will set the attr
+        line = file_handler.last_line
+        constructed_charge_data = None
+        constructed_spin_data = None
+
+        # ORCA will print atomic charges along with the spin populations,
+        #   so care must be taken about choosing the proper column.
+        # Population analyses are performed usually only at the end
+        #   of a geometry optimization or other run, so we want to
+        #   leave just the final atom charges.
+        # Here is an example for Mulliken charges:
+        # --------------------------------------------
+        # MULLIKEN ATOMIC CHARGES AND SPIN POPULATIONS
+        # --------------------------------------------
+        #    0 H :    0.126447    0.002622
+        #    1 C :   -0.613018   -0.029484
+        #    2 H :    0.189146    0.015452
+        #    3 H :    0.320041    0.037434
+        # ...
+        # Sum of atomic charges         :   -0.0000000
+        # Sum of atomic spin populations:    1.0000000
+        if line[:23] == "MULLIKEN ATOMIC CHARGES":
+            constructed_charge_data, constructed_spin_data = orca_parse_charge_section(
+                file_handler, "mulliken"
+            )
+        # Things are the same for Lowdin populations, except that the sums
+        #   are not printed (there is a blank line at the end).
+        if line[:22] == "LOEWDIN ATOMIC CHARGES":
+            constructed_charge_data, constructed_spin_data = orca_parse_charge_section(
+                file_handler, "lowdin"
+            )
+        # ------------------
+        # HIRSHFELD ANALYSIS
+        # ------------------
+        #
+        # Total integrated alpha density =    142.999988722
+        # Total integrated beta density  =    142.999988722
+        #
+        #   ATOM     CHARGE      SPIN
+        #    0 H    0.157924    0.000000
+        #    1 O   -0.209542    0.000000
+        #    2 C    0.030659    0.000000
+        # ...
+        #   TOTAL  -0.999977    0.000000
+        if line[:18] == "HIRSHFELD ANALYSIS":
+            constructed_charge_data, constructed_spin_data = orca_parse_charge_section(
+                file_handler, "hirshfeld"
+            )
+        # CHELPG Charges
+        # --------------------------------
+        #  0   C   :       0.363939
+        #  1   H   :       0.025695
+        # ...
+        # --------------------------------
+        # Total charge:    -0.000000
+        # --------------------------------
+        if line.startswith("CHELPG Charges"):
+            constructed_charge_data, constructed_spin_data = orca_parse_charge_section(
+                file_handler, "chelpg"
+            )
+        # TODO handle atomspins
+        constructed_data = dict()
+        if constructed_charge_data:
+            if ccdata.atomcharges:
+                constructed_data["atomcharges"] = {**ccdata.atomcharges, **constructed_charge_data}
+            else:
+                constructed_data["atomcharges"] = {**constructed_charge_data}
+        if constructed_spin_data:
+            if ccdata.atomspins:
+                constructed_data["atomspins"] = {**ccdata.atomspins, **constructed_spin_data}
+            else:
+                constructed_data["atomspins"] = {**constructed_spin_data}
+        if constructed_data:
+            return constructed_data
+        return None
+
+    @staticmethod
+    def NBO(file_handler, ccdata) -> list | None:
+        atomcharges = dict()
+        # ccdata is "const" here and we don't need to modify it yet. The driver will set the attr
+        charges = None
+        line = file_handler.last_line
+        if "  Atom No    Charge" in line:
+            parsed_charges = []
+            line = file_handler.virtual_next()
+            line = file_handler.virtual_next()
+            while "==============" not in line:
+                population_analysis = line.split()
+                atom = population_analysis[0]
+                no = int(population_analysis[1])
+                natural_charge = float(population_analysis[2])
+                core = float(population_analysis[3])
+                valence = float(population_analysis[4])
+                rydberg = float(population_analysis[5])
+                total = float(population_analysis[6])
+                parsed_charges.append(natural_charge)
+                line = file_handler.virtual_next()
+            atomcharges["nbo"] = parsed_charges
+        constructed_data = dict()
+        if atomcharges != dict():
+            if ccdata.atomcharges:
+                constructed_data["atomcharges"] = {**ccdata.atomcharges, **atomcharges}
+            else:
+                constructed_data["atomcharges"] = {**atomcharges}
+            return constructed_data
+        return None
+
+    @staticmethod
+    def parse(file_handler, program: str, ccdata) -> list | None:
+        constructed_data = None
+        if program in atomcharges.known_codes:
+            file_handler.virtual_set()
+            program_parser = getattr(atomcharges, program)
+            constructed_data = program_parser(file_handler, ccdata)
+            file_handler.virtual_reset()
+        return constructed_data