Skip to content

Commit

Permalink
ORCA/NBO parsing update.
Browse files Browse the repository at this point in the history
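The identified program is now a property backed by a list used as a stack.
If a program has already been identified, a newly identified program is appended
to the list, and it is popped again whenever we leave that program.
This lets us handle the output of one code embedded within the output of another.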

Parsers now return a dict or None, so the driver can apply their results with `setattributes`.
  • Loading branch information
shivupa committed Apr 1, 2024
1 parent c4fbc71 commit 42a15f2
Show file tree
Hide file tree
Showing 20 changed files with 333 additions and 133 deletions.
6 changes: 5 additions & 1 deletion cclib/collection/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from collections import namedtuple
from typing import Any, Dict, List, Mapping, Optional

from cclib.parser import ccData
from cclib.parser_properties import ccData

import numpy

Expand All @@ -35,3 +35,7 @@ def __init__(self, combinator=None, tree=None) -> None:
# ]
# if self._combinator != None:
# assert len(self._combinator.job_list) == 1

@property
def parsed_data(self):
    """Return the internal ``_parsed_data`` container of this collection."""
    return self._parsed_data
1 change: 1 addition & 0 deletions cclib/combinator/combinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class combinator:
DEFAULT_PARSERS = [
cprops.scfenergies,
cprops.atomcoords,
cprops.atomcharges,
cprops.atomnos,
cprops.atomnos,
cprops.charge,
Expand Down
33 changes: 24 additions & 9 deletions cclib/driver/ccdriver.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
# todo (MOPAC, ["MOPAC20"], True),
# todo (NBO, ["N A T U R A L A T O M I C O R B I T A L A N D"], True),
# todo (NWChem, ["Northwest Computational Chemistry Package"], True),
# todo (ORCA, ["O R C A"], True),
("ORCA", ["****ORCA TERMINATED NORMALLY****"], True),
# todo (Psi3, ["PSI3: An Open-Source Ab Initio Electronic Structure Package"], True),
("psi4", ["Psi4 exiting successfully. Buy a developer a beer!"], True),
# todo (QChem, ["A Quantum Leap Into The Future Of Chemistry"], True),
Expand Down Expand Up @@ -444,11 +444,11 @@ def __init__(
self._tree.add_root()

if self._combinator is None:
self._combinator = auto_combinator(tree)
self._combinator = auto_combinator(self._tree)
# TODO pass graph here
self._ccCollection = ccCollection(self._combinator, self._tree)
self._fileHandler = source
self.identified_program = None
self._identified_program = []

@property
def cccollection(self):
Expand All @@ -466,6 +466,21 @@ def combinator(self):
def tree(self):
return self._tree

@property
def identified_program(self):
    """Return the most recently identified program, or None if none is active."""
    stack = self._identified_program
    return stack[-1] if stack else None

@identified_program.setter
def identified_program(self, in_prog):
    """Push ``in_prog`` as the active program; assigning None leaves the current one.

    Assigning None pops the most recently identified program, and does
    nothing when no program is active.
    """
    if in_prog is not None:
        self._identified_program.append(in_prog)
        return
    if self._identified_program:
        self._identified_program.pop()

def process_combinator(self):
"""Process the combinator and populate the ccData object in the ccCollection"""
self.identified_program = None
Expand All @@ -482,6 +497,9 @@ def process_combinator(self):
else:
# if a program is within a program this might mean things are ok but we proceed to a child node.. think about how to handle this?
current_idx = self._tree.get_next_idx()
self.identified_program = program
if do_break:
break
for program, phrases, do_break in triggers_off:
if all([line.lower().find(p.lower()) >= 0 for p in phrases]):
self.identified_program = None
Expand All @@ -496,11 +514,8 @@ def process_combinator(self):
parsed_data = subparser.parse(
self._fileHandler,
self.identified_program,
self._ccCollection._parsed_data[current_idx],
self._ccCollection.parsed_data[current_idx],
)
print(parsed_data)
if parsed_data is not None:
parsed_attribute_name = subparser.__name__
self._ccCollection._parsed_data[current_idx].__setattr__(
parsed_attribute_name, parsed_data
)
self._ccCollection.parsed_data[current_idx].setattributes(parsed_data)
return self._ccCollection
2 changes: 1 addition & 1 deletion cclib/parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# they can use:
# from cclib.parser import Gaussian

from cclib.parser.data import ccData
# from cclib.parser.data import ccData

# This allows users to type:
# from cclib.parser import ccopen
2 changes: 2 additions & 0 deletions cclib/parser_properties/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
# the terms of the BSD 3-Clause License.
from cclib.parser_properties import utils
from cclib.parser_properties.atombasis import atombasis
from cclib.parser_properties.atomcharges import atomcharges
from cclib.parser_properties.atomcoords import atomcoords
from cclib.parser_properties.atommasses import atommasses
from cclib.parser_properties.atomnos import atomnos
from cclib.parser_properties.base_parser import base_parser
from cclib.parser_properties.charge import charge
from cclib.parser_properties.data import ccData
from cclib.parser_properties.gbasis import gbasis
from cclib.parser_properties.mocoeffs import mocoeffs
from cclib.parser_properties.mosyms import mosyms
Expand Down
27 changes: 15 additions & 12 deletions cclib/parser_properties/atombasis.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class atombasis(base_parser):
known_codes = ["gaussian", "psi4"]

@staticmethod
def gaussian(file_handler, ccdata) -> list | None:
def gaussian(file_handler, ccdata) -> dict | None:
# ccdata is "const" here and we don't need to modify it yet. The driver will set the attr
dependency_list = ["nmo", "nbasis"]
line = file_handler.last_line
Expand All @@ -26,7 +26,7 @@ def gaussian(file_handler, ccdata) -> list | None:
or line[5:41] == "Alpha Molecular Orbital Coefficients"
or line[5:40] == "Beta Molecular Orbital Coefficients"
):
constructed_data = []
constructed_atombasis = []
if not base_parser.check_dependencies(dependency_list, ccdata, "atombasis"):
return None
beta = False
Expand All @@ -36,7 +36,7 @@ def gaussian(file_handler, ccdata) -> list | None:
symmetries = file_handler.virtual_next()
eigenvalues = file_handler.virtual_next()
base = 0
atombasis = []
curr_atombasis = []
for base in range(0, ccdata.nmo, 5):
for i in range(ccdata.nbasis):
line = file_handler.virtual_next()
Expand All @@ -47,22 +47,23 @@ def gaussian(file_handler, ccdata) -> list | None:
parts = line[:start_of_basis_fn_name].split()
if len(parts) > 1: # New atom
if i > 0:
constructed_data.append(atombasis)
atombasis = []
atombasis.append(i)
atombasis.append(i)
constructed_atombasis.append(curr_atombasis)
curr_atombasis = []
curr_atombasis.append(i)
curr_atombasis.append(i)
constructed_data = {atombasis.__name__: constructed_atombasis}
return constructed_data
return None

@staticmethod
def psi4(file_handler, ccdata) -> list | None:
def psi4(file_handler, ccdata) -> dict | None:
dependency_list = ["nmo", "nbasis"]
if getattr(ccdata, "atombasis") == None:
line = file_handler.last_line
if line.strip() == "-Contraction Scheme:":
file_handler.skip_lines(["headers", "d"], virtual=True)
line = file_handler.virtual_next()
constructed_data = []
constructed_atombasis = []
atombasis_pos = 0
while line.strip():
ao_count = 0
Expand All @@ -73,18 +74,20 @@ def psi4(file_handler, ccdata) -> list | None:
ao_count += multiplier * int(count)
if len(constructed_data) > 0:
atombasis_pos = constructed_data[-1][-1] + 1
constructed_data.append(list(range(atombasis_pos, atombasis_pos + ao_count)))
constructed_atombasis.append(
list(range(atombasis_pos, atombasis_pos + ao_count))
)
line = file_handler.virtual_next()
constructed_data = {atombasis.__name__: constructed_atombasis}
return constructed_data
return None

@staticmethod
def parse(file_handler, program: str, ccdata) -> list | None:
def parse(file_handler, program: str, ccdata) -> dict | None:
constructed_data = None
if program in atombasis.known_codes:
file_handler.virtual_set()
program_parser = getattr(atombasis, program)
constructed_data = program_parser(file_handler, ccdata)
file_handler.virtual_reset()

return constructed_data
193 changes: 193 additions & 0 deletions cclib/parser_properties/atomcharges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# Copyright (c) 2024, the cclib development team
#
# This file is part of cclib (http://cclib.github.io) and is distributed under
# the terms of the BSD 3-Clause License.
from cclib.parser_properties import utils
from cclib.parser_properties.base_parser import base_parser

import numpy as np


def orca_parse_charge_section(file_handler, chargestype):
    """Parse one ORCA population-analysis section.

    ``file_handler.last_line`` is taken to be the title line of the section.
    The rows that follow are read with ``virtual_next()`` until the
    terminator for the given ``chargestype`` is seen.

    Parameters
    ----------
    file_handler :
        generates lines; must provide ``last_line``,
        ``skip_lines(names, virtual=True)`` and ``virtual_next()``
    chargestype : str
        what type of charge we're dealing with, must be one of
        'mulliken', 'lowdin', 'chelpg' or 'hirshfeld'

    Returns
    -------
    tuple of (dict, dict)
        ``({chargestype: charges}, {chargestype: spins})``, each value a list
        of floats in the order the rows were read. The second dict is empty
        unless the title line contains "AND SPIN POPULATIONS".

    Raises
    ------
    RuntimeError
        if ``chargestype`` is not a supported name. This is checked before
        any line is consumed from ``file_handler``.
    """

    def is_blank(text):
        return not text.strip()

    def is_sum_line(text):
        return text.startswith("Sum of atomic charges")

    def is_dashes(text):
        return text.startswith("---")

    # Per charge type:
    #   start, stop - indices for slicing lines and grabbing values
    #   should_stop - when to stop parsing
    #   preamble    - extra lines to skip between the title underline and the rows
    layouts = {
        "mulliken": (8, 20, is_sum_line, None),
        "lowdin": (8, 20, is_blank, None),
        "chelpg": (11, 26, is_dashes, None),
        "hirshfeld": (
            9,
            18,
            is_blank,
            ["d", "b", "Total integrated alpha density", "Total integrated beta density", "header"],
        ),
    }

    # Validate first so that a bad chargestype never advances the handler.
    try:
        start, stop, should_stop, preamble = layouts[chargestype]
    except (KeyError, TypeError):
        raise RuntimeError(f"unknown chargestype: {chargestype}") from None

    has_spins = "AND SPIN POPULATIONS" in file_handler.last_line

    file_handler.skip_lines(["dashes"], virtual=True)
    if preamble is not None:
        file_handler.skip_lines(preamble, virtual=True)

    charges = []
    spins = []

    line = file_handler.virtual_next()
    while not should_stop(line):
        # Don't add point charges or embedding potentials.
        if "Q :" not in line:
            charges.append(float(line[start:stop]))
            if has_spins:
                # The spin population is whatever follows the charge column.
                spins.append(float(line[stop:]))
        line = file_handler.virtual_next()

    parsed_charges = {chargestype: charges}
    parsed_spins = {chargestype: spins} if has_spins else {}
    return parsed_charges, parsed_spins


class atomcharges(base_parser):
    """Parse atomic partial charges (and ORCA spin populations) from program output.

    Each program-specific parser looks at ``file_handler.last_line``. When that
    line opens a section it understands, it reads the section and returns a
    dict mapping ccData attribute names to the values the driver should set,
    e.g. ``{"atomcharges": {"mulliken": [...]}}``. Results for a new charge
    type are merged with whatever ``ccdata`` already holds for the attribute.

    Values are the floats printed in the output; no unit conversion is
    applied here.
    """

    known_codes = ["ORCA", "NBO"]

    # Title prefix of each ORCA section handled, with the charge type it maps to.
    _ORCA_SECTIONS = (
        ("MULLIKEN ATOMIC CHARGES", "mulliken"),
        ("LOEWDIN ATOMIC CHARGES", "lowdin"),
        ("HIRSHFELD ANALYSIS", "hirshfeld"),
        ("CHELPG Charges", "chelpg"),
    )

    @staticmethod
    def _merge_with_existing(ccdata, attribute: str, parsed: dict) -> dict:
        """Return a new dict with ``parsed`` layered over ``ccdata``'s current value.

        A missing, None or empty attribute on ``ccdata`` is treated as "nothing
        parsed yet", in which case a copy of ``parsed`` is returned.
        """
        existing = getattr(ccdata, attribute, None)
        if existing:
            return {**existing, **parsed}
        return dict(parsed)

    @staticmethod
    def ORCA(file_handler, ccdata) -> dict | None:
        """Parse an ORCA charge section starting at ``file_handler.last_line``.

        Returns a dict with an "atomcharges" entry (and an "atomspins" entry
        when spin populations were printed), or None when the current line is
        not the title of a known charge section.
        """
        # ccdata is "const" here and we don't need to modify it yet. The driver will set the attr
        line = file_handler.last_line
        parsed_charges = None
        parsed_spins = None

        # ORCA will print atomic charges along with the spin populations,
        # so care must be taken about choosing the proper column.
        # Population analyses are performed usually only at the end
        # of a geometry optimization or other run, so we want to
        # leave just the final atom charges.
        # Here is an example for Mulliken charges:
        # --------------------------------------------
        # MULLIKEN ATOMIC CHARGES AND SPIN POPULATIONS
        # --------------------------------------------
        # 0 H : 0.126447 0.002622
        # 1 C : -0.613018 -0.029484
        # 2 H : 0.189146 0.015452
        # 3 H : 0.320041 0.037434
        # ...
        # Sum of atomic charges : -0.0000000
        # Sum of atomic spin populations: 1.0000000
        #
        # Things are the same for Lowdin populations, except that the sums
        # are not printed (there is a blank line at the end).
        #
        # ------------------
        # HIRSHFELD ANALYSIS
        # ------------------
        #
        # Total integrated alpha density = 142.999988722
        # Total integrated beta density = 142.999988722
        #
        # ATOM CHARGE SPIN
        # 0 H 0.157924 0.000000
        # 1 O -0.209542 0.000000
        # 2 C 0.030659 0.000000
        # ...
        # TOTAL -0.999977 0.000000
        #
        # CHELPG Charges
        # --------------------------------
        # 0 C : 0.363939
        # 1 H : 0.025695
        # ...
        # --------------------------------
        # Total charge: -0.000000
        # --------------------------------
        for title, chargestype in atomcharges._ORCA_SECTIONS:
            if line.startswith(title):
                parsed_charges, parsed_spins = orca_parse_charge_section(
                    file_handler, chargestype
                )
                # The titles are mutually exclusive, so at most one can match.
                break

        # TODO handle atomspins
        constructed_data = {}
        if parsed_charges:
            constructed_data["atomcharges"] = atomcharges._merge_with_existing(
                ccdata, "atomcharges", parsed_charges
            )
        if parsed_spins:
            constructed_data["atomspins"] = atomcharges._merge_with_existing(
                ccdata, "atomspins", parsed_spins
            )
        return constructed_data or None

    @staticmethod
    def NBO(file_handler, ccdata) -> dict | None:
        """Parse the NBO population table starting at ``file_handler.last_line``.

        When the current line is the table's column header, the line directly
        after it is skipped and rows are read until a line containing
        "==============". The third whitespace-separated field of each row
        (the natural charge) is stored under the "nbo" key.

        Returns ``{"atomcharges": {...}}`` or None when the current line is not
        the table header.
        """
        # ccdata is "const" here and we don't need to modify it yet. The driver will set the attr
        if " Atom No Charge" not in file_handler.last_line:
            return None

        file_handler.virtual_next()  # line directly under the column header; discarded
        line = file_handler.virtual_next()
        natural_charges = []
        while "==============" not in line:
            # Row fields: atom, number, natural charge, core, valence, rydberg, total.
            # Only the natural charge is kept.
            natural_charges.append(float(line.split()[2]))
            line = file_handler.virtual_next()

        return {
            "atomcharges": atomcharges._merge_with_existing(
                ccdata, "atomcharges", {"nbo": natural_charges}
            )
        }

    @staticmethod
    def parse(file_handler, program: str, ccdata) -> dict | None:
        """Dispatch to the parser for ``program`` and return its result.

        Returns None when ``program`` is not in ``known_codes``. Otherwise the
        program-specific parser runs between ``virtual_set()`` and
        ``virtual_reset()`` on ``file_handler``.
        """
        if program not in atomcharges.known_codes:
            return None
        file_handler.virtual_set()
        try:
            return getattr(atomcharges, program)(file_handler, ccdata)
        finally:
            # Always undo virtual_set(), even if the sub-parser raises.
            file_handler.virtual_reset()
Loading

0 comments on commit 42a15f2

Please sign in to comment.