From 3adbddf20340c05c3810fb50715899e932510b56 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Thu, 5 May 2022 15:53:06 -0700 Subject: [PATCH] Getting rid of single_atom.json --- emmet-builders/_version.py | 2 +- .../emmet/builders/materials/chemenv.py | 10 +-- .../emmet/builders/molecules/orbitals.py | 4 +- .../emmet/builders/molecules/single_atom.json | 11 --- .../emmet/builders/molecules/thermo.py | 14 ++- .../emmet/builders/qchem/molecules.py | 4 +- emmet-builders/emmet/builders/vasp/thermo.py | 86 ++++++++++++++----- 7 files changed, 89 insertions(+), 42 deletions(-) delete mode 100644 emmet-builders/emmet/builders/molecules/single_atom.json diff --git a/emmet-builders/_version.py b/emmet-builders/_version.py index a97b79aac5..7c4a9591e1 100644 --- a/emmet-builders/_version.py +++ b/emmet-builders/_version.py @@ -1 +1 @@ -__version__ = "0.26.0" \ No newline at end of file +__version__ = "0.26.0" diff --git a/emmet-builders/emmet/builders/materials/chemenv.py b/emmet-builders/emmet/builders/materials/chemenv.py index e5ca55f9c0..853cf65cd5 100644 --- a/emmet-builders/emmet/builders/materials/chemenv.py +++ b/emmet-builders/emmet/builders/materials/chemenv.py @@ -9,11 +9,11 @@ class ChemEnvBuilder(MapBuilder): def __init__( - self, - oxidation_states: Store, - chemenv: Store, - query: Optional[Dict] = None, - **kwargs + self, + oxidation_states: Store, + chemenv: Store, + query: Optional[Dict] = None, + **kwargs ): self.oxidation_states = oxidation_states self.chemenv = chemenv diff --git a/emmet-builders/emmet/builders/molecules/orbitals.py b/emmet-builders/emmet/builders/molecules/orbitals.py index 408494bca4..599a06c949 100644 --- a/emmet-builders/emmet/builders/molecules/orbitals.py +++ b/emmet-builders/emmet/builders/molecules/orbitals.py @@ -191,7 +191,9 @@ def process_item(self, items: List[Dict]) -> List[Dict]: for best in sorted_entries: task = best["task_id"] - task_doc = TaskDocument(**self.tasks.query_one({"task_id": int(task)})) + task_doc = TaskDocument( + **self.tasks.query_one({"task_id": int(task)}) + ) orbital_doc = OrbitalDoc.from_task( task_doc, molecule_id=mol.molecule_id, deprecated=False diff --git a/emmet-builders/emmet/builders/molecules/single_atom.json b/emmet-builders/emmet/builders/molecules/single_atom.json deleted file mode 100644 index 7bcb57212f..0000000000 --- a/emmet-builders/emmet/builders/molecules/single_atom.json +++ /dev/null @@ -1,11 +0,0 @@ -{ -"C1": {"enthalpy": 1.481, "entropy": 33.398}, -"F1": {"enthalpy": 1.481, "entropy": 34.767}, -"H1": {"enthalpy": 1.481, "entropy": 26.014}, -"Li1": {"enthalpy": 1.481, "entropy": 31.798}, -"Mg1": {"enthalpy": 1.481, "entropy": 35.462}, -"N1": {"enthalpy": 1.481, "entropy": 33.858}, -"O1": {"enthalpy": 1.481, "entropy": 34.254}, -"P1": {"enthalpy": 1.481, "entropy": 36.224}, -"S1": {"enthalpy": 1.481, "entropy": 36.319} -} diff --git a/emmet-builders/emmet/builders/molecules/thermo.py b/emmet-builders/emmet/builders/molecules/thermo.py index 4d61386863..ef6a455b45 100644 --- a/emmet-builders/emmet/builders/molecules/thermo.py +++ b/emmet-builders/emmet/builders/molecules/thermo.py @@ -22,9 +22,17 @@ SETTINGS = EmmetBuildSettings() -single_mol_thermo = loadfn( - os.path.join(os.path.dirname(os.path.abspath(__file__)), "single_atom.json") -) +single_mol_thermo = { + "C1": {"enthalpy": 1.481, "entropy": 33.398}, + "F1": {"enthalpy": 1.481, "entropy": 34.767}, + "H1": {"enthalpy": 1.481, "entropy": 26.014}, + "Li1": {"enthalpy": 1.481, "entropy": 31.798}, + "Mg1": {"enthalpy": 1.481, "entropy": 35.462}, + "N1": {"enthalpy": 1.481, "entropy": 33.858}, + "O1": {"enthalpy": 1.481, "entropy": 34.254}, + "P1": {"enthalpy": 1.481, "entropy": 36.224}, + "S1": {"enthalpy": 1.481, "entropy": 36.319}, +} class ThermoBuilder(Builder): diff --git a/emmet-builders/emmet/builders/qchem/molecules.py b/emmet-builders/emmet/builders/qchem/molecules.py index 5afb7efea3..dc849ac658 100644 --- a/emmet-builders/emmet/builders/qchem/molecules.py +++ b/emmet-builders/emmet/builders/qchem/molecules.py @@ -205,7 +205,9 @@ def get_items(self) -> Iterator[List[Dict]]: TaskDocument(**t).level_of_theory t["is_valid"] = True except Exception as e: - self.logger.info(f"Processing task {t['task_id']} failed with Exception - {e}") + self.logger.info( + f"Processing task {t['task_id']} failed with Exception - {e}" + ) t["is_valid"] = False yield tasks diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py index 77eae6d742..e736113e47 100644 --- a/emmet-builders/emmet/builders/vasp/thermo.py +++ b/emmet-builders/emmet/builders/vasp/thermo.py @@ -119,17 +119,25 @@ def get_items(self) -> Iterator[List[Dict]]: # Remove overlapping chemical systems processed = set() to_process_chemsys = [] - for chemsys in sorted(updated_chemsys | new_chemsys | affected_chemsys, key=lambda x: len(x), reverse=True,): + for chemsys in sorted( + updated_chemsys | new_chemsys | affected_chemsys, + key=lambda x: len(x), + reverse=True, + ): if chemsys not in processed: processed |= chemsys_permutations(chemsys) to_process_chemsys.append(chemsys) - self.logger.info(f"Found {len(to_process_chemsys)} chemical systems with new/updated materials to process") + self.logger.info( + f"Found {len(to_process_chemsys)} chemical systems with new/updated materials to process" + ) self.total = len(to_process_chemsys) # Yield the chemical systems in order of increasing size # Will build them in a similar manner to fast Pourbaix - for chemsys in sorted(to_process_chemsys, key=lambda x: len(x.split("-")), reverse=False): + for chemsys in sorted( + to_process_chemsys, key=lambda x: len(x.split("-")), reverse=False + ): entries = self.get_entries(chemsys) yield entries @@ -140,19 +148,25 @@ def process_item(self, item: List[Dict]): entries = [ComputedStructureEntry.from_dict(entry) for entry in item] # determine chemsys - elements = sorted(set([el.symbol for e in entries for el in e.composition.elements])) + elements = sorted( + set([el.symbol for e in entries for el in e.composition.elements]) + ) chemsys = "-".join(elements) self.logger.debug(f"Processing {len(entries)} entries for {chemsys}") - material_entries: Dict[str, Dict[str, ComputedStructureEntry]] = defaultdict(dict) + material_entries: Dict[str, Dict[str, ComputedStructureEntry]] = defaultdict( + dict + ) pd_entries = [] for entry in entries: material_entries[entry.entry_id][entry.data["run_type"]] = entry if self.compatibility: with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="Failed to guess oxidation states.*") + warnings.filterwarnings( + "ignore", message="Failed to guess oxidation states.*" + ) pd_entries = self.compatibility.process_entries(entries) else: pd_entries = entries @@ -167,7 +181,10 @@ def process_item(self, item: List[Dict]): pd_data = None if self.phase_diagram: - if self.num_phase_diagram_eles is None or len(elements) <= self.num_phase_diagram_eles: + if ( + self.num_phase_diagram_eles is None + or len(elements) <= self.num_phase_diagram_eles + ): pd_doc = PhaseDiagramDoc(chemsys=chemsys, phase_diagram=pd) pd_data = jsanitize(pd_doc.dict(), allow_bson=True) @@ -181,10 +198,14 @@ def process_item(self, item: List[Dict]): for e in entries: elsyms.extend([el.symbol for el in e.composition.elements]) - self.logger.warning(f"Phase diagram error in chemsys {'-'.join(sorted(set(elsyms)))}: {p}") + self.logger.warning( + f"Phase diagram error in chemsys {'-'.join(sorted(set(elsyms)))}: {p}" + ) return [] except Exception as e: - self.logger.error(f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}") + self.logger.error( + f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}" + ) return [] return docs_pd_pair @@ -204,13 +225,17 @@ def update_targets(self, items): phase_diagram_docs = list(filter(None, chain.from_iterable(phase_diagram_docs))) # Check if already updated this run - thermo_docs = [i for i in thermo_docs if i["material_id"] not in self._completed_tasks] + thermo_docs = [ + i for i in thermo_docs if i["material_id"] not in self._completed_tasks + ] self._completed_tasks |= {i["material_id"] for i in thermo_docs} for item in thermo_docs: if isinstance(item["last_updated"], dict): - item["last_updated"] = MontyDecoder().process_decoded(item["last_updated"]) + item["last_updated"] = MontyDecoder().process_decoded( + item["last_updated"] + ) if self.phase_diagram: self.phase_diagram.update(phase_diagram_docs) @@ -235,16 +260,26 @@ def get_entries(self, chemsys: str) -> List[Dict]: all_chemsys = chemsys_permutations(chemsys) cached_chemsys = all_chemsys & set(self._entries_cache.keys()) query_chemsys = all_chemsys - cached_chemsys - all_entries = list(chain.from_iterable(self._entries_cache[c] for c in cached_chemsys)) + all_entries = list( + chain.from_iterable(self._entries_cache[c] for c in cached_chemsys) + ) - self.logger.debug(f"Getting {len(cached_chemsys)} sub-chemsys from cache for {chemsys}") - self.logger.debug(f"Getting {len(query_chemsys)} sub-chemsys from DB for {chemsys}") + self.logger.debug( + f"Getting {len(cached_chemsys)} sub-chemsys from cache for {chemsys}" + ) + self.logger.debug( + f"Getting {len(query_chemsys)} sub-chemsys from DB for {chemsys}" + ) # Second grab the materials docs new_q = dict(self.query) new_q["chemsys"] = {"$in": list(query_chemsys)} new_q["deprecated"] = False - materials_docs = list(self.materials.query(criteria=new_q, properties=["material_id", "entries", "deprecated"])) + materials_docs = list( + self.materials.query( + criteria=new_q, properties=["material_id", "entries", "deprecated"] + ) + ) # Get Oxidation state data for each material oxi_states_data = {} @@ -254,7 +289,10 @@ def get_entries(self, chemsys: str) -> List[Dict]: d["material_id"]: d.get("average_oxidation_states", {}) for d in self.oxidation_states.query( properties=["material_id", "average_oxidation_states"], - criteria={"material_id": {"$in": material_ids}, "state": "successful"}, + criteria={ + "material_id": {"$in": material_ids}, + "state": "successful", + }, ) } @@ -265,7 +303,9 @@ def get_entries(self, chemsys: str) -> List[Dict]: # Convert the entries into ComputedEntries and store for doc in materials_docs: for r_type, entry_dict in doc.get("entries", {}).items(): - entry_dict["data"]["oxidation_states"] = oxi_states_data.get(entry_dict["entry_id"], {}) + entry_dict["data"]["oxidation_states"] = oxi_states_data.get( + entry_dict["entry_id"], {} + ) entry_dict["data"]["run_type"] = r_type elsyms = sorted(set([el for el in entry_dict["composition"]])) self._entries_cache["-".join(elsyms)].append(entry_dict) @@ -282,7 +322,9 @@ def get_updated_chemsys( updated_mats = self.thermo.newer_in(self.materials, criteria=self.query) updated_chemsys = set( - self.materials.distinct("chemsys", {"material_id": {"$in": list(updated_mats)}, **self.query}) + self.materials.distinct( + "chemsys", {"material_id": {"$in": list(updated_mats)}, **self.query} + ) ) self.logger.debug(f"Found {len(updated_chemsys)} updated chemical systems") @@ -306,7 +348,9 @@ def get_affected_chemsys(self, chemical_systems: Set) -> Set: # First get all chemsys with any of the elements we've marked affected_chemsys = set() affected_els = list({el for c in chemical_systems for el in c.split("-")}) - possible_affected_chemsys = self.materials.distinct("chemsys", {"elements": {"$in": affected_els}}) + possible_affected_chemsys = self.materials.distinct( + "chemsys", {"elements": {"$in": affected_els}} + ) sub_chemsys = defaultdict(list) # Build a dictionary mapping sub_chemsys to all super_chemsys @@ -318,6 +362,8 @@ def get_affected_chemsys(self, chemical_systems: Set) -> Set: for chemsys in chemical_systems: affected_chemsys |= set(sub_chemsys[chemsys]) - self.logger.debug(f"Found {len(affected_chemsys)} chemical systems affected by this build") + self.logger.debug( + f"Found {len(affected_chemsys)} chemical systems affected by this build" + ) return affected_chemsys