From 3adbddf20340c05c3810fb50715899e932510b56 Mon Sep 17 00:00:00 2001
From: Evan Walter Clark Spotte-Smith <espottesmith@gmail.com>
Date: Thu, 5 May 2022 15:53:06 -0700
Subject: [PATCH] Getting rid of single_atom.json

---
 emmet-builders/_version.py                    |  2 +-
 .../emmet/builders/materials/chemenv.py       | 10 +--
 .../emmet/builders/molecules/orbitals.py      |  4 +-
 .../emmet/builders/molecules/single_atom.json | 11 ---
 .../emmet/builders/molecules/thermo.py        | 14 ++-
 .../emmet/builders/qchem/molecules.py         |  4 +-
 emmet-builders/emmet/builders/vasp/thermo.py  | 86 ++++++++++++++-----
 7 files changed, 89 insertions(+), 42 deletions(-)
 delete mode 100644 emmet-builders/emmet/builders/molecules/single_atom.json

diff --git a/emmet-builders/_version.py b/emmet-builders/_version.py
index a97b79aac5..7c4a9591e1 100644
--- a/emmet-builders/_version.py
+++ b/emmet-builders/_version.py
@@ -1 +1 @@
-__version__ = "0.26.0"
\ No newline at end of file
+__version__ = "0.26.0"
diff --git a/emmet-builders/emmet/builders/materials/chemenv.py b/emmet-builders/emmet/builders/materials/chemenv.py
index e5ca55f9c0..853cf65cd5 100644
--- a/emmet-builders/emmet/builders/materials/chemenv.py
+++ b/emmet-builders/emmet/builders/materials/chemenv.py
@@ -9,11 +9,11 @@
 
 class ChemEnvBuilder(MapBuilder):
     def __init__(
-            self,
-            oxidation_states: Store,
-            chemenv: Store,
-            query: Optional[Dict] = None,
-            **kwargs
+        self,
+        oxidation_states: Store,
+        chemenv: Store,
+        query: Optional[Dict] = None,
+        **kwargs
     ):
         self.oxidation_states = oxidation_states
         self.chemenv = chemenv
diff --git a/emmet-builders/emmet/builders/molecules/orbitals.py b/emmet-builders/emmet/builders/molecules/orbitals.py
index 408494bca4..599a06c949 100644
--- a/emmet-builders/emmet/builders/molecules/orbitals.py
+++ b/emmet-builders/emmet/builders/molecules/orbitals.py
@@ -191,7 +191,9 @@ def process_item(self, items: List[Dict]) -> List[Dict]:
                 for best in sorted_entries:
                     task = best["task_id"]
 
-                    task_doc = TaskDocument(**self.tasks.query_one({"task_id": int(task)}))
+                    task_doc = TaskDocument(
+                        **self.tasks.query_one({"task_id": int(task)})
+                    )
 
                     orbital_doc = OrbitalDoc.from_task(
                         task_doc, molecule_id=mol.molecule_id, deprecated=False
diff --git a/emmet-builders/emmet/builders/molecules/single_atom.json b/emmet-builders/emmet/builders/molecules/single_atom.json
deleted file mode 100644
index 7bcb57212f..0000000000
--- a/emmet-builders/emmet/builders/molecules/single_atom.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-"C1": {"enthalpy": 1.481, "entropy": 33.398},
-"F1": {"enthalpy": 1.481, "entropy": 34.767},
-"H1": {"enthalpy": 1.481, "entropy": 26.014},
-"Li1": {"enthalpy": 1.481, "entropy": 31.798},
-"Mg1": {"enthalpy": 1.481, "entropy": 35.462},
-"N1": {"enthalpy": 1.481, "entropy": 33.858},
-"O1": {"enthalpy": 1.481, "entropy": 34.254},
-"P1": {"enthalpy": 1.481, "entropy": 36.224},
-"S1": {"enthalpy": 1.481, "entropy": 36.319}
-}
diff --git a/emmet-builders/emmet/builders/molecules/thermo.py b/emmet-builders/emmet/builders/molecules/thermo.py
index 4d61386863..ef6a455b45 100644
--- a/emmet-builders/emmet/builders/molecules/thermo.py
+++ b/emmet-builders/emmet/builders/molecules/thermo.py
@@ -22,9 +22,17 @@
 
 SETTINGS = EmmetBuildSettings()
 
-single_mol_thermo = loadfn(
-    os.path.join(os.path.dirname(os.path.abspath(__file__)), "single_atom.json")
-)
+single_mol_thermo = {
+    "C1": {"enthalpy": 1.481, "entropy": 33.398},
+    "F1": {"enthalpy": 1.481, "entropy": 34.767},
+    "H1": {"enthalpy": 1.481, "entropy": 26.014},
+    "Li1": {"enthalpy": 1.481, "entropy": 31.798},
+    "Mg1": {"enthalpy": 1.481, "entropy": 35.462},
+    "N1": {"enthalpy": 1.481, "entropy": 33.858},
+    "O1": {"enthalpy": 1.481, "entropy": 34.254},
+    "P1": {"enthalpy": 1.481, "entropy": 36.224},
+    "S1": {"enthalpy": 1.481, "entropy": 36.319},
+}
 
 
 class ThermoBuilder(Builder):
diff --git a/emmet-builders/emmet/builders/qchem/molecules.py b/emmet-builders/emmet/builders/qchem/molecules.py
index 5afb7efea3..dc849ac658 100644
--- a/emmet-builders/emmet/builders/qchem/molecules.py
+++ b/emmet-builders/emmet/builders/qchem/molecules.py
@@ -205,7 +205,9 @@ def get_items(self) -> Iterator[List[Dict]]:
                     TaskDocument(**t).level_of_theory
                     t["is_valid"] = True
                 except Exception as e:
-                    self.logger.info(f"Processing task {t['task_id']} failed with Exception - {e}")
+                    self.logger.info(
+                        f"Processing task {t['task_id']} failed with Exception - {e}"
+                    )
                     t["is_valid"] = False
 
             yield tasks
diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py
index 77eae6d742..e736113e47 100644
--- a/emmet-builders/emmet/builders/vasp/thermo.py
+++ b/emmet-builders/emmet/builders/vasp/thermo.py
@@ -119,17 +119,25 @@ def get_items(self) -> Iterator[List[Dict]]:
         # Remove overlapping chemical systems
         processed = set()
         to_process_chemsys = []
-        for chemsys in sorted(updated_chemsys | new_chemsys | affected_chemsys, key=lambda x: len(x), reverse=True,):
+        for chemsys in sorted(
+            updated_chemsys | new_chemsys | affected_chemsys,
+            key=lambda x: len(x),
+            reverse=True,
+        ):
             if chemsys not in processed:
                 processed |= chemsys_permutations(chemsys)
                 to_process_chemsys.append(chemsys)
 
-        self.logger.info(f"Found {len(to_process_chemsys)} chemical systems with new/updated materials to process")
+        self.logger.info(
+            f"Found {len(to_process_chemsys)} chemical systems with new/updated materials to process"
+        )
         self.total = len(to_process_chemsys)
 
         # Yield the chemical systems in order of increasing size
         # Will build them in a similar manner to fast Pourbaix
-        for chemsys in sorted(to_process_chemsys, key=lambda x: len(x.split("-")), reverse=False):
+        for chemsys in sorted(
+            to_process_chemsys, key=lambda x: len(x.split("-")), reverse=False
+        ):
             entries = self.get_entries(chemsys)
             yield entries
 
@@ -140,19 +148,25 @@ def process_item(self, item: List[Dict]):
 
         entries = [ComputedStructureEntry.from_dict(entry) for entry in item]
         # determine chemsys
-        elements = sorted(set([el.symbol for e in entries for el in e.composition.elements]))
+        elements = sorted(
+            set([el.symbol for e in entries for el in e.composition.elements])
+        )
         chemsys = "-".join(elements)
 
         self.logger.debug(f"Processing {len(entries)} entries for {chemsys}")
 
-        material_entries: Dict[str, Dict[str, ComputedStructureEntry]] = defaultdict(dict)
+        material_entries: Dict[str, Dict[str, ComputedStructureEntry]] = defaultdict(
+            dict
+        )
         pd_entries = []
         for entry in entries:
             material_entries[entry.entry_id][entry.data["run_type"]] = entry
 
         if self.compatibility:
             with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", message="Failed to guess oxidation states.*")
+                warnings.filterwarnings(
+                    "ignore", message="Failed to guess oxidation states.*"
+                )
                 pd_entries = self.compatibility.process_entries(entries)
         else:
             pd_entries = entries
@@ -167,7 +181,10 @@ def process_item(self, item: List[Dict]):
             pd_data = None
 
             if self.phase_diagram:
-                if self.num_phase_diagram_eles is None or len(elements) <= self.num_phase_diagram_eles:
+                if (
+                    self.num_phase_diagram_eles is None
+                    or len(elements) <= self.num_phase_diagram_eles
+                ):
                     pd_doc = PhaseDiagramDoc(chemsys=chemsys, phase_diagram=pd)
                     pd_data = jsanitize(pd_doc.dict(), allow_bson=True)
 
@@ -181,10 +198,14 @@ def process_item(self, item: List[Dict]):
             for e in entries:
                 elsyms.extend([el.symbol for el in e.composition.elements])
 
-            self.logger.warning(f"Phase diagram error in chemsys {'-'.join(sorted(set(elsyms)))}: {p}")
+            self.logger.warning(
+                f"Phase diagram error in chemsys {'-'.join(sorted(set(elsyms)))}: {p}"
+            )
             return []
         except Exception as e:
-            self.logger.error(f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}")
+            self.logger.error(
+                f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}"
+            )
             return []
 
         return docs_pd_pair
@@ -204,13 +225,17 @@ def update_targets(self, items):
         phase_diagram_docs = list(filter(None, chain.from_iterable(phase_diagram_docs)))
 
         # Check if already updated this run
-        thermo_docs = [i for i in thermo_docs if i["material_id"] not in self._completed_tasks]
+        thermo_docs = [
+            i for i in thermo_docs if i["material_id"] not in self._completed_tasks
+        ]
 
         self._completed_tasks |= {i["material_id"] for i in thermo_docs}
 
         for item in thermo_docs:
             if isinstance(item["last_updated"], dict):
-                item["last_updated"] = MontyDecoder().process_decoded(item["last_updated"])
+                item["last_updated"] = MontyDecoder().process_decoded(
+                    item["last_updated"]
+                )
 
         if self.phase_diagram:
             self.phase_diagram.update(phase_diagram_docs)
@@ -235,16 +260,26 @@ def get_entries(self, chemsys: str) -> List[Dict]:
         all_chemsys = chemsys_permutations(chemsys)
         cached_chemsys = all_chemsys & set(self._entries_cache.keys())
         query_chemsys = all_chemsys - cached_chemsys
-        all_entries = list(chain.from_iterable(self._entries_cache[c] for c in cached_chemsys))
+        all_entries = list(
+            chain.from_iterable(self._entries_cache[c] for c in cached_chemsys)
+        )
 
-        self.logger.debug(f"Getting {len(cached_chemsys)} sub-chemsys from cache for {chemsys}")
-        self.logger.debug(f"Getting {len(query_chemsys)} sub-chemsys from DB for {chemsys}")
+        self.logger.debug(
+            f"Getting {len(cached_chemsys)} sub-chemsys from cache for {chemsys}"
+        )
+        self.logger.debug(
+            f"Getting {len(query_chemsys)} sub-chemsys from DB for {chemsys}"
+        )
 
         # Second grab the materials docs
         new_q = dict(self.query)
         new_q["chemsys"] = {"$in": list(query_chemsys)}
         new_q["deprecated"] = False
-        materials_docs = list(self.materials.query(criteria=new_q, properties=["material_id", "entries", "deprecated"]))
+        materials_docs = list(
+            self.materials.query(
+                criteria=new_q, properties=["material_id", "entries", "deprecated"]
+            )
+        )
 
         # Get Oxidation state data for each material
         oxi_states_data = {}
@@ -254,7 +289,10 @@ def get_entries(self, chemsys: str) -> List[Dict]:
                 d["material_id"]: d.get("average_oxidation_states", {})
                 for d in self.oxidation_states.query(
                     properties=["material_id", "average_oxidation_states"],
-                    criteria={"material_id": {"$in": material_ids}, "state": "successful"},
+                    criteria={
+                        "material_id": {"$in": material_ids},
+                        "state": "successful",
+                    },
                 )
             }
 
@@ -265,7 +303,9 @@ def get_entries(self, chemsys: str) -> List[Dict]:
         # Convert the entries into ComputedEntries and store
         for doc in materials_docs:
             for r_type, entry_dict in doc.get("entries", {}).items():
-                entry_dict["data"]["oxidation_states"] = oxi_states_data.get(entry_dict["entry_id"], {})
+                entry_dict["data"]["oxidation_states"] = oxi_states_data.get(
+                    entry_dict["entry_id"], {}
+                )
                 entry_dict["data"]["run_type"] = r_type
                 elsyms = sorted(set([el for el in entry_dict["composition"]]))
                 self._entries_cache["-".join(elsyms)].append(entry_dict)
@@ -282,7 +322,9 @@ def get_updated_chemsys(
 
         updated_mats = self.thermo.newer_in(self.materials, criteria=self.query)
         updated_chemsys = set(
-            self.materials.distinct("chemsys", {"material_id": {"$in": list(updated_mats)}, **self.query})
+            self.materials.distinct(
+                "chemsys", {"material_id": {"$in": list(updated_mats)}, **self.query}
+            )
         )
         self.logger.debug(f"Found {len(updated_chemsys)} updated chemical systems")
 
@@ -306,7 +348,9 @@ def get_affected_chemsys(self, chemical_systems: Set) -> Set:
         # First get all chemsys with any of the elements we've marked
         affected_chemsys = set()
         affected_els = list({el for c in chemical_systems for el in c.split("-")})
-        possible_affected_chemsys = self.materials.distinct("chemsys", {"elements": {"$in": affected_els}})
+        possible_affected_chemsys = self.materials.distinct(
+            "chemsys", {"elements": {"$in": affected_els}}
+        )
 
         sub_chemsys = defaultdict(list)
         # Build a dictionary mapping sub_chemsys to all super_chemsys
@@ -318,6 +362,8 @@ def get_affected_chemsys(self, chemical_systems: Set) -> Set:
         for chemsys in chemical_systems:
             affected_chemsys |= set(sub_chemsys[chemsys])
 
-        self.logger.debug(f"Found {len(affected_chemsys)} chemical systems affected by this build")
+        self.logger.debug(
+            f"Found {len(affected_chemsys)} chemical systems affected by this build"
+        )
 
         return affected_chemsys