Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Single atom fix #413

Merged
merged 2 commits on May 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion emmet-builders/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.26.0"
__version__ = "0.26.0"
10 changes: 5 additions & 5 deletions emmet-builders/emmet/builders/materials/chemenv.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@

class ChemEnvBuilder(MapBuilder):
def __init__(
self,
oxidation_states: Store,
chemenv: Store,
query: Optional[Dict] = None,
**kwargs
self,
oxidation_states: Store,
chemenv: Store,
query: Optional[Dict] = None,
**kwargs
):
self.oxidation_states = oxidation_states
self.chemenv = chemenv
Expand Down
4 changes: 3 additions & 1 deletion emmet-builders/emmet/builders/molecules/orbitals.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,9 @@ def process_item(self, items: List[Dict]) -> List[Dict]:
for best in sorted_entries:
task = best["task_id"]

task_doc = TaskDocument(**self.tasks.query_one({"task_id": int(task)}))
task_doc = TaskDocument(
**self.tasks.query_one({"task_id": int(task)})
)

orbital_doc = OrbitalDoc.from_task(
task_doc, molecule_id=mol.molecule_id, deprecated=False
Expand Down
11 changes: 0 additions & 11 deletions emmet-builders/emmet/builders/molecules/single_atom.json

This file was deleted.

14 changes: 11 additions & 3 deletions emmet-builders/emmet/builders/molecules/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,17 @@

SETTINGS = EmmetBuildSettings()

single_mol_thermo = loadfn(
os.path.join(os.path.dirname(os.path.abspath(__file__)), "single_atom.json")
)
single_mol_thermo = {
"C1": {"enthalpy": 1.481, "entropy": 33.398},
"F1": {"enthalpy": 1.481, "entropy": 34.767},
"H1": {"enthalpy": 1.481, "entropy": 26.014},
"Li1": {"enthalpy": 1.481, "entropy": 31.798},
"Mg1": {"enthalpy": 1.481, "entropy": 35.462},
"N1": {"enthalpy": 1.481, "entropy": 33.858},
"O1": {"enthalpy": 1.481, "entropy": 34.254},
"P1": {"enthalpy": 1.481, "entropy": 36.224},
"S1": {"enthalpy": 1.481, "entropy": 36.319},
}


class ThermoBuilder(Builder):
Expand Down
4 changes: 3 additions & 1 deletion emmet-builders/emmet/builders/qchem/molecules.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,9 @@ def get_items(self) -> Iterator[List[Dict]]:
TaskDocument(**t).level_of_theory
t["is_valid"] = True
except Exception as e:
self.logger.info(f"Processing task {t['task_id']} failed with Exception - {e}")
self.logger.info(
f"Processing task {t['task_id']} failed with Exception - {e}"
)
t["is_valid"] = False

yield tasks
Expand Down
86 changes: 66 additions & 20 deletions emmet-builders/emmet/builders/vasp/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,17 +119,25 @@ def get_items(self) -> Iterator[List[Dict]]:
# Remove overlapping chemical systems
processed = set()
to_process_chemsys = []
for chemsys in sorted(updated_chemsys | new_chemsys | affected_chemsys, key=lambda x: len(x), reverse=True,):
for chemsys in sorted(
updated_chemsys | new_chemsys | affected_chemsys,
key=lambda x: len(x),
reverse=True,
):
if chemsys not in processed:
processed |= chemsys_permutations(chemsys)
to_process_chemsys.append(chemsys)

self.logger.info(f"Found {len(to_process_chemsys)} chemical systems with new/updated materials to process")
self.logger.info(
f"Found {len(to_process_chemsys)} chemical systems with new/updated materials to process"
)
self.total = len(to_process_chemsys)

# Yield the chemical systems in order of increasing size
# Will build them in a similar manner to fast Pourbaix
for chemsys in sorted(to_process_chemsys, key=lambda x: len(x.split("-")), reverse=False):
for chemsys in sorted(
to_process_chemsys, key=lambda x: len(x.split("-")), reverse=False
):
entries = self.get_entries(chemsys)
yield entries

Expand All @@ -140,19 +148,25 @@ def process_item(self, item: List[Dict]):

entries = [ComputedStructureEntry.from_dict(entry) for entry in item]
# determine chemsys
elements = sorted(set([el.symbol for e in entries for el in e.composition.elements]))
elements = sorted(
set([el.symbol for e in entries for el in e.composition.elements])
)
chemsys = "-".join(elements)

self.logger.debug(f"Processing {len(entries)} entries for {chemsys}")

material_entries: Dict[str, Dict[str, ComputedStructureEntry]] = defaultdict(dict)
material_entries: Dict[str, Dict[str, ComputedStructureEntry]] = defaultdict(
dict
)
pd_entries = []
for entry in entries:
material_entries[entry.entry_id][entry.data["run_type"]] = entry

if self.compatibility:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="Failed to guess oxidation states.*")
warnings.filterwarnings(
"ignore", message="Failed to guess oxidation states.*"
)
pd_entries = self.compatibility.process_entries(entries)
else:
pd_entries = entries
Expand All @@ -167,7 +181,10 @@ def process_item(self, item: List[Dict]):
pd_data = None

if self.phase_diagram:
if self.num_phase_diagram_eles is None or len(elements) <= self.num_phase_diagram_eles:
if (
self.num_phase_diagram_eles is None
or len(elements) <= self.num_phase_diagram_eles
):
pd_doc = PhaseDiagramDoc(chemsys=chemsys, phase_diagram=pd)
pd_data = jsanitize(pd_doc.dict(), allow_bson=True)

Expand All @@ -181,10 +198,14 @@ def process_item(self, item: List[Dict]):
for e in entries:
elsyms.extend([el.symbol for el in e.composition.elements])

self.logger.warning(f"Phase diagram error in chemsys {'-'.join(sorted(set(elsyms)))}: {p}")
self.logger.warning(
f"Phase diagram error in chemsys {'-'.join(sorted(set(elsyms)))}: {p}"
)
return []
except Exception as e:
self.logger.error(f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}")
self.logger.error(
f"Got unexpected error while processing {[ent_.entry_id for ent_ in entries]}: {e}"
)
return []

return docs_pd_pair
Expand All @@ -204,13 +225,17 @@ def update_targets(self, items):
phase_diagram_docs = list(filter(None, chain.from_iterable(phase_diagram_docs)))

# Check if already updated this run
thermo_docs = [i for i in thermo_docs if i["material_id"] not in self._completed_tasks]
thermo_docs = [
i for i in thermo_docs if i["material_id"] not in self._completed_tasks
]

self._completed_tasks |= {i["material_id"] for i in thermo_docs}

for item in thermo_docs:
if isinstance(item["last_updated"], dict):
item["last_updated"] = MontyDecoder().process_decoded(item["last_updated"])
item["last_updated"] = MontyDecoder().process_decoded(
item["last_updated"]
)

if self.phase_diagram:
self.phase_diagram.update(phase_diagram_docs)
Expand All @@ -235,16 +260,26 @@ def get_entries(self, chemsys: str) -> List[Dict]:
all_chemsys = chemsys_permutations(chemsys)
cached_chemsys = all_chemsys & set(self._entries_cache.keys())
query_chemsys = all_chemsys - cached_chemsys
all_entries = list(chain.from_iterable(self._entries_cache[c] for c in cached_chemsys))
all_entries = list(
chain.from_iterable(self._entries_cache[c] for c in cached_chemsys)
)

self.logger.debug(f"Getting {len(cached_chemsys)} sub-chemsys from cache for {chemsys}")
self.logger.debug(f"Getting {len(query_chemsys)} sub-chemsys from DB for {chemsys}")
self.logger.debug(
f"Getting {len(cached_chemsys)} sub-chemsys from cache for {chemsys}"
)
self.logger.debug(
f"Getting {len(query_chemsys)} sub-chemsys from DB for {chemsys}"
)

# Second grab the materials docs
new_q = dict(self.query)
new_q["chemsys"] = {"$in": list(query_chemsys)}
new_q["deprecated"] = False
materials_docs = list(self.materials.query(criteria=new_q, properties=["material_id", "entries", "deprecated"]))
materials_docs = list(
self.materials.query(
criteria=new_q, properties=["material_id", "entries", "deprecated"]
)
)

# Get Oxidation state data for each material
oxi_states_data = {}
Expand All @@ -254,7 +289,10 @@ def get_entries(self, chemsys: str) -> List[Dict]:
d["material_id"]: d.get("average_oxidation_states", {})
for d in self.oxidation_states.query(
properties=["material_id", "average_oxidation_states"],
criteria={"material_id": {"$in": material_ids}, "state": "successful"},
criteria={
"material_id": {"$in": material_ids},
"state": "successful",
},
)
}

Expand All @@ -265,7 +303,9 @@ def get_entries(self, chemsys: str) -> List[Dict]:
# Convert the entries into ComputedEntries and store
for doc in materials_docs:
for r_type, entry_dict in doc.get("entries", {}).items():
entry_dict["data"]["oxidation_states"] = oxi_states_data.get(entry_dict["entry_id"], {})
entry_dict["data"]["oxidation_states"] = oxi_states_data.get(
entry_dict["entry_id"], {}
)
entry_dict["data"]["run_type"] = r_type
elsyms = sorted(set([el for el in entry_dict["composition"]]))
self._entries_cache["-".join(elsyms)].append(entry_dict)
Expand All @@ -282,7 +322,9 @@ def get_updated_chemsys(

updated_mats = self.thermo.newer_in(self.materials, criteria=self.query)
updated_chemsys = set(
self.materials.distinct("chemsys", {"material_id": {"$in": list(updated_mats)}, **self.query})
self.materials.distinct(
"chemsys", {"material_id": {"$in": list(updated_mats)}, **self.query}
)
)
self.logger.debug(f"Found {len(updated_chemsys)} updated chemical systems")

Expand All @@ -306,7 +348,9 @@ def get_affected_chemsys(self, chemical_systems: Set) -> Set:
# First get all chemsys with any of the elements we've marked
affected_chemsys = set()
affected_els = list({el for c in chemical_systems for el in c.split("-")})
possible_affected_chemsys = self.materials.distinct("chemsys", {"elements": {"$in": affected_els}})
possible_affected_chemsys = self.materials.distinct(
"chemsys", {"elements": {"$in": affected_els}}
)

sub_chemsys = defaultdict(list)
# Build a dictionary mapping sub_chemsys to all super_chemsys
Expand All @@ -318,6 +362,8 @@ def get_affected_chemsys(self, chemical_systems: Set) -> Set:
for chemsys in chemical_systems:
affected_chemsys |= set(sub_chemsys[chemsys])

self.logger.debug(f"Found {len(affected_chemsys)} chemical systems affected by this build")
self.logger.debug(
f"Found {len(affected_chemsys)} chemical systems affected by this build"
)

return affected_chemsys