Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimade Materials Doc #230

Merged
merged 4 commits into from
Jul 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions emmet-builders/emmet/builders/materials/optimade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from maggma.builders.map_builder import MapBuilder
from maggma.core import Store
from pymatgen.core.structure import Structure

from emmet.core.optimade import OptimadeMaterialsDoc
from emmet.core.utils import jsanitize


class OptimadeMaterialsBuilder(MapBuilder):
    """Map builder producing one OPTIMADE structure document per materials document."""

    def __init__(
        self,
        materials: Store,
        optimade: Store,
        **kwargs,
    ):
        """
        Set up the mapping from the materials store to the optimade store.

        Args:
            materials: Store of materials docs, used as the builder source
            optimade: Store to update with optimade documents, used as the builder target
            **kwargs: passed through unchanged to the parent builder
                (e.g. ``query`` to limit the materials that are processed)
        """
        self.materials = materials
        self.optimade = optimade
        self.kwargs = kwargs

        # Both stores are forced to key on material_id, whatever they were
        # configured with before being handed to this builder.
        for store in (self.materials, self.optimade):
            store.key = "material_id"

        # NOTE(review): only "structure" is projected, yet unary_function also
        # reads "material_id" and "last_updated" - presumably the parent builder
        # adds the key and last-updated fields to the projection; confirm.
        super().__init__(
            source=materials,
            target=optimade,
            projection=["structure"],
            **kwargs,
        )

    def unary_function(self, item):
        """
        Convert one materials document into a sanitized OPTIMADE document.

        Args:
            item: source document; its "structure", "material_id" and
                "last_updated" entries are read

        Returns:
            The OPTIMADE document as a dict, passed through jsanitize.
        """
        optimade_doc = OptimadeMaterialsDoc.from_structure(
            structure=Structure.from_dict(item["structure"]),
            material_id=item["material_id"],
            last_updated=item["last_updated"],
        )
        return jsanitize(optimade_doc.dict(), allow_bson=True)
121 changes: 121 additions & 0 deletions emmet-core/emmet/core/optimade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import string
from datetime import datetime
from typing import Dict

from monty.fractions import gcd
from optimade.models import Species, StructureResourceAttributes
from pymatgen.core.composition import Composition, formula_double_format
from pymatgen.core.structure import Structure

from emmet.core.mpid import MPID

# The 26 uppercase ASCII letters. Taken from the standard library because the
# previous hand-typed literal was missing "W".
letters = string.ascii_uppercase


def optimade_form(comp: Composition):
    """
    Render a composition as a formula with symbols in alphabetical order.

    Each symbol is followed by its amount, except that an amount equal to 1 is
    left implicit. Amounts are written with ``int()``, so any fractional part
    is truncated.

    Args:
        comp: mapping from species/elements to amounts, indexable by symbol string

    Returns:
        The concatenated formula string (empty for an empty composition).
    """
    pieces = []
    for symbol in sorted(str(el) for el in comp.keys()):
        amount = comp[symbol]
        pieces.append(symbol if amount == 1 else symbol + str(int(amount)))
    return "".join(pieces)


def optimade_anonymous_form(comp: Composition):
    """
    Build the anonymous formula (e.g. "A2B") of a composition.

    Amounts are taken per element (``comp.element_composition``), divided by
    their greatest common divisor when they are all whole numbers, sorted from
    largest to smallest and labelled "A", "B", "C", ... in that order.

    Args:
        comp: composition to anonymize

    Returns:
        The anonymous formula string. An amount of 1 is left implicit, whole
        amounts are written as integers and anything else via ``str``.
        An empty composition yields an empty string.
    """
    # Local imports keep this function self-contained.
    import math
    from functools import reduce

    reduced = comp.element_composition
    amounts = list(reduced.values())

    # The integrality test and the divisor must come from the per-element
    # amounts that are actually divided. Using the per-species amounts of
    # ``comp`` under-reduces when one element appears as several species
    # (e.g. Fe2+ and Fe3+ with one atom each give Fe2, not two separate 1s).
    if all(x == int(x) for x in amounts):
        divisor = reduce(math.gcd, (int(x) for x in amounts), 0)
        # divisor is 0 for an empty or all-zero composition and 1 when the
        # formula is already reduced; in both cases there is nothing to divide.
        if divisor > 1:
            reduced = reduced / divisor

    anon = []
    # NOTE: zip stops after "Z", so at most 26 amounts are labelled.
    for label, amt in zip(
        string.ascii_uppercase, sorted(reduced.values(), reverse=True)
    ):
        if amt == 1:
            amt_str = ""
        elif abs(amt % 1) < 1e-8:
            amt_str = str(int(amt))
        else:
            amt_str = str(amt)
        anon.append(label)
        anon.append(amt_str)
    return "".join(anon)


def hill_formula(comp: Composition) -> str:
    """
    Write the composition as a Hill formula.

    In Hill notation carbon comes first, hydrogen second, and every other
    element follows in alphabetical order of its symbol. If the composition
    contains no carbon, all elements (hydrogen included) are simply listed
    alphabetically. An amount of 1 is left implicit.

    :param comp: composition to format; amounts are taken per element
    :return: the Hill formula string
    """
    amounts = comp.element_composition
    symbols = sorted(el.symbol for el in amounts.keys())

    if "C" in symbols:
        # Carbon (and hydrogen, when present) lead; the rest keep sorted order.
        ordered = [s for s in ("C", "H") if s in symbols]
        ordered += [s for s in symbols if s not in ("C", "H")]
    else:
        ordered = symbols

    pieces = []
    for symbol in ordered:
        amount = amounts[symbol]
        suffix = "" if amount == 1 else str(formula_double_format(amount))
        pieces.append(symbol + suffix)
    return "".join(pieces)


class OptimadeMaterialsDoc(StructureResourceAttributes):
    """Optimade Structure resource with a few extra MP specific fields for materials"""

    material_id: MPID
    # NOTE(review): depending on the model configuration of the base class, a
    # leading-underscore annotation may not be treated as a regular field -
    # confirm that this value survives validation and serialization.
    _mp_chemical_system: str

    @classmethod
    def from_structure(
        cls, structure: Structure, material_id: MPID, last_updated: datetime, **kwargs
    ) -> StructureResourceAttributes:
        """
        Build an OPTIMADE materials document from a structure.

        The structure is always reported as periodic in three dimensions, with
        one species entry per distinct site species string, each at
        concentration 1.0.

        Args:
            structure: structure to describe; it is copied, so the caller's
                object is left untouched
            material_id: identifier stored on the document
            last_updated: timestamp stored as ``last_modified``
            **kwargs: additional fields passed to the document constructor

        Returns:
            The populated document (an instance of ``cls``).
        """
        # remove_oxidation_states() edits the structure in place; work on a
        # copy so the structure handed in by the caller is not modified.
        structure = structure.copy()
        structure.remove_oxidation_states()

        comp = structure.composition
        elements = sorted({e.symbol for e in comp.elements})
        fractions = comp.fractional_composition

        # One Species entry per distinct species string; the dict removes
        # duplicates coming from sites that share a species.
        species_by_name = {
            site.species_string: Species(
                chemical_symbols=[site.species_string],
                concentration=[1.0],
                name=site.species_string,
            )
            for site in structure
        }

        return cls(
            material_id=material_id,
            _mp_chemical_system=comp.chemical_system,
            elements=elements,
            nelements=len(comp.elements),
            # Ratios are looked up per sorted symbol so that entry i of
            # elements_ratios belongs to entry i of elements.
            elements_ratios=[fractions[el] for el in elements],
            chemical_formula_descriptive=optimade_form(comp),
            chemical_formula_reduced=optimade_form(
                comp.get_reduced_composition_and_factor()[0]
            ),
            chemical_formula_anonymous=optimade_anonymous_form(comp),
            chemical_formula_hill=hill_formula(comp),
            dimension_types=[1, 1, 1],
            nperiodic_dimensions=3,
            lattice_vectors=structure.lattice.matrix.tolist(),
            cartesian_site_positions=[site.coords.tolist() for site in structure],
            nsites=len(structure),
            species=list(species_by_name.values()),
            species_at_sites=[site.species_string for site in structure],
            last_modified=last_updated,
            structure_features=[],
            **kwargs
        )
1 change: 1 addition & 0 deletions emmet-core/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ seekpath==2.0.1
setuptools-scm==6.0.1
robocrys==0.2.7
matminer==0.7.3
optimade==0.16.1
40 changes: 40 additions & 0 deletions tests/emmet-core/test_optimade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from datetime import datetime

import pytest
from pymatgen.core.structure import Structure
from pymatgen.util.testing import PymatgenTest

from emmet.core.optimade import OptimadeMaterialsDoc
from emmet.core.oxidation_states import OxidationStateDoc

# Names of the pymatgen reference structures the tests run against.
_STRUCTURE_NAMES = (
    "SiO2",
    "Li2O",
    "LiFePO4",
    "TlBiSe2",
    "K2O2",
    "Li3V2(PO4)3",
    "CsCl",
    "Li2O2",
    "NaFePO4",
    "Pb2TiZrO6",
    "SrTiO3",
    "TiO2",
    "BaNiO3",
    "VO2",
)

# Reduced version of every selected reference structure, keyed by its name.
test_structures = {
    label: crystal.get_reduced_structure()
    for label, crystal in PymatgenTest.TEST_STRUCTURES.items()
    if label in _STRUCTURE_NAMES
}


@pytest.mark.parametrize("structure", test_structures.values())
def test_oxidation_state(structure: Structure):
    """Smoke test: an OPTIMADE document can be built from each reference structure."""
    print(f"Should work : {structure.composition}")
    timestamp = datetime.utcnow()
    built = OptimadeMaterialsDoc.from_structure(
        structure, material_id=33, last_updated=timestamp
    )
    assert built is not None