Materials-Consortia · ml-evs · Feb 14, 2023 · Feb 9, 2023 · Feb 9, 2023 · Feb 9, 2023
diff --git a/README.md b/README.md
@@ -4,10 +4,7 @@
 <img width="100px" align="center" src="https://matsci.org/uploads/default/original/2X/b/bd2f59b3bf14fb046b74538750699d7da4c19ac1.svg">
 </div>
 
-<h1 align="center">
-OPTIMADE Python tools
-</h1>
-
+# <div align="center">OPTIMADE Python tools</div>
 
 <div align="center">
 
@@ -50,6 +47,7 @@ This is to enable interoperability among databases that serve crystal structures
 This repository contains a library of tools for implementing and consuming [OPTIMADE APIs](https://www.optimade.org) using Python:
 
 1. [pydantic](https://github.com/pydantic/pydantic) data models for all [OPTIMADE entry types](https://www.optimade.org/optimade-python-tools/latest/all_models/) and endpoint responses, and a [Lark](https://github.com/lark-parser/lark) [EBNF grammar](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form) implementation for the OPTIMADE filter language.
+1. Adapters to map OPTIMADE data to and from many commonly used atomistic Python frameworks (e.g., [pymatgen](https://pymatgen.org/), [ASE](https://wiki.fysik.dtu.dk/ase/)) and crystallographic file types (e.g., [CIF](https://www.iucr.org/resources/cif)), using the `optimade.adapters` module.
 1. A configurable reference server implementation that can make use of either MongoDB or Elasticsearch database backends out-of-the-box, and is readily extensible to other backends. Try it out on the [demo site](https://optimade.fly.dev)! The OpenAPI schemas of the server are used to construct the [OPTIMADE schemas](https://schemas.optimade.org/) site.
 1. An [OPTIMADE client](https://www.optimade.org/optimade-python-tools/latest/getting_started/client/) (`optimade-get`) that can query multiple [OPTIMADE providers](https://optimade.org/providers-dashboard) concurrently with a given filter, at the command-line or from Python code.
 1. A fuzzy API validator tool, which may be called from the shell (`optimade-validator`) or used as a GitHub Action from [optimade-validator-action](https://github.com/Materials-Consortia/optimade-validator-action); this validator is used to construct the [providers dashboard](https://optimade.org/providers-dashboard).

diff --git a/optimade/adapters/base.py b/optimade/adapters/base.py
@@ -34,6 +34,9 @@ class EntryAdapter:
     Attributes:
         ENTRY_RESOURCE: Entry resource to store entry as.
         _type_converters: Dictionary of valid conversion types for entry.
+        _type_ingesters: Dictionary of valid ingestion types mapped to ingestion functions.
+        _type_ingesters_by_type: Dictionary mapping the keys of `_type_ingesters` to data
+            types that can be ingested.
         as_<_type_converters>: Convert entry to a type listed in `_type_converters`.
         from_<_type_converters>: Convert an external type to the corresponding OPTIMADE model.
 
@@ -42,6 +45,7 @@ class EntryAdapter:
     ENTRY_RESOURCE: Type[EntryResource] = EntryResource
     _type_converters: Dict[str, Callable] = {}
     _type_ingesters: Dict[str, Callable] = {}
+    _type_ingesters_by_type: Dict[str, Type] = {}
 
     def __init__(self, entry: dict) -> None:
         """
@@ -116,6 +120,48 @@ def convert(self, format: str) -> Any:
 
         return self._converted[format]
 
+    @classmethod
+    def ingest_from(cls, data: Any, format: Optional[str] = None) -> Any:
+        """Build an adapter by ingesting `data` from an external format.
+
+        Parameters:
+            data (Any): The external object to ingest.
+            format (str): Key of `_type_ingesters` naming the format of `data`.
+                If `None`, the first key of `_type_ingesters_by_type` whose
+                type matches `data` (via `isinstance`) is used.
+
+        Raises:
+            AttributeError: If no format can be inferred for `data`, or if
+                `format` can not be found in `_type_ingesters`.
+
+        Returns:
+            An instance of this adapter class wrapping the ingested attributes.
+
+        """
+        valid_formats = tuple(cls._type_ingesters.keys())
+
+        if format is None:
+            # No explicit format: infer it from the type of `data`.
+            for key, instance_type in cls._type_ingesters_by_type.items():
+                if isinstance(data, instance_type):
+                    format = key
+                    break
+            else:
+                raise AttributeError(
+                    f"Could not infer an ingestion format for data of type {type(data)}.\n"
+                    f"Valid entry types: {valid_formats}"
+                )
+
+        if format not in cls._type_ingesters:
+            raise AttributeError(
+                f"Non-valid entry type to ingest from: {format}\n"
+                f"Valid entry types: {valid_formats}"
+            )
+
+        # The ingester's result is converted with `.dict()` and wrapped in a minimal entry dict.
+        attributes = cls._type_ingesters[format](data).dict()
+        return cls({"attributes": attributes, "id": "", "type": "structures"})
+
     @staticmethod
     def _get_model_attributes(
         starting_instances: Union[Tuple[BaseModel, ...], List[BaseModel]], name: str

diff --git a/optimade/adapters/structures/adapter.py b/optimade/adapters/structures/adapter.py
@@ -4,10 +4,12 @@
 from optimade.models import StructureResource
 
 from .aiida import get_aiida_structure_data
-from .ase import get_ase_atoms
+from .ase import Atoms as ASEAtoms
+from .ase import from_ase_atoms, get_ase_atoms
 from .cif import get_cif
 from .jarvis import get_jarvis_atoms
 from .proteindatabank import get_pdb, get_pdbx_mmcif
+from .pymatgen import Structure as PymatgenStructure
 from .pymatgen import from_pymatgen, get_pymatgen
 
 
@@ -55,4 +57,10 @@ class Structure(EntryAdapter):
 
     _type_ingesters: Dict[str, Callable] = {
         "pymatgen": from_pymatgen,
+        "ase": from_ase_atoms,
+    }
+
+    _type_ingesters_by_type: Dict[str, Type] = {
+        "pymatgen": PymatgenStructure,
+        "ase": ASEAtoms,
     }
diff --git a/optimade/adapters/structures/ase.py b/optimade/adapters/structures/ase.py
@@ -10,10 +10,15 @@
 from typing import Dict
 
 from optimade.adapters.exceptions import ConversionError
-from optimade.adapters.structures.utils import species_from_species_at_sites
+from optimade.adapters.structures.utils import (
+    elements_ratios_from_species_at_sites,
+    species_from_species_at_sites,
+)
 from optimade.models import Species as OptimadeStructureSpecies
 from optimade.models import StructureFeatures
 from optimade.models import StructureResource as OptimadeStructure
+from optimade.models.structures import StructureResourceAttributes
+from optimade.models.utils import anonymize_formula, reduce_formula
 
 try:
     from ase import Atom, Atoms
@@ -26,7 +31,7 @@
     ASE_NOT_FOUND = "ASE not found, cannot convert structure to an ASE Atoms"
 
 
-__all__ = ("get_ase_atoms",)
+__all__ = ("get_ase_atoms", "from_ase_atoms")
 
 
 def get_ase_atoms(optimade_structure: OptimadeStructure) -> Atoms:
@@ -85,3 +90,52 @@ def get_ase_atoms(optimade_structure: OptimadeStructure) -> Atoms:
     return Atoms(
         symbols=atoms, cell=attributes.lattice_vectors, pbc=attributes.dimension_types
     )
+
+
+def from_ase_atoms(atoms: Atoms) -> StructureResourceAttributes:
+    """Create OPTIMADE `StructureResourceAttributes` from an ASE `Atoms` object.
+
+    Parameters:
+        atoms: The ASE `Atoms` object to convert.
+
+    Raises:
+        RuntimeError: If `atoms` is not an instance of ASE `Atoms`.
+
+    Returns:
+        An OPTIMADE `StructureResourceAttributes` model; entries of `atoms.info` are
+            included as extra attributes under lower-cased `_ase_<key>` names.
+
+    """
+    if not isinstance(atoms, Atoms):
+        raise RuntimeError(
+            f"Cannot convert type {type(atoms)} into an OPTIMADE `StructureResourceAttributes` model."
+        )
+
+    symbols = atoms.get_chemical_symbols()
+    species = species_from_species_at_sites(symbols)
+    periodicity = [int(flag) for flag in atoms.pbc.tolist()]
+    formula = atoms.get_chemical_formula()
+    reduced = reduce_formula(formula)
+
+    attributes = {
+        "cartesian_site_positions": atoms.positions.tolist(),
+        "lattice_vectors": atoms.cell.tolist(),
+        "species_at_sites": symbols,
+        "elements_ratios": elements_ratios_from_species_at_sites(symbols),
+        "species": species,
+        "dimension_types": periodicity,
+        "nperiodic_dimensions": sum(periodicity),
+        "nelements": len(species),
+        "elements": sorted(entry.name for entry in species),
+        "nsites": len(symbols),
+        "chemical_formula_descriptive": formula,
+        "chemical_formula_reduced": reduced,
+        "chemical_formula_anonymous": anonymize_formula(reduced),
+        "last_modified": None,
+        "immutable_id": None,
+        "structure_features": [],
+    }
+    for key, value in atoms.info.items():
+        attributes[f"_ase_{key}".lower()] = value
+
+    return StructureResourceAttributes(**attributes)
diff --git a/optimade/adapters/structures/pymatgen.py b/optimade/adapters/structures/pymatgen.py
@@ -16,9 +16,10 @@
 from optimade.models import Species as OptimadeStructureSpecies
 from optimade.models import StructureResource as OptimadeStructure
 from optimade.models import StructureResourceAttributes
+from optimade.models.utils import anonymize_formula, reduce_formula
 
 try:
-    from pymatgen.core import Composition, Lattice, Molecule, Structure
+    from pymatgen.core import Lattice, Molecule, Structure
 
 except (ImportError, ModuleNotFoundError):
     from warnings import warn
@@ -168,14 +169,14 @@ def from_pymatgen(pmg_structure: Structure) -> StructureResourceAttributes:
     attributes["dimension_types"] = [int(_) for _ in pmg_structure.lattice.pbc]
     attributes["nperiodic_dimensions"] = sum(attributes["dimension_types"])
     attributes["nelements"] = len(pmg_structure.composition.elements)
-    attributes["chemical_formula_anonymous"] = _pymatgen_anonymized_formula_to_optimade(
-        pmg_structure.composition
+    attributes["chemical_formula_anonymous"] = anonymize_formula(
+        pmg_structure.composition.formula
     )
     attributes["elements"] = sorted(
         [_.symbol for _ in pmg_structure.composition.elements]
     )
-    attributes["chemical_formula_reduced"] = _pymatgen_reduced_formula_to_optimade(
-        pmg_structure.composition
+    attributes["chemical_formula_reduced"] = reduce_formula(
+        pmg_structure.composition.formula
     )
     attributes["chemical_formula_descriptive"] = pmg_structure.composition.formula
     attributes["elements_ratios"] = [
@@ -188,33 +189,3 @@ def from_pymatgen(pmg_structure: Structure) -> StructureResourceAttributes:
     attributes["structure_features"] = []
 
     return StructureResourceAttributes(**attributes)
-
-
-def _pymatgen_anonymized_formula_to_optimade(composition: Composition) -> str:
-    """Construct an OPTIMADE `chemical_formula_anonymous` from a pymatgen `Composition`."""
-    import re
-
-    from optimade.models.utils import anonymous_element_generator
-
-    return "".join(
-        [
-            "".join(x)
-            for x in zip(
-                anonymous_element_generator(),
-                reversed(re.split("[A-Z]", composition.anonymized_formula)[1:]),
-            )
-        ]
-    )
-
-
-def _pymatgen_reduced_formula_to_optimade(composition: Composition) -> str:
-    """Construct an OPTIMADE `chemical_formula_reduced` from a pymatgen `Composition`."""
-    import numpy
-
-    numbers = [int(_) for _ in composition.to_reduced_dict.values()]
-    gcd = numpy.gcd.reduce(numbers)
-    return "".join(
-        _
-        + f"{int(composition.to_reduced_dict[_]) // gcd if composition.to_reduced_dict[_] // gcd > 1 else ''}"
-        for _ in sorted([_.symbol for _ in composition.elements])
-    )
diff --git a/optimade/adapters/structures/utils.py b/optimade/adapters/structures/utils.py
@@ -355,3 +355,14 @@ def species_from_species_at_sites(
         OptimadeStructureSpecies(name=_, concentration=[1.0], chemical_symbols=[_])
         for _ in set(species_at_sites)
     ]
+
+
+def elements_ratios_from_species_at_sites(species_at_sites: List[str]) -> List[float]:
+    """Compute the OPTIMADE `elements_ratios` field (fraction of sites per element, in sorted element order) from
+    `species_at_sites`, assuming each site is wholly occupied by the named element (no partial/mixed occupancy).
+
+    """
+    from collections import Counter
+
+    counts = Counter(species_at_sites)  # one pass, rather than a `list.count` scan per element
+    return [counts[e] / len(species_at_sites) for e in sorted(counts)]
diff --git a/optimade/models/structures.py b/optimade/models/structures.py
@@ -1,10 +1,7 @@
 # pylint: disable=no-self-argument,line-too-long,no-name-in-module
-import math
 import re
-import sys
 import warnings
 from enum import Enum, IntEnum
-from functools import reduce
 from typing import List, Optional, Union
 
 from pydantic import BaseModel, conlist, root_validator, validator
@@ -18,6 +15,7 @@
     OptimadeField,
     StrictField,
     SupportLevel,
+    reduce_formula,
 )
 from optimade.warnings import MissingExpectedField
 
@@ -895,18 +893,10 @@ def check_reduced_formulae(cls, value, field):
         if value is None:
             return value
 
-        numbers = [n.strip() or 1 for n in re.split(r"[A-Z][a-z]*", value)]
-        # Need to remove leading 1 from split and convert to ints
-        numbers = [int(n) for n in numbers[1:]]
-
-        if sys.version_info[1] >= 9:
-            gcd = math.gcd(*numbers)
-        else:
-            gcd = reduce(math.gcd, numbers)
-
-        if gcd != 1:
+        reduced_formula = reduce_formula(value)
+        if reduced_formula != value:
             raise ValueError(
-                f"{field.name} {value!r} is not properly reduced: greatest common divisor was {gcd}, expected 1."
+                f"{field.name} {value!r} is not properly reduced: expected {reduced_formula!r}."
             )
 
         return value

diff --git a/optimade/models/utils.py b/optimade/models/utils.py
@@ -1,9 +1,11 @@
 import inspect
 import itertools
+import math
 import re
 import warnings
 from enum import Enum
-from typing import TYPE_CHECKING, Optional
+from functools import reduce
+from typing import TYPE_CHECKING, List, Optional
 
 from pydantic import Field
 from pydantic.fields import FieldInfo
@@ -228,6 +230,63 @@ def anonymous_element_generator():
             yield "".join(s)
 
 
+def _reduce_or_anonymize_formula(
+    formula: str, alphabetize: bool = True, anonymize: bool = False
+) -> str:
+    """Takes an input formula, reduces it and either alphabetizes or anonymizes it.
+
+    `anonymize` takes precedence over `alphabetize`; with neither set, the input symbol order is kept.
+
+    Raises:
+        ValueError: If `formula` has no element symbols, a non-integer count, or only zero counts.
+
+    """
+    element_pattern = r"[A-Z][a-z]*"
+    species = re.findall(element_pattern, formula)
+    # The split starts with the text before the first symbol, which is dropped; a missing count means 1.
+    numbers: List[int] = [
+        int(n.strip() or 1) for n in re.split(element_pattern, formula)[1:]
+    ]
+
+    # An initial value of 0 makes the fold equal `math.gcd(*numbers)` on any Python version; 0 means nothing to divide by.
+    gcd = reduce(math.gcd, numbers, 0)
+    if not species or gcd == 0:
+        raise ValueError(f"Something is wrong with the input formula: {formula}")
+
+    numbers = [n // gcd for n in numbers]
+
+    if anonymize:
+        numbers = sorted(numbers, reverse=True)
+        species = [s for _, s in zip(numbers, anonymous_element_generator())]
+    elif alphabetize:
+        species, numbers = zip(*sorted(zip(species, numbers)))
+
+    return "".join(f"{s}{n if n != 1 else ''}" for n, s in zip(numbers, species))
+
+
+def anonymize_formula(formula: str) -> str:
+    """Convert a chemical formula string of the form `[A-Z][a-z]*[0-9]*` (whitespace allowed) into the OPTIMADE
+    `chemical_formula_anonymous` representation: the reduced formula with its element symbols replaced by
+    anonymous ones (A, B, C, ...), ordered from largest proportion to smallest.
+
+    Returns:
+        The anonymous chemical formula in the OPTIMADE representation.
+
+    """
+    return _reduce_or_anonymize_formula(formula, anonymize=True, alphabetize=False)
+
+
+def reduce_formula(formula: str) -> str:
+    """Reduce a chemical formula string of the form `[A-Z][a-z]*[0-9]*` (whitespace allowed) by dividing its integer
+    proportions by their greatest common divisor, dropping any resulting "1" and sorting the elements alphabetically.
+
+    Returns:
+        The reduced chemical formula in the OPTIMADE representation.
+
+    """
+    return _reduce_or_anonymize_formula(formula, anonymize=False, alphabetize=True)
+
+
 ANONYMOUS_ELEMENTS = tuple(itertools.islice(anonymous_element_generator(), 150))
 """ Returns the first 150 values of the anonymous element generator. """