From 71136d7016e2bc1fee3450249e419983f7800226 Mon Sep 17 00:00:00 2001
From: Matthew Evans
Date: Sun, 20 Mar 2022 14:36:27 +0000
Subject: [PATCH] Make structure adapters infer species from species_at_sites when missing

---
 optimade/adapters/structures/aiida.py      | 11 +++++-
 optimade/adapters/structures/ase.py        | 14 ++++---
 optimade/adapters/structures/pymatgen.py   | 14 +++++--
 optimade/adapters/structures/utils.py      | 26 +++++++++++++
 tests/adapters/structures/conftest.py      |  6 +++
 tests/adapters/structures/test_aiida.py    |  5 +++
 tests/adapters/structures/test_ase.py      |  5 +++
 tests/adapters/structures/test_pymatgen.py |  5 +++
 tests/adapters/structures/test_utils.py    | 45 ++++++++++++++++++++++
 9 files changed, 121 insertions(+), 10 deletions(-)

diff --git a/optimade/adapters/structures/aiida.py b/optimade/adapters/structures/aiida.py
index b894385b3..88a419233 100644
--- a/optimade/adapters/structures/aiida.py
+++ b/optimade/adapters/structures/aiida.py
@@ -8,11 +8,13 @@
 This conversion function relies on the [`aiida-core`](https://github.com/aiidateam/aiida-core) package.
 """
 from warnings import warn
+from typing import List, Optional
 
 from optimade.models import StructureResource as OptimadeStructure
+from optimade.models import Species as OptimadeStructureSpecies
 
 from optimade.adapters.warnings import AdapterPackageNotFound, ConversionWarning
-from optimade.adapters.structures.utils import pad_cell
+from optimade.adapters.structures.utils import pad_cell, species_from_species_at_sites
 
 try:
     from aiida.orm.nodes.data.structure import StructureData, Kind, Site
@@ -46,8 +48,13 @@ def get_aiida_structure_data(optimade_structure: OptimadeStructure) -> Structure
     lattice_vectors, adjust_cell = pad_cell(attributes.lattice_vectors)
     structure = StructureData(cell=lattice_vectors)
 
+    # If species not provided, infer data from species_at_sites
+    species: Optional[List[OptimadeStructureSpecies]] = attributes.species
+    if not species:
+        species = species_from_species_at_sites(attributes.species_at_sites)
+
     # Add Kinds
-    for kind in attributes.species:
+    for kind in species:
         symbols = []
         concentration = []
         mass = 0.0
diff --git a/optimade/adapters/structures/ase.py b/optimade/adapters/structures/ase.py
index d618abaae..84c3bdf25 100644
--- a/optimade/adapters/structures/ase.py
+++ b/optimade/adapters/structures/ase.py
@@ -14,6 +14,7 @@
 from optimade.models import StructureFeatures
 
 from optimade.adapters.exceptions import ConversionError
+from optimade.adapters.structures.utils import species_from_species_at_sites
 
 try:
     from ase import Atoms, Atom
@@ -53,13 +54,16 @@ def get_ase_atoms(optimade_structure: OptimadeStructure) -> Atoms:
             "ASE cannot handle structures with partial occupancies, sorry."
         )
 
-    species: Dict[str, OptimadeStructureSpecies] = {
-        species.name: species for species in attributes.species
-    }
+    species = attributes.species
+    # If species is missing, infer data from species_at_sites
+    if not species:
+        species = species_from_species_at_sites(attributes.species_at_sites)
+
+    optimade_species: Dict[str, OptimadeStructureSpecies] = {_.name: _ for _ in species}
 
     # Since we've made sure there are no species with more than 1 chemical symbol,
     # asking for index 0 will always work.
-    if "X" in [specie.chemical_symbols[0] for specie in species.values()]:
+    if "X" in [specie.chemical_symbols[0] for specie in optimade_species.values()]:
         raise ConversionError(
             "ASE cannot handle structures with unknown ('X') chemical symbols, sorry."
         )
@@ -69,7 +73,7 @@ def get_ase_atoms(optimade_structure: OptimadeStructure) -> Atoms:
         species_name = attributes.species_at_sites[site_number]
         site = attributes.cartesian_site_positions[site_number]
 
-        current_species = species[species_name]
+        current_species = optimade_species[species_name]
 
         # Argument above about chemical symbols also holds here
         mass = None
diff --git a/optimade/adapters/structures/pymatgen.py b/optimade/adapters/structures/pymatgen.py
index 54de008dc..5a79b2350 100644
--- a/optimade/adapters/structures/pymatgen.py
+++ b/optimade/adapters/structures/pymatgen.py
@@ -7,10 +7,11 @@
 
 For more information on the pymatgen code see [their documentation](https://pymatgen.org).
 """
-from typing import Union, Dict, List
+from typing import Union, Dict, List, Optional
 
 from optimade.models import Species as OptimadeStructureSpecies
 from optimade.models import StructureResource as OptimadeStructure
+from optimade.adapters.structures.utils import species_from_species_at_sites
 
 try:
     from pymatgen.core import Structure, Molecule
@@ -51,7 +52,9 @@ def get_pymatgen(optimade_structure: OptimadeStructure) -> Union[Structure, Mole
         warn(PYMATGEN_NOT_FOUND, AdapterPackageNotFound)
         return None
 
-    if all(optimade_structure.attributes.dimension_types):
+    if optimade_structure.attributes.nperiodic_dimensions == 3 or all(
+        optimade_structure.attributes.dimension_types
+    ):
         return _get_structure(optimade_structure)
 
     return _get_molecule(optimade_structure)
@@ -90,12 +93,17 @@ def _get_molecule(optimade_structure: OptimadeStructure) -> Molecule:
 
 
 def _pymatgen_species(
-    nsites: int, species: List[OptimadeStructureSpecies], species_at_sites: List[str]
+    nsites: int,
+    species: Optional[List[OptimadeStructureSpecies]],
+    species_at_sites: List[str],
 ) -> List[Dict[str, float]]:
     """
     Create list of {"symbol": "concentration"} per site for values to pymatgen species parameters.
     Remove vacancies, if they are present.
     """
+    if not species:
+        # If species is missing, infer data from species_at_sites
+        species = species_from_species_at_sites(species_at_sites)
 
     optimade_species = {_.name: _ for _ in species}
 
diff --git a/optimade/adapters/structures/utils.py b/optimade/adapters/structures/utils.py
index 6b8acae4d..2e5f7773c 100644
--- a/optimade/adapters/structures/utils.py
+++ b/optimade/adapters/structures/utils.py
@@ -6,6 +6,7 @@
 from typing import List, Tuple, Iterable
 
 from optimade.models.structures import Vector3D
+from optimade.models.structures import Species as OptimadeStructureSpecies
 
 try:
     import numpy as np
@@ -315,3 +316,28 @@ def pad_cell(
         outer=tuple,
         inner=tuple,
     )
+
+
+def species_from_species_at_sites(
+    species_at_sites: List[str],
+) -> List[OptimadeStructureSpecies]:
+    """When a list of species dictionaries is not provided, this function
+    can be used to infer the species from the provided species_at_sites.
+
+    In this use case, species_at_sites is assumed to provide a list of
+    element symbols, and refers to situations with no mixed occupancy, i.e.,
+    the constructed species list will contain each unique species once, in
+    order of first appearance, with concentration equal to 1 and the
+    species_at_sites entry used as both the name and the chemical symbol.
+
+    Parameters:
+        species_at_sites: The list found under the species_at_sites field.
+
+    Returns:
+        An OPTIMADE species list.
+
+    """
+    return [
+        OptimadeStructureSpecies(name=_, concentration=[1.0], chemical_symbols=[_])
+        for _ in dict.fromkeys(species_at_sites)
+    ]
diff --git a/tests/adapters/structures/conftest.py b/tests/adapters/structures/conftest.py
index a14ede9a4..6af36dde0 100644
--- a/tests/adapters/structures/conftest.py
+++ b/tests/adapters/structures/conftest.py
@@ -49,3 +49,9 @@ def null_lattice_vector_structure(raw_structure) -> Structure:
     raw_structure["attributes"]["dimension_types"][0] = 0
     raw_structure["attributes"]["nperiodic_dimensions"] = 2
     return Structure(raw_structure)
+
+
+@pytest.fixture
+def null_species_structure(raw_structure) -> Structure:
+    raw_structure["attributes"]["species"] = None
+    return Structure(raw_structure)
diff --git a/tests/adapters/structures/test_aiida.py b/tests/adapters/structures/test_aiida.py
index 8b2b6c84b..4aeedf95c 100644
--- a/tests/adapters/structures/test_aiida.py
+++ b/tests/adapters/structures/test_aiida.py
@@ -82,3 +82,8 @@ def test_special_species(SPECIAL_SPECIES_STRUCTURES):
             )
         else:
             assert aiida_structure.kinds[0].mass == 1.0
+
+
+def test_null_species(null_species_structure):
+    """Make sure null species are handled"""
+    assert isinstance(get_aiida_structure_data(null_species_structure), StructureData)
diff --git a/tests/adapters/structures/test_ase.py b/tests/adapters/structures/test_ase.py
index 13ed014d0..a2062b2ca 100644
--- a/tests/adapters/structures/test_ase.py
+++ b/tests/adapters/structures/test_ase.py
@@ -40,3 +40,8 @@ def test_special_species(SPECIAL_SPECIES_STRUCTURES):
             r"(ASE cannot handle structures with unknown \('X'\) chemical symbols)",
         ):
             get_ase_atoms(structure)
+
+
+def test_null_species(null_species_structure):
+    """Make sure null species are handled"""
+    assert isinstance(get_ase_atoms(null_species_structure), Atoms)
diff --git a/tests/adapters/structures/test_pymatgen.py b/tests/adapters/structures/test_pymatgen.py
index 42553b0f9..01b4092ee 100644
--- a/tests/adapters/structures/test_pymatgen.py
+++ b/tests/adapters/structures/test_pymatgen.py
@@ -49,3 +49,8 @@ def test_special_species(SPECIAL_SPECIES_STRUCTURES):
     for special_structure in SPECIAL_SPECIES_STRUCTURES:
         structure = Structure(special_structure)
         assert isinstance(get_pymatgen(structure), PymatgenStructure)
+
+
+def test_null_species(null_species_structure):
+    """Make sure null species are handled"""
+    assert isinstance(get_pymatgen(null_species_structure), PymatgenStructure)
diff --git a/tests/adapters/structures/test_utils.py b/tests/adapters/structures/test_utils.py
index e4b3c2a59..d3b04b2c8 100644
--- a/tests/adapters/structures/test_utils.py
+++ b/tests/adapters/structures/test_utils.py
@@ -16,6 +16,7 @@
     fractional_coordinates,
     pad_cell,
     scaled_cell,
+    species_from_species_at_sites,
 )
 
 
@@ -115,3 +116,47 @@ def test_scaled_cell_consistency(structure):
     volume_from_scale = 1 / numpy.linalg.det(scale)
 
     assert volume_from_scale == pytest.approx(volume_from_cellpar)
+
+
+def test_species_from_species_at_sites(structure):
+    """Test that species can be inferred from species_at_sites"""
+    species_at_sites = ["Si"]
+    assert [d.dict() for d in species_from_species_at_sites(species_at_sites)] == [
+        {
+            "name": "Si",
+            "concentration": [1.0],
+            "chemical_symbols": ["Si"],
+            "attached": None,
+            "mass": None,
+            "original_name": None,
+            "nattached": None,
+        },
+    ]
+
+    species_at_sites = ["Si", "Si", "O", "O", "O", "O"]
+    assert sorted(
+        [d.dict() for d in species_from_species_at_sites(species_at_sites)],
+        key=lambda _: _["name"],
+    ) == sorted(
+        [
+            {
+                "name": "O",
+                "concentration": [1.0],
+                "chemical_symbols": ["O"],
+                "attached": None,
+                "mass": None,
+                "original_name": None,
+                "nattached": None,
+            },
+            {
+                "name": "Si",
+                "concentration": [1.0],
+                "chemical_symbols": ["Si"],
+                "attached": None,
+                "mass": None,
+                "original_name": None,
+                "nattached": None,
+            },
+        ],
+        key=lambda _: _["name"],
+    )