Skip to content

Commit

Permalink
Merge branch 'main' into fix-builders
Browse files Browse the repository at this point in the history
  • Loading branch information
Shyam D committed Jul 12, 2021
2 parents c99bd02 + 517fc8d commit 6aee820
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 30 deletions.
18 changes: 11 additions & 7 deletions emmet-builders/emmet/builders/materials/provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(
self.provenance = provenance
self.source_snls = source_snls
self.settings = EmmetBuildSettings.autoload(settings)
self.query = query
self.query = query or {}
self.kwargs = kwargs

materials.key = "material_id"
Expand Down Expand Up @@ -194,7 +194,7 @@ def process_item(self, item) -> List[Dict]:
doc.history.append(self.settings.DEFAULT_HISTORY)
doc.references.append(self.settings.DEFAULT_REFERENCE)

snl_docs.append(doc.dict())
snl_docs.append(doc.dict(exclude_unset=True))

return snl_docs

Expand All @@ -211,25 +211,29 @@ def match(self, snls, mat):
m_strucs = [Structure.from_dict(mat["structure"])] + [
Structure.from_dict(init_struc) for init_struc in mat["initial_structures"]
]
snl_strucs = [StructureNL.from_dict(snl) for snl in snls]
snl_strucs = []
for snl in snls:
struc = Structure.from_dict(snl)
struc.snl = snl
snl_strucs.append(struc)

groups = group_structures(
m_strucs + snl_strucs,
ltol=self.settings.LTOL,
stol=self.settings.STOL,
angle_tol=self.settings.ANGLE_TOL,
comparator=OrderDisorderElementComparator(),
# comparator=OrderDisorderElementComparator(),
)
matched_groups = [
group
for group in groups
if any(isinstance(struc, Structure) for struc in group)
if any(not hasattr(struc, "snl") for struc in group)
]
snls = [
struc
struc.snl
for group in matched_groups
for struc in group
if isinstance(struc, StructureNL)
if hasattr(struc, "snl")
]

self.logger.debug(f"Found {len(snls)} SNLs for {mat['material_id']}")
Expand Down
56 changes: 33 additions & 23 deletions emmet-core/emmet/core/provenance.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
""" Core definition of a Provenance Document """
import warnings
from datetime import datetime
from datetime import date, datetime
from typing import ClassVar, Dict, List, Optional

from monty.json import MontyDecoder
from pybtex.database import BibliographyData, parse_string
from pydantic import BaseModel, EmailStr, Field, validator
from pybtex.errors import set_strict_mode
from pydantic import BaseModel, Field, root_validator, validator

from emmet.core.material_property import PropertyDoc
from emmet.core.mpid import MPID
Expand All @@ -27,7 +29,7 @@ class Author(BaseModel):
"""

name: str = Field(None)
email: EmailStr = Field(None)
email: str = Field(None)


class History(BaseModel):
Expand All @@ -41,6 +43,12 @@ class History(BaseModel):
None, description="Dictionary of exra data for this history node"
)

@root_validator(pre=True)
def str_to_dict(cls, values):
    """
    Wrap a plain-string ``description`` in a dictionary.

    Registered as a ``pre=True`` root validator, so it receives the raw
    input values. When ``description`` is a string it is replaced by
    ``{"string": <original text>}``; any other value is left untouched.
    The (possibly updated) values mapping is returned.
    """
    description = values.get("description")
    if isinstance(description, str):
        values["description"] = {"string": description}
    return values


class ProvenanceDoc(PropertyDoc):
"""
Expand Down Expand Up @@ -95,33 +103,41 @@ def from_SNLs(
Converts legacy Pymatgen SNLs into a single provenance document
"""

assert (
len(snls) > 0
), "Error must provide a non-zero list of SNLs to convert from SNLs"

decoder = MontyDecoder()
# Choose earliest created_at
created_at = sorted(
[
snl.get("about", {}).get("created_at", {}).get("string", datetime.max)
for snl in snls
]
decoder.process_decoded(
[snl.get("about", {}).get("created_at", datetime.max) for snl in snls]
)
)[0]

# Choose earliest history
history = sorted(
snls,
key=lambda snl: snl.get("about", {})
.get("created_at", {})
.get("string", datetime.max),
key=lambda snl: decoder.process_decoded(
snl.get("about", {}).get("created_at", datetime.max)
),
)[0]["about"]["history"]

# Aggregate all references into one dict to remove duplicates
refs = {}
for snl in snls:
try:
set_strict_mode(False)
entries = parse_string(snl["about"]["references"], bib_format="bibtex")
refs.update(entries.entries)
except Exception:
warnings.warn(f"Failed parsing bibtex: {snl['about']['references']}")
except Exception as e:
warnings.warn(
f"Failed parsing bibtex: {snl['about']['references']} due to {e}"
)

bib_data = BibliographyData(entries=refs)
references = [ref.to_string("bibtex") for ref in bib_data.entries]

references = [ref.to_string("bibtex") for ref in bib_data.entries.values()]

# TODO: Maybe we should combine this with robocrystallographer?
# TODO: Refine these tags / remarks
Expand All @@ -143,11 +159,11 @@ def from_SNLs(
]

# Check if this entry is experimental
if any(
snl.get("about", {}).get("history", [{}])[0].get("experimental", False)
experimental = any(
history.get("experimental", False)
for snl in snls
):
experimental = True
for history in snl.get("about", {}).get("history", [{}])
)

# Aggregate all the database IDs
snl_ids = [snl.get("snl_id", "") for snl in snls]
Expand All @@ -160,12 +176,6 @@ def from_SNLs(
db_ids = {k: list(filter(None, v)) for k, v in db_ids.items()}
db_ids = {k: v for k, v in db_ids.items() if len(v) > 0}

# Get experimental bool
experimental = any(
snl.get("about", {}).get("history", [{}])[0].get("experimental", False)
for snl in snls
)

snl_fields = {
"created_at": created_at,
"references": references,
Expand Down

0 comments on commit 6aee820

Please sign in to comment.