diff --git a/emmet-builders/emmet/builders/materials/provenance.py b/emmet-builders/emmet/builders/materials/provenance.py
index 2c3226a466..982cac6e68 100644
--- a/emmet-builders/emmet/builders/materials/provenance.py
+++ b/emmet-builders/emmet/builders/materials/provenance.py
@@ -36,7 +36,7 @@ def __init__(
         self.provenance = provenance
         self.source_snls = source_snls
         self.settings = EmmetBuildSettings.autoload(settings)
-        self.query = query
+        self.query = query or {}
         self.kwargs = kwargs
 
         materials.key = "material_id"
@@ -194,7 +194,7 @@ def process_item(self, item) -> List[Dict]:
                 doc.history.append(self.settings.DEFAULT_HISTORY)
                 doc.references.append(self.settings.DEFAULT_REFERENCE)
 
-            snl_docs.append(doc.dict())
+            snl_docs.append(doc.dict(exclude_unset=True))
 
         return snl_docs
 
@@ -211,25 +211,29 @@ def match(self, snls, mat):
         m_strucs = [Structure.from_dict(mat["structure"])] + [
             Structure.from_dict(init_struc) for init_struc in mat["initial_structures"]
         ]
-        snl_strucs = [StructureNL.from_dict(snl) for snl in snls]
+        snl_strucs = []
+        for snl in snls:
+            struc = Structure.from_dict(snl)
+            struc.snl = snl
+            snl_strucs.append(struc)
 
         groups = group_structures(
             m_strucs + snl_strucs,
             ltol=self.settings.LTOL,
             stol=self.settings.STOL,
             angle_tol=self.settings.ANGLE_TOL,
-            comparator=OrderDisorderElementComparator(),
+            # comparator=OrderDisorderElementComparator(),
         )
         matched_groups = [
             group
             for group in groups
-            if any(isinstance(struc, Structure) for struc in group)
+            if any(not hasattr(struc, "snl") for struc in group)
         ]
         snls = [
-            struc
+            struc.snl
             for group in matched_groups
             for struc in group
-            if isinstance(struc, StructureNL)
+            if hasattr(struc, "snl")
         ]
 
         self.logger.debug(f"Found {len(snls)} SNLs for {mat['material_id']}")
diff --git a/emmet-core/emmet/core/provenance.py b/emmet-core/emmet/core/provenance.py
index 18fc5e1f18..2d8145ab83 100644
--- a/emmet-core/emmet/core/provenance.py
+++ b/emmet-core/emmet/core/provenance.py
@@ -1,10 +1,12 @@
 """ Core definition of a Provenance Document """
 import warnings
-from datetime import datetime
+from datetime import date, datetime
 from typing import ClassVar, Dict, List, Optional
 
+from monty.json import MontyDecoder
 from pybtex.database import BibliographyData, parse_string
-from pydantic import BaseModel, EmailStr, Field, validator
+from pybtex.errors import set_strict_mode
+from pydantic import BaseModel, Field, root_validator, validator
 
 from emmet.core.material_property import PropertyDoc
 from emmet.core.mpid import MPID
@@ -27,7 +29,7 @@ class Author(BaseModel):
     """
 
     name: str = Field(None)
-    email: EmailStr = Field(None)
+    email: str = Field(None)
 
 
 class History(BaseModel):
@@ -41,6 +43,12 @@ class History(BaseModel):
         None, description="Dictionary of exra data for this history node"
     )
 
+    @root_validator(pre=True)
+    def str_to_dict(cls, values):
+        if isinstance(values.get("description"), str):
+            values["description"] = {"string": values.get("description")}
+        return values
+
 
 class ProvenanceDoc(PropertyDoc):
     """
@@ -95,33 +103,41 @@ def from_SNLs(
         Converts legacy Pymatgen SNLs into a single provenance document
         """
 
+        assert (
+            len(snls) > 0
+        ), "Error must provide a non-zero list of SNLs to convert from SNLs"
+
+        decoder = MontyDecoder()
         # Choose earliest created_at
         created_at = sorted(
-            [
-                snl.get("about", {}).get("created_at", {}).get("string", datetime.max)
-                for snl in snls
-            ]
+            decoder.process_decoded(
+                [snl.get("about", {}).get("created_at", datetime.max) for snl in snls]
+            )
         )[0]
 
         # Choose earliest history
        history = sorted(
             snls,
-            key=lambda snl: snl.get("about", {})
-            .get("created_at", {})
-            .get("string", datetime.max),
+            key=lambda snl: decoder.process_decoded(
+                snl.get("about", {}).get("created_at", datetime.max)
+            ),
         )[0]["about"]["history"]
 
         # Aggregate all references into one dict to remove duplicates
         refs = {}
         for snl in snls:
             try:
+                set_strict_mode(False)
                 entries = parse_string(snl["about"]["references"], bib_format="bibtex")
                 refs.update(entries.entries)
-            except Exception:
-                warnings.warn(f"Failed parsing bibtex: {snl['about']['references']}")
+            except Exception as e:
+                warnings.warn(
+                    f"Failed parsing bibtex: {snl['about']['references']} due to {e}"
+                )
 
         bib_data = BibliographyData(entries=refs)
-        references = [ref.to_string("bibtex") for ref in bib_data.entries]
+
+        references = [ref.to_string("bibtex") for ref in bib_data.entries.values()]
 
         # TODO: Maybe we should combine this robocrystallographer?
         # TODO: Refine these tags / remarks
@@ -143,11 +159,11 @@ def from_SNLs(
         ]
 
         # Check if this entry is experimental
-        if any(
-            snl.get("about", {}).get("history", [{}])[0].get("experimental", False)
+        experimental = any(
+            history.get("experimental", False)
             for snl in snls
-        ):
-            experimental = True
+            for history in snl.get("about", {}).get("history", [{}])
+        )
 
         # Aggregate all the database IDs
         snl_ids = [snl.get("snl_id", "") for snl in snls]
@@ -160,12 +176,6 @@ def from_SNLs(
         db_ids = {k: list(filter(None, v)) for k, v in db_ids.items()}
         db_ids = {k: v for k, v in db_ids.items() if len(v) > 0}
 
-        # Get experimental bool
-        experimental = any(
-            snl.get("about", {}).get("history", [{}])[0].get("experimental", False)
-            for snl in snls
-        )
-
         snl_fields = {
            "created_at": created_at,
            "references": references,
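Usage note (a minimal sketch, not part of the patch above): this illustrates the behavior the new History.str_to_dict pre-validator is intended to give, assuming pydantic v1 root_validator semantics and the existing name/url/description fields on History; the field values are hypothetical.

    from emmet.core.provenance import History

    # Legacy SNL history nodes sometimes stored `description` as a bare string
    node = History(
        name="ICSD",
        url="https://icsd.example.org",
        description="icsd_id: 12345",
    )

    # The pre=True root validator wraps the string before field validation,
    # so the Dict-typed description field still validates
    assert node.description == {"string": "icsd_id: 12345"}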