-
Notifications
You must be signed in to change notification settings - Fork 876
/
Copy pathcomposition.py
1328 lines (1128 loc) · 55.3 KB
/
composition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""This module implements a Composition class to represent compositions,
and a ChemicalPotential class to represent potentials.
"""
from __future__ import annotations
import collections
import os
import re
import string
import warnings
from collections import defaultdict
from functools import total_ordering
from itertools import combinations_with_replacement, product
from math import isnan
from typing import TYPE_CHECKING, cast
from monty.fractions import gcd, gcd_float
from monty.json import MSONable
from monty.serialization import loadfn
from pymatgen.core.periodic_table import DummySpecies, Element, ElementType, Species, get_el_sp
from pymatgen.core.units import Mass
from pymatgen.util.string import Stringify, formula_double_format
if TYPE_CHECKING:
from collections.abc import Generator, Iterator
from typing import Any, ClassVar
from typing_extensions import Self
from pymatgen.util.typing import SpeciesLike
module_dir = os.path.dirname(os.path.abspath(__file__))
@total_ordering
class Composition(collections.abc.Hashable, collections.abc.Mapping, MSONable, Stringify):
"""Represents a Composition, which is essentially a {element:amount} mapping
type. Composition is written to be immutable and hashable,
unlike a standard Python dict.
Note that the key can be either an Element or a Species. Elements and Species
are treated differently. i.e., a Fe2+ is not the same as a Fe3+ Species and
would be put in separate keys. This differentiation is deliberate to
support using Composition to determine the fraction of a particular Species.
Works almost completely like a standard python dictionary, except that
__getitem__ is overridden to return 0 when an element is not found.
(somewhat like a defaultdict, except it is immutable).
Also adds more convenience methods relevant to compositions, e.g.
get_fraction.
It should also be noted that many Composition related functionality takes
in a standard string as a convenient input. For example,
even though the internal representation of a Fe2O3 composition is
{Element("Fe"): 2, Element("O"): 3}, you can obtain the amount of Fe
simply by comp["Fe"] instead of the more verbose comp[Element("Fe")].
>>> comp = Composition("LiFePO4")
>>> comp.get_atomic_fraction(Element("Li"))
0.14285714285714285
>>> comp.num_atoms
7.0
>>> comp.reduced_formula
'LiFePO4'
>>> comp.formula
'Li1 Fe1 P1 O4'
>>> comp.get_wt_fraction(Element("Li"))
0.04399794666951898
>>> comp.num_atoms
7.0
"""
# Tolerance in distinguishing different composition amounts. Amounts whose
# absolute value is below it are dropped by __init__, and __eq__ treats two
# amounts that differ by no more than it as equal.
# 1e-8 is fairly tight, but should cut out most floating point arithmetic
# errors.
amount_tolerance = 1e-8
# charge_balanced reports True when the absolute total charge is below this.
charge_balanced_tolerance = 1e-8
# Special formula handling for peroxides and certain elements. This is so
# that formula output does not write LiO instead of Li2O2 for example.
# Keys are reduced formulas, values are the formulas reported instead; the
# reduced-formula helpers halve the multiplicative factor when they substitute.
special_formulas: ClassVar[dict[str, str]] = dict(
LiO="Li2O2",
NaO="Na2O2",
KO="K2O2",
HO="H2O2",
CsO="Cs2O2",
RbO="Rb2O2",
O="O2",
N="N2",
F="F2",
Cl="Cl2",
H="H2",
)
oxi_prob = None # prior probability of oxidation used by oxi_state_guesses
def __init__(self, *args, strict: bool = False, **kwargs) -> None:
    """Build a Composition from dict-style input, keyword args or a formula string.

    Everything the built-in dict() constructor accepts is supported:

    1. A dict of {Element/Species: amount}, {string symbol: amount},
        {atomic number: amount} or any mixture of these, e.g.
        {Element("Li"): 2, Element("O"): 1}, {"Li": 2, "O": 1} and
        {3: 2, 8: 1} all result in a Li2O composition.
    2. Keyword arguments, e.g. Composition(Li=2, O=1).

    In addition, a single formula string, e.g. Composition("Li2O"), or a
    single existing Composition may be given.

    Args:
        *args: Any number of 2-tuples as key-value pairs.
        strict (bool): Only allow valid Elements and Species in the Composition. Defaults to False.
        allow_negative (bool): Whether to allow negative compositions. Defaults to False.
        **kwargs: Additional kwargs supported by the dict() constructor.

    Raises:
        ValueError: If the single argument is a float NaN, if an amount is
            negative (beyond amount_tolerance) while allow_negative is
            False, or if strict is True and the composition is not valid.
    """
    # allow_negative cannot sit next to *args as a named parameter without
    # ambiguity, so it is taken out of **kwargs before they reach dict().
    self.allow_negative = kwargs.pop("allow_negative", False)

    sole_arg = args[0] if len(args) == 1 else None
    if isinstance(sole_arg, type(self)):
        # Reusing an existing Composition directly is much faster than
        # funnelling it through dict().
        elem_map = sole_arg
    elif isinstance(sole_arg, str):
        elem_map = self._parse_formula(sole_arg)  # type: ignore[assignment]
    elif isinstance(sole_arg, float) and isnan(sole_arg):
        raise ValueError("float('NaN') is not a valid Composition, did you mean 'NaN'?")
    else:
        elem_map = dict(*args, **kwargs)  # type: ignore[assignment]

    tol = type(self).amount_tolerance
    kept = {}
    self._n_atoms = 0
    for key, amount in elem_map.items():
        if amount < -tol and not self.allow_negative:
            raise ValueError("Amounts in Composition cannot be negative!")
        # Entries that are zero within tolerance are not stored at all.
        if abs(amount) >= tol:
            kept[get_el_sp(key)] = amount
            self._n_atoms += abs(amount)
    self._data = kept

    if strict and not self.valid:
        raise ValueError(f"Composition is not valid, contains: {', '.join(map(str, self.elements))}")
def __getitem__(self, key: SpeciesLike) -> float:
    """Return the amount stored for an element or species, 0 if absent.

    A Species key is looked up exactly. Any other key is matched by symbol,
    so the amounts of all stored entries sharing that symbol are summed.

    Args:
        key (SpeciesLike): Anything get_el_sp can interpret.

    Returns:
        float: The matching amount, or 0 when nothing matches.

    Raises:
        KeyError: If the key is rejected with a ValueError during lookup.
    """
    try:
        target = get_el_sp(key)
        if isinstance(target, Species):
            return self._data.get(target, 0)

        # target is an Element or str: compare by symbol instead of identity
        total = 0
        for stored, amount in self._data.items():
            if getattr(stored, "symbol", stored) == getattr(target, "symbol", target):
                total += amount
        return total
    except ValueError as exc:
        raise KeyError(f"Invalid {key=}") from exc
def __len__(self) -> int:
return len(self._data)
def __iter__(self) -> Iterator[Species | Element | DummySpecies]:
return iter(self._data)
def __contains__(self, key) -> bool:
    """Check whether an element or species is present.

    A Species key must match a stored key exactly. Any other key matches
    when some stored entry has the same symbol.

    Raises:
        TypeError: If the key is rejected with a ValueError during lookup.
    """
    try:
        target = get_el_sp(key)
        if isinstance(target, Species):
            return target in self._data

        # key is Element or str: a symbol match on any stored entry is enough
        for stored in self._data:
            if target.symbol == stored.symbol:
                return True
        return False
    except ValueError as exc:
        raise TypeError(f"Invalid {key=} for Composition") from exc
def __eq__(self, other: object) -> bool:
"""Composition equality. We consider compositions equal if they have the
same elements and the amounts are within Composition.amount_tolerance
of each other.
Args:
other: Composition to compare to.
"""
if not isinstance(other, (type(self), dict)):
return NotImplemented
# elements with amounts < Composition.amount_tolerance don't show up
# in the el_map, so checking len enables us to only check one
# composition's elements
if len(self) != len(other):
return False
return all(abs(amt - other[el]) <= type(self).amount_tolerance for el, amt in self.items())
def __ge__(self, other: object) -> bool:
"""Composition greater than or equal to. We consider compositions A >= B
if all elements in B are in A and the amount of each element in A is
greater than or equal to the amount of the element in B within
Composition.amount_tolerance.
Should ONLY be used for defining a sort order (the behavior is probably not what you'd expect).
"""
if not isinstance(other, type(self)):
return NotImplemented
for el in sorted(set(self.elements + other.elements)):
if other[el] - self[el] >= type(self).amount_tolerance:
return False
# TODO @janosh 2024-04-29: is this a bug? why would we return True early?
if self[el] - other[el] >= type(self).amount_tolerance:
return True
return True
def __add__(self, other: object) -> Self:
    """Add two compositions, e.g. Fe2O3 + FeO gives Fe3O4.

    Args:
        other: A Composition or a dict whose keys get_el_sp can interpret.

    Returns:
        A new composition of the same class carrying this composition's
        allow_negative setting, or NotImplemented for other types.
    """
    if not isinstance(other, (type(self), dict)):
        return NotImplemented

    # Seed the running totals with our own amounts, then fold in other's.
    totals: dict[SpeciesLike, float] = defaultdict(float, self)
    for key, amount in other.items():
        totals[get_el_sp(key)] += amount
    return type(self)(totals, allow_negative=self.allow_negative)
def __sub__(self, other: object) -> Self:
    """Subtract a composition, e.g. Fe2O3 - FeO gives FeO2.

    Args:
        other: A Composition or a dict whose keys get_el_sp can interpret.

    Returns:
        A new composition of the same class carrying this composition's
        allow_negative setting, or NotImplemented for other types.

    Raises:
        ValueError if the subtracted composition is greater than the
            original composition in any of its elements, unless allow_negative
            is True
    """
    if not isinstance(other, (type(self), dict)):
        return NotImplemented

    # Seed the running totals with our own amounts, then take other's away.
    totals: dict[SpeciesLike, float] = defaultdict(float, self)
    for key, amount in other.items():
        totals[get_el_sp(key)] -= amount
    return type(self)(totals, allow_negative=self.allow_negative)
def __mul__(self, other: object) -> Self:
"""Multiply a Composition by an integer or a float.
Fe2O3 * 4 -> Fe8O12.
"""
if not isinstance(other, (int, float)):
return NotImplemented
return type(self)({el: self[el] * other for el in self}, allow_negative=self.allow_negative)
__rmul__ = __mul__
def __truediv__(self, other: object) -> Self:
if not isinstance(other, (int, float)):
return NotImplemented
return type(self)({el: self[el] / other for el in self}, allow_negative=self.allow_negative)
__div__ = __truediv__
def __hash__(self) -> int:
"""Hash based on the chemical system."""
return hash(frozenset(self._data))
def __repr__(self) -> str:
formula = " ".join(f"{key}{':' if hasattr(key, 'oxi_state') else ''}{val:g}" for key, val in self.items())
cls_name = type(self).__name__
return f"{cls_name}({formula!r})"
def __str__(self) -> str:
    """Space-separated "<species><amount>" tokens built from as_dict(),
    with amounts of one written out explicitly.
    """
    tokens = [f"{key}{formula_double_format(val, ignore_ones=False)}" for key, val in self.as_dict().items()]
    return " ".join(tokens)
def to_pretty_string(self) -> str:
    """
    Returns:
        str: Same output as __str__() but with all whitespace removed.
    """
    spaced = str(self)
    return re.sub(r"\s+", "", spaced)
@property
def average_electroneg(self) -> float:
    """Average electronegativity of the composition: each entry's X weighted
    by the absolute value of its amount, divided by num_atoms.
    """
    weighted = 0
    for el, amt in self.items():
        weighted += el.X * abs(amt)
    return weighted / self.num_atoms
@property
def total_electrons(self) -> float:
    """Total number of electrons in composition: each entry's Z times the
    absolute value of its amount, summed.
    """
    electrons = 0
    for el, amt in self.items():
        electrons += el.Z * abs(amt)
    return electrons
def almost_equals(
    self,
    other: Composition,
    rtol: float = 0.1,
    atol: float = 1e-8,
) -> bool:
    """Check whether two compositions agree within a tolerance.

    For every element/species present in either composition the amounts
    a and b must satisfy |b - a| <= atol + rtol * (|a| + |b|) / 2.

    Args:
        other (Composition): Other composition to check
        rtol (float): Relative tolerance
        atol (float): Absolute tolerance

    Returns:
        bool: True if every amount is within tolerance.
    """

    def within_tolerance(sp) -> bool:
        mine, theirs = self[sp], other[sp]
        allowed = atol + rtol * (abs(mine) + abs(theirs)) / 2
        return not abs(theirs - mine) > allowed

    return all(within_tolerance(sp) for sp in set(self.elements + other.elements))
@property
def is_element(self) -> bool:
    """True if the composition holds exactly one element/species."""
    n_entries = len(self)
    return n_entries == 1
def copy(self) -> Self:
    """Return a new composition of the same class built from this one,
    keeping its allow_negative setting.
    """
    cls = type(self)
    return cls(self, allow_negative=self.allow_negative)
@property
def formula(self) -> str:
    """A formula string, with elements sorted by electronegativity,
    e.g. Li4 Fe4 P4 O16. Amounts of one are written out explicitly.
    """
    sym_amt = self.get_el_amt_dict()

    def electronegativity(sym):
        return get_el_sp(sym).X

    tokens = []
    for sym in sorted(sym_amt, key=electronegativity):
        tokens.append(f"{sym}{formula_double_format(sym_amt[sym], ignore_ones=False)}")
    return " ".join(tokens)
@property
def alphabetical_formula(self) -> str:
    """A formula string with the tokens of `formula` sorted alphabetically,
    e.g. Fe4 Li4 O16 P4.
    """
    tokens = self.formula.split()
    tokens.sort()
    return " ".join(tokens)
@property
def iupac_formula(self) -> str:
    """A formula string, with elements sorted by the IUPAC
    electronegativity ordering defined in Table VI of "Nomenclature of
    Inorganic Chemistry (IUPAC Recommendations 2005)". This ordering
    effectively follows the groups and rows of the periodic table, except
    the Lanthanides, Actinides and hydrogen. Polyanions are still determined
    based on the true electronegativity of the elements.
    e.g. CH2(SO4)2.
    """
    sym_amt = self.get_el_amt_dict()

    def iupac_rank(sym):
        return get_el_sp(sym).iupac_ordering

    tokens = []
    for sym in sorted(sym_amt, key=iupac_rank):
        tokens.append(f"{sym}{formula_double_format(sym_amt[sym], ignore_ones=False)}")
    return " ".join(tokens)
@property
def element_composition(self) -> Self:
    """The composition with every species replaced by its element, built
    from get_el_amt_dict() and keeping the allow_negative setting.
    """
    per_element = self.get_el_amt_dict()
    return type(self)(per_element, allow_negative=self.allow_negative)
@property
def fractional_composition(self) -> Self:
    """The normalized composition in which the amounts of each species sum to
    1, obtained by dividing by the stored atom count.
    E.g. "Fe2 O3".fractional_composition = "Fe0.4 O0.6".
    """
    total_atoms = self._n_atoms
    return self / total_atoms
@property
def reduced_composition(self) -> Self:
    """The reduced composition, i.e. amounts normalized by greatest common denominator.
    E.g. "Fe4 P4 O16".reduced_composition = "Fe P O4".
    """
    comp_and_factor = self.get_reduced_composition_and_factor()
    return comp_and_factor[0]
def get_reduced_composition_and_factor(self) -> tuple[Self, float]:
    """Calculate a reduced composition and factor.

    The factor is taken from get_reduced_formula_and_factor() and this
    composition is divided by it.

    Returns:
        A normalized composition and a multiplicative factor, i.e.,
        Li4Fe4P4O16 returns (Composition("LiFePO4"), 4).
    """
    formula_and_factor = self.get_reduced_formula_and_factor()
    factor = formula_and_factor[1]
    return self / factor, factor
def get_reduced_formula_and_factor(self, iupac_ordering: bool = False) -> tuple[str, float]:
    """Calculate a reduced formula and factor.

    Args:
        iupac_ordering (bool, optional): Whether to order the
            formula by the iupac "electronegativity" series, defined in
            Table VI of "Nomenclature of Inorganic Chemistry (IUPAC
            Recommendations 2005)". This ordering effectively follows
            the groups and rows of the periodic table, except the
            Lanthanides, Actinides and hydrogen. Note that polyanions
            will still be determined based on the true electronegativity of
            the elements.

    Returns:
        A pretty normalized formula and a multiplicative factor, i.e.,
        Li4Fe4P4O16 returns (LiFePO4, 4). If any amount is not an integer
        within amount_tolerance, the unreduced formula (spaces removed) and
        a factor of 1 are returned instead.
    """
    cls = type(self)

    # Reduction only makes sense for (near-)integer amounts.
    for val in self.values():
        if not abs(val - round(val)) < cls.amount_tolerance:
            return self.formula.replace(" ", ""), 1

    int_amounts = {}
    for key, val in self.get_el_amt_dict().items():
        int_amounts[key] = int(round(val))
    formula, factor = reduce_formula(int_amounts, iupac_ordering=iupac_ordering)

    # Special-cased formulas are reported with doubled amounts, so the
    # factor is halved to compensate.
    if formula in cls.special_formulas:
        formula = cls.special_formulas[formula]
        factor /= 2

    return formula, factor
def get_integer_formula_and_factor(
    self, max_denominator: int = 10000, iupac_ordering: bool = False
) -> tuple[str, float]:
    """Calculate an integer formula and factor.

    Args:
        max_denominator (int): all amounts in the el:amt dict are
            first converted to a Fraction with this maximum denominator
        iupac_ordering (bool, optional): Whether to order the
            formula by the iupac "electronegativity" series, defined in
            Table VI of "Nomenclature of Inorganic Chemistry (IUPAC
            Recommendations 2005)". This ordering effectively follows
            the groups and rows of the periodic table, except the
            Lanthanides, Actinides and hydrogen. Note that polyanions
            will still be determined based on the true electronegativity of
            the elements.

    Returns:
        A pretty normalized formula and a multiplicative factor, i.e.,
        Li0.5O0.25 returns (Li2O, 0.25). O0.25 returns (O2, 0.125)
    """
    cls = type(self)
    amounts = self.get_el_amt_dict()

    # Scale by the float gcd so that every amount becomes an integer.
    common = gcd_float(list(amounts.values()), 1 / max_denominator)
    int_amounts = {}
    for key, val in amounts.items():
        int_amounts[key] = round(val / common)

    formula, factor = reduce_formula(int_amounts, iupac_ordering=iupac_ordering)

    # Special-cased formulas are reported with doubled amounts, so the
    # factor is halved to compensate.
    if formula in cls.special_formulas:
        formula = cls.special_formulas[formula]
        factor /= 2

    return formula, factor * common
@property
def reduced_formula(self) -> str:
    """A pretty normalized formula, i.e., LiFePO4 instead of
    Li4Fe4P4O16 (the formula part of get_reduced_formula_and_factor()).
    """
    formula_and_factor = self.get_reduced_formula_and_factor()
    return formula_and_factor[0]
@property
def hill_formula(self) -> str:
    """The Hill system (or Hill notation) is a system of writing empirical chemical
    formulas, molecular chemical formulas and components of a condensed formula such
    that the number of carbon atoms in a molecule is indicated first, the number of
    hydrogen atoms next, and then the number of all other chemical elements
    subsequently, in alphabetical order of the chemical symbols. When the formula
    contains no carbon, all the elements, including hydrogen, are listed
    alphabetically.

    Amounts equal to one are omitted from the output.
    """
    elem_comp = self.element_composition
    remaining = sorted(el.symbol for el in elem_comp)

    # C, then H, move to the front - but only when carbon is present.
    leading = []
    if "C" in remaining:
        for symbol in ("C", "H"):
            if symbol in remaining:
                remaining.remove(symbol)
                leading.append(symbol)

    tokens = []
    for el in leading + remaining:
        amount_str = formula_double_format(elem_comp[el]) if elem_comp[el] != 1 else ""
        tokens.append(f"{el}{amount_str}")
    return " ".join(tokens)
@property
def elements(self) -> list[Element | Species | DummySpecies]:
    """List of the elements/species in the Composition, in iteration order."""
    return [el for el in self]
@property
def chemical_system_set(self) -> set[str]:
    """The set of element symbols in the Composition. E.g. {"O", "Si"} for SiO2."""
    symbols = set()
    for el in self.elements:
        symbols.add(el.symbol)
    return symbols
@property
def chemical_system(self) -> str:
    """The chemical system of a Composition, for example "O-Si" for
    SiO2. Chemical system is a string of a list of elements
    sorted alphabetically and joined by dashes, by convention for use
    in database keys.
    """
    symbols = [el.symbol for el in self.elements]
    symbols.sort()
    return "-".join(symbols)
@property
def num_atoms(self) -> float:
    """Total number of atoms in Composition. For negative amounts, sum
    of absolute values. This is the count accumulated at construction.
    """
    atom_count = self._n_atoms
    return atom_count
@property
def weight(self) -> float:
    """Total molecular weight of Composition, as a Mass in "amu": each
    entry's atomic_mass times its amount, summed.
    """
    total = 0
    for el, amount in self.items():
        total = total + amount * el.atomic_mass
    return Mass(total, "amu")
def get_atomic_fraction(self, el: SpeciesLike) -> float:
    """Calculate atomic fraction of an Element or Species.

    Args:
        el (Element/Species): Element or Species to get fraction for.

    Returns:
        Atomic fraction for element el in Composition: the absolute value
        of its amount divided by the stored atom count.
    """
    amount = abs(self[el])
    return amount / self._n_atoms
def get_wt_fraction(self, el: SpeciesLike) -> float:
    """Calculate weight fraction of an Element or Species.

    Args:
        el (Element | Species): Element or Species to get fraction for.

    Returns:
        float: Weight fraction for element el in Composition, i.e. its
            atomic mass times the absolute value of its amount, divided by
            the composition's weight.
    """
    species = get_el_sp(el)
    el_mass = cast(float, species.atomic_mass)
    el_weight = el_mass * abs(self[el])
    return el_weight / self.weight
def contains_element_type(self, category: str) -> bool:
    """Check if Composition contains any elements matching a given category.

    Args:
        category (str): one of "noble_gas", "transition_metal",
            "post_transition_metal", "rare_earth_metal", "metal", "metalloid",
            "alkali", "alkaline", "halogen", "chalcogen", "lanthanoid",
            "actinoid", "radioactive", "quadrupolar", "s-block", "p-block", "d-block", "f-block".

    Returns:
        bool: Whether any elements in Composition match category.

    Raises:
        ValueError: If category is not one of the ElementType values.
    """
    allowed_categories = [element.value for element in ElementType]
    if category not in allowed_categories:
        raise ValueError(f"Invalid {category=}, pick from {allowed_categories}")

    if "block" in category:
        # "s-block" etc.: the first letter is compared with each element's block
        blocks = [el.block for el in self.elements]
        return category[0] in blocks

    # every other category maps onto an is_<category> attribute
    attr_name = f"is_{category}"
    return any(getattr(el, attr_name) for el in self.elements)
def _parse_formula(self, formula: str, strict: bool = True) -> dict[str, float]:
"""
Args:
formula (str): A string formula, e.g. Fe2O3, Li3Fe2(PO4)3.
strict (bool): Whether to throw an error if formula string is invalid (e.g. empty).
Defaults to True.
Returns:
Composition with that formula.
Notes:
In the case of Metallofullerene formula (e.g. Y3N@C80),
the @ mark will be dropped and passed to parser.
"""
# Raise error if formula contains special characters or only spaces and/or numbers
if strict and re.match(r"[\s\d.*/]*$", formula):
raise ValueError(f"Invalid {formula=}")
# For Metallofullerene like "Y3N@C80"
formula = formula.replace("@", "")
# Square brackets are used in formulas to denote coordination complexes (gh-3583)
formula = formula.replace("[", "(")
formula = formula.replace("]", ")")
def get_sym_dict(form: str, factor: float) -> dict[str, float]:
sym_dict: dict[str, float] = defaultdict(float)
for match in re.finditer(r"([A-Z][a-z]*)\s*([-*\.e\d]*)", form):
el = match[1]
amt = 1.0
if match[2].strip() != "":
amt = float(match[2])
sym_dict[el] += amt * factor
form = form.replace(match.group(), "", 1)
if form.strip():
raise ValueError(f"{form} is an invalid formula!")
return sym_dict
match = re.search(r"\(([^\(\)]+)\)\s*([\.e\d]*)", formula)
while match:
factor = 1.0
if match[2] != "":
factor = float(match[2])
unit_sym_dict = get_sym_dict(match[1], factor)
expanded_sym = "".join(f"{el}{amt}" for el, amt in unit_sym_dict.items())
expanded_formula = formula.replace(match.group(), expanded_sym, 1)
formula = expanded_formula
match = re.search(r"\(([^\(\)]+)\)\s*([\.e\d]*)", formula)
return get_sym_dict(formula, 1)
@property
def anonymized_formula(self) -> str:
    """An anonymized formula. Unique species are arranged in ordering of
    increasing amounts and assigned ascending alphabets. Useful for
    prototyping formulas. For example, all stoichiometric perovskites have
    anonymized_formula ABC3.

    Species are first merged into their elements. If all per-element
    amounts are integers they are divided by their greatest common divisor;
    otherwise they are used unchanged.
    """
    reduced = self.element_composition
    # The integer check and the gcd must use the per-element totals: the
    # per-species amounts can share a smaller gcd than the merged elements
    # (e.g. Fe2+ 1, Fe3+ 1, O2- 2 -> Fe2 O2), which would leave the
    # result only partially reduced. An empty composition has nothing to
    # reduce, so gcd() is not called without arguments.
    amounts = list(reduced.values())
    if amounts and all(val == int(val) for val in amounts):
        reduced /= gcd(*(int(val) for val in amounts))

    anon = ""
    for elem, amt in zip(string.ascii_uppercase, sorted(reduced.values())):
        if amt == 1:
            amt_str = ""
        elif abs(amt % 1) < 1e-8:
            # whole number stored as float: print without the trailing ".0"
            amt_str = str(int(amt))
        else:
            amt_str = str(amt)
        anon += f"{elem}{amt_str}"
    return anon
@property
def valid(self) -> bool:
    """True if Composition contains valid elements or species and
    False if the Composition contains any dummy species.
    """
    for el in self.elements:
        if isinstance(el, DummySpecies):
            return False
    return True
@classmethod
def from_dict(cls, dct: dict) -> Self:
    """Create a composition from a dict generated by as_dict(). Strictly not
    necessary given that the standard constructor already takes in such an
    input, but this method preserves the standard pymatgen API of having
    from_dict methods to reconstitute objects generated by as_dict(). Allows
    for easier introspection.

    Args:
        dct (dict): {symbol: amount} dict.

    Returns:
        The composition obtained by passing dct to the constructor.
    """
    composition = cls(dct)
    return composition
@classmethod
def from_weight_dict(cls, weight_dict: dict[SpeciesLike, float]) -> Self:
    """Create a Composition based on a dict of atomic fractions calculated
    from a dict of weight fractions. Allows for quick creation of the class
    from weight-based notations commonly used in the industry, such as
    Ti6V4Al and Ni60Ti40.

    Args:
        weight_dict (dict): {symbol: weight_fraction} dict.

    Returns:
        Composition
    """
    # Weight divided by atomic mass gives each element's relative atom
    # count; normalising by their sum turns those into atomic fractions.
    relative_atoms = {el: val / Element(el).atomic_mass for el, val in weight_dict.items()}
    total_atoms = sum(relative_atoms.values())
    comp_dict = {el: count / total_atoms for el, count in relative_atoms.items()}
    return cls(comp_dict)
def get_el_amt_dict(self) -> dict[str, float]:
    """
    Returns:
        dict[str, float]: element symbol and (unreduced) amount. E.g.
            {"Fe": 4.0, "O": 6.0}. Entries sharing a symbol are summed.
    """
    totals: dict[str, float] = {}
    for el, amt in self.items():
        symbol = el.symbol
        totals[symbol] = totals.get(symbol, 0.0) + amt
    return totals
def as_dict(self) -> dict[str, float]:
    """Subtly different from get_el_amt_dict in that the keys here are str(Element)
    instead of Element.symbol.

    Returns:
        dict[str, float]: element symbol and (unreduced) amount. E.g.
            {"Fe": 4.0, "O": 6.0} or {"Fe3+": 4.0, "O2-": 6.0}
    """
    totals: dict[str, float] = {}
    for el, amt in self.items():
        label = str(el)
        totals[label] = totals.get(label, 0.0) + amt
    return totals
@property
def to_reduced_dict(self) -> dict[str, float]:
    """
    Returns:
        dict[str, float]: element symbols mapped to reduced amount e.g. {"Fe": 2.0, "O":3.0}.
            This is as_dict() of the reduced composition.
    """
    reduced = self.reduced_composition
    return reduced.as_dict()
@property
def to_weight_dict(self) -> dict[str, float]:
    """
    Returns:
        dict[str, float] with weight fraction of each component {"Ti": 0.90, "V": 0.06, "Al": 0.04}.
    """
    fractions = {}
    for el in self.elements:
        fractions[str(el)] = self.get_wt_fraction(el)
    return fractions
@property
def to_data_dict(self) -> dict[str, Any]:
    """
    Returns:
        A dict with many keys and values relating to Composition/Formula,
        including reduced_cell_composition, unit_cell_composition,
        reduced_cell_formula, elements and nelements.
    """
    data: dict[str, Any] = {}
    data["reduced_cell_composition"] = self.reduced_composition
    data["unit_cell_composition"] = self.as_dict()
    data["reduced_cell_formula"] = self.reduced_formula
    data["elements"] = [str(el) for el in self]
    data["nelements"] = len(self)
    return data
@property
def charge(self) -> float | None:
    """Total charge based on oxidation states. If any oxidation states
    are None or they're all 0, returns None. Use add_charges_from_oxi_state_guesses to
    assign oxidation states to elements based on charge balancing.

    Returns:
        float | None: Sum of oxidation state times amount over all entries,
            or None when that sum is not meaningful (see above).
    """
    warnings.warn(
        "Composition.charge is experimental and may produce incorrect results. Use with "
        "caution and open a GitHub issue pinging @janosh to report bad behavior.",
        stacklevel=2,
    )
    # Entries without an oxi_state attribute count as "oxidation state unknown".
    oxi_states = [getattr(specie, "oxi_state", None) for specie in self]
    # A single unknown oxidation state makes the total undefined; checking
    # only for "all None or 0" would let a mix of None and non-zero states
    # through and fail on None * amount below.
    if None in oxi_states or not any(oxi_states):
        return None

    return sum(oxi * amt for oxi, amt in zip(oxi_states, self.values()))
@property
def charge_balanced(self) -> bool | None:
    """True if composition is charge balanced, False otherwise. If any oxidation states
    are None, returns None. Use add_charges_from_oxi_state_guesses to assign oxidation
    states to elements.

    Returns:
        bool | None: Whether the absolute total charge is below
            charge_balanced_tolerance. When the charge is undefined, False
            if every oxidation state is 0 and None otherwise.
    """
    warnings.warn(
        "Composition.charge_balanced is experimental and may produce incorrect results. "
        "Use with caution and open a GitHub issue pinging @janosh to report bad behavior.",
        stacklevel=2,
    )
    # Read the property once: every access recomputes the sum and emits
    # its own warning.
    total_charge = self.charge
    if total_charge is not None:
        return abs(total_charge) < type(self).charge_balanced_tolerance

    if {getattr(el, "oxi_state", None) for el in self} == {0}:
        # all oxidation states are 0. this usually means no way of combining oxidation states
        # to get a zero charge was found, so the composition is not charge balanced
        return False
    return None
def oxi_state_guesses(
    self,
    oxi_states_override: dict | None = None,
    target_charge: float = 0,
    all_oxi_states: bool = False,
    max_sites: int | None = None,
) -> tuple[dict[str, float]]:
    """Check if the composition is charge-balanced and returns back all
    charge-balanced oxidation state combinations. Composition must have
    integer values. Note that more num_atoms in the composition gives
    more degrees of freedom. e.g. if possible oxidation states of
    element X are [2,4] and Y are [-3], then XY is not charge balanced
    but X2Y2 is. Results are returned from most to least probable based
    on ICSD statistics. Use max_sites to improve performance if needed.

    A composition with a single entry is answered directly with an
    oxidation state of 0.0 for that entry's symbol.

    Args:
        oxi_states_override (dict): dict of str->list to override an
            element's common oxidation states, e.g. {"V": [2,3,4,5]}
        target_charge (int): the desired total charge on the structure.
            Default is 0 signifying charge balance.
        all_oxi_states (bool): if True, an element defaults to
            all oxidation states in pymatgen Element.icsd_oxidation_states.
            Otherwise, default is Element.common_oxidation_states. Note
            that the full oxidation state list is *very* inclusive and
            can produce nonsensical results.
        max_sites (int): if possible, will reduce Compositions to at most
            this many sites to speed up oxidation state guesses. If the
            composition cannot be reduced to this many sites a ValueError
            will be raised. Set to -1 to just reduce fully. If set to a
            number less than -1, the formula will be fully reduced but a
            ValueError will be thrown if the number of atoms in the reduced
            formula is greater than abs(max_sites).

    Returns:
        list[dict]: each dict reports an element symbol and average
            oxidation state across all sites in that composition. If the
            composition is not charge balanced, an empty list is returned.
    """
    present = self.elements
    if len(present) == 1:
        return ({present[0].symbol: 0.0},)

    # The helper returns several results; only the first is reported here.
    guesses = self._get_oxi_state_guesses(all_oxi_states, max_sites, oxi_states_override, target_charge)
    return guesses[0]
def replace(self, elem_map: dict[str, str | dict[str, float]]) -> Self:
    """Replace elements in a composition. Returns a new Composition, leaving the old one unchanged.

    Args:
        elem_map (dict[str, str | dict[str, float]]): dict of elements or species to swap. E.g.
            {"Li": "Na"} performs a Li for Na substitution. The target can be a {species: factor} dict. For
            example, in Fe2O3 you could map {"Fe": {"Mg": 0.5, "Cu":0.5}} to obtain MgCuO3.
            The dict itself is not modified.

    Returns:
        Composition: New object with elements remapped according to elem_map,
            carrying this composition's allow_negative setting.
    """
    # Drop inapplicable substitutions
    invalid_elems = [key for key in elem_map if key not in self]
    if invalid_elems:
        warnings.warn(
            "Some elements to be substituted are not present in composition. Please check your input. "
            f"Problematic element = {invalid_elems}; {self}",
            stacklevel=2,
        )
        # Filter into a new dict rather than popping from the caller's one.
        elem_map = {key: val for key, val in elem_map.items() if key not in invalid_elems}

    # Start with elements that remain unchanged (not in elem_map)
    new_comp = {elem: amount for elem, amount in self.as_dict().items() if elem not in elem_map}

    for old_elem, new_elem in elem_map.items():
        amount = self[old_elem]

        # Build a dictionary of substitutions to be made
        if isinstance(new_elem, dict):
            subs = {el: factor * amount for el, factor in new_elem.items()}
        else:
            subs = {new_elem: amount}

        # Apply the substitutions to the new composition
        for el, amt in subs.items():
            if el in new_comp:
                new_comp[el] += amt
            else:
                new_comp[el] = amt

            # Check for ambiguous input (see issue #2553)
            if el in self:
                warnings.warn(
                    f"Same element ({el}) in both the keys and values of the substitution! "
                    "This can be ambiguous, so be sure to check your result.",
                    stacklevel=2,
                )

    # Propagate allow_negative like the other methods that derive a new
    # composition, so a composition with negative amounts stays replaceable.
    return type(self)(new_comp, allow_negative=self.allow_negative)
def add_charges_from_oxi_state_guesses(
    self,
    oxi_states_override: dict | None = None,
    target_charge: float = 0,
    all_oxi_states: bool = False,
    max_sites: int | None = None,
) -> Self:
    """Return a copy of this composition decorated with guessed oxidation states.

    The per-site oxidation states come from `_get_oxi_state_guesses`; the
    first solution it reports is the one applied here (see
    `oxi_state_guesses` for how solutions are found and ranked).

    Args:
        oxi_states_override (dict[str, list[float]]): Candidate oxidation
            states to use for specific elements instead of the defaults,
            e.g. {"V": [2, 3, 4, 5]}.
        target_charge (float): The desired total charge on the structure.
            Default is 0 signifying charge balance.
        all_oxi_states (bool): If True, candidates for an element are taken
            from Element.oxidation_states. Otherwise they are taken from
            Element.icsd_oxidation_states, falling back to
            Element.oxidation_states when that is empty. Note that the
            full oxidation state list is *very* inclusive and can produce
            nonsensical results.
        max_sites (int): If positive and the composition has more atoms
            than this, the composition is reduced (then scaled back up as
            close to max_sites as possible) before guessing; a ValueError
            is raised if it still does not fit. Set to -1 to just reduce
            fully. If set to a number less than -1, the formula is fully
            reduced but a ValueError is raised if the reduced formula has
            more than abs(max_sites) atoms.

    Returns:
        Composition whose species carry the oxidation states of the first
        guessed solution. If no solution exists, returns a Composition in
        which every species has oxidation state 0.
    """
    _solutions, per_site_states = self._get_oxi_state_guesses(
        all_oxi_states, max_sites, oxi_states_override, target_charge
    )

    # No charged assignment was found: fall back to neutral species
    if not per_site_states:
        neutral = {Species(el, 0): amt for el, amt in self.items()}
        return type(self)(neutral)

    # Expand the first solution into one Species per site, then count them
    first_solution = per_site_states[0]
    decorated = [Species(symbol, charge) for symbol, charges in first_solution.items() for charge in charges]
    return type(self)(collections.Counter(decorated))
def remove_charges(self) -> Self:
    """Return a new Composition in which every Species is replaced by its bare Element.

    Amounts of species that share the same element symbol are summed.

    Returns:
        Composition object without charge decoration, for example
        {"Fe3+": 2.0, "O2-": 3.0} becomes {"Fe": 2.0, "O": 3.0}
    """
    totals: dict[Element, float] = defaultdict(float)
    for species, amount in self.items():
        # Key by the undecorated element so differently charged species merge
        element = Element(species.symbol)
        totals[element] = totals[element] + amount
    return type(self)(totals)
def _get_oxi_state_guesses(
self,
all_oxi_states: bool,
max_sites: int | None,
oxi_states_override: dict[str, list] | None,
target_charge: float,
) -> tuple[tuple, tuple]:
"""Utility operation for guessing oxidation states.
See `oxi_state_guesses` for full details. This operation does the
calculation of the most likely oxidation states
Args:
oxi_states_override (dict): dict of str->list to override an
element's common oxidation states, e.g. {"V": [2,3,4,5]}
target_charge (float): the desired total charge on the structure.
Default is 0 signifying charge balance.
all_oxi_states (bool): if True, an element defaults to
all oxidation states in pymatgen Element.icsd_oxidation_states.
Otherwise, default is Element.common_oxidation_states. Note
that the full oxidation state list is *very* inclusive and
can produce nonsensical results.
max_sites (int): if possible, will reduce Compositions to at most
this many sites to speed up oxidation state guesses. If the
composition cannot be reduced to this many sites a ValueError
will be raised. Set to -1 to just reduce fully. If set to a
number less than -1, the formula will be fully reduced but a
ValueError will be thrown if the number of atoms in the reduced
formula is greater than abs(max_sites).
Returns:
list[dict]: Each dict maps the element symbol to a list of
oxidation states for each site of that element. For example, Fe3O4 could
return a list of [2,2,2,3,3,3] for the oxidation states of the 6 Fe sites.
If the composition is not charge balanced, an empty list is returned.
"""
comp = self.copy()
# Reduce Composition if necessary
if max_sites and max_sites < 0:
comp = self.reduced_composition
if max_sites < -1 and comp.num_atoms > abs(max_sites):
raise ValueError(f"Composition {comp} cannot accommodate max_sites setting!")
elif max_sites and comp.num_atoms > max_sites:
reduced_comp, reduced_factor = self.get_reduced_composition_and_factor()
if reduced_factor > 1:
reduced_comp *= max(1, int(max_sites / reduced_comp.num_atoms))
comp = reduced_comp # as close to max_sites as possible
if comp.num_atoms > max_sites:
raise ValueError(f"Composition {comp} cannot accommodate max_sites setting!")
# Load prior probabilities of oxidation states, used to rank solutions
if not type(self).oxi_prob:
all_data = loadfn(f"{module_dir}/../analysis/icsd_bv.yaml")
type(self).oxi_prob = {Species.from_str(sp): data for sp, data in all_data["occurrence"].items()}
oxi_states_override = oxi_states_override or {}
# Assert Composition only has integer amounts
if not all(amt == int(amt) for amt in comp.values()):
raise ValueError("Charge balance analysis requires integer values in Composition!")
# For each element, determine all possible sum of oxidations
# (taking into account nsites for that particular element)
el_amt = comp.get_el_amt_dict()
elements = list(el_amt)
el_sums: list = [] # matrix: dim1= el_idx, dim2=possible sums
el_sum_scores: defaultdict = defaultdict(set) # dict of el_idx, sum -> score
el_best_oxid_combo: dict = {} # dict of el_idx, sum -> oxid combo with best score
for idx, el in enumerate(elements):
el_sum_scores[idx] = {}
el_best_oxid_combo[idx] = {}
el_sums.append([])
if oxi_states_override.get(el):
oxids: list | tuple = oxi_states_override[el]
elif all_oxi_states:
oxids = Element(el).oxidation_states
else:
oxids = Element(el).icsd_oxidation_states or Element(el).oxidation_states
# Get all possible combinations of oxidation states
# and sum each combination
for oxid_combo in combinations_with_replacement(oxids, int(el_amt[el])):
# List this sum as a possible option
oxid_sum = sum(oxid_combo)
if oxid_sum not in el_sums[idx]:
el_sums[idx].append(oxid_sum)
# Determine how probable is this combo?
score = sum(type(self).oxi_prob.get(Species(el, o), 0) for o in oxid_combo) # type: ignore[union-attr]
# If it is the most probable combo for a certain sum,
# store the combination
if oxid_sum not in el_sum_scores[idx] or score > el_sum_scores[idx].get(oxid_sum, 0):
el_sum_scores[idx][oxid_sum] = score
el_best_oxid_combo[idx][oxid_sum] = oxid_combo