-
Notifications
You must be signed in to change notification settings - Fork 874
/
Copy pathoutputs.py
5604 lines (4880 loc) · 240 KB
/
outputs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Classes for reading/manipulating/writing VASP output files."""
from __future__ import annotations
import itertools
import math
import os
import re
import warnings
import xml.etree.ElementTree as ET
from collections import defaultdict
from collections.abc import Iterable
from dataclasses import dataclass
from datetime import datetime, timezone
from glob import glob
from io import StringIO
from pathlib import Path
from typing import TYPE_CHECKING, cast
import numpy as np
from monty.io import reverse_readfile, zopen
from monty.json import MSONable, jsanitize
from monty.os.path import zpath
from monty.re import regrep
from numpy.testing import assert_allclose
from tqdm import tqdm
from pymatgen.core import Composition, Element, Lattice, Structure
from pymatgen.core.trajectory import Trajectory
from pymatgen.core.units import unitized
from pymatgen.electronic_structure.bandstructure import (
BandStructure,
BandStructureSymmLine,
get_reconstructed_band_structure,
)
from pymatgen.electronic_structure.core import Magmom, Orbital, OrbitalType, Spin
from pymatgen.electronic_structure.dos import CompleteDos, Dos
from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry
from pymatgen.io.common import VolumetricData as BaseVolumetricData
from pymatgen.io.core import ParseError
from pymatgen.io.vasp.inputs import Incar, Kpoints, Poscar, Potcar
from pymatgen.io.wannier90 import Unk
from pymatgen.util.io_utils import clean_lines, micro_pyawk
from pymatgen.util.num import make_symmetric_matrix_from_upper_tri
from pymatgen.util.typing import Kpoint, Tuple3Floats, Vector3D
if TYPE_CHECKING:
from collections.abc import Callable
from typing import Any, Literal
# Avoid name conflict with pymatgen.core.Element
from xml.etree.ElementTree import Element as XML_Element
from numpy.typing import NDArray
from typing_extensions import Self
from pymatgen.util.typing import PathLike
def _parse_parameters(val_type: str, val: str) -> bool | str | float | int:
"""
Helper function to convert a Vasprun parameter into the proper type.
Boolean, int and float types are converted.
Args:
val_type: Value type parsed from vasprun.xml.
val: Actual string value parsed for vasprun.xml.
"""
if val_type == "logical":
return val == "T"
if val_type == "int":
return int(val)
if val_type == "string":
return val.strip()
return float(val)
def _parse_v_parameters(
val_type: str,
val: str,
filename: PathLike,
param_name: str,
) -> list[bool] | list[float] | list[int] | list[str]:
"""
Helper function to convert a Vasprun array-type parameter into
the proper type. Boolean, int and float types are converted.
Args:
val_type: Value type parsed from vasprun.xml.
val: Actual string value parsed for vasprun.xml.
filename: Fullpath of vasprun.xml. Used for robust error handling.
e.g. if vasprun.xml contains *** for some Incar parameters,
the code will try to read from an INCAR file present in the same
directory.
param_name: Name of parameter.
Returns:
Parsed value.
"""
err = ValueError("Error in parsing vasprun.xml")
if val_type == "logical":
return [i == "T" for i in val.split()]
if val_type == "string":
return val.split()
if val_type == "int":
try:
ints = [int(i) for i in val.split()]
except ValueError as exc:
# Fix an error in vasprun where
# sometimes LDAUL/J is displayed as 2****
ints = _parse_from_incar(filename, param_name)
if ints is None:
raise err from exc
return ints
try:
floats = [float(i) for i in val.split()]
except ValueError as exc:
# Fix an error in vasprun where
# sometimes MAGMOM is displayed as 2****
floats = _parse_from_incar(filename, param_name)
if floats is None:
raise err from exc
return floats
def _parse_vasp_array(elem) -> list[list[float]]:
if elem.get("type") == "logical":
return [[i == "T" for i in v.text.split()] for v in elem]
return [[_vasprun_float(i) for i in v.text.split()] for v in elem]
def _parse_from_incar(filename: PathLike, key: str) -> Any:
"""Helper function to parse a parameter from the INCAR."""
dirname = os.path.dirname(filename)
for fn in os.listdir(dirname):
if re.search("INCAR", fn):
warnings.warn(f"INCAR found. Using {key} from INCAR.")
incar = Incar.from_file(os.path.join(dirname, fn))
return incar.get(key, None)
return None
def _vasprun_float(flt: float | str) -> float:
"""
Large numbers are often represented as ********* in the vasprun.
This function parses these values as np.nan.
"""
try:
return float(flt)
except ValueError:
flt = cast(str, flt)
_flt: str = flt.strip()
if _flt == "*" * len(_flt):
warnings.warn("Float overflow (*******) encountered in vasprun")
return np.nan
raise
@dataclass
class KpointOptProps:
"""Simple container class to store KPOINTS_OPT data in a separate namespace. Used by Vasprun."""
tdos: Dos | None = None
idos: Dos | None = None
pdos: list | None = None
efermi: float | None = None
eigenvalues: dict | None = None
projected_eigenvalues: dict | None = None
projected_magnetisation: np.ndarray | None = None
kpoints: Kpoints | None = None
actual_kpoints: list | None = None
actual_kpoints_weights: list | None = None
dos_has_errors: bool | None = None
class Vasprun(MSONable):
"""
Vastly improved cElementTree-based parser for vasprun.xml files. Uses
iterparse to support incremental parsing of large files.
Speedup over Dom is at least 2x for smallish files (~1 Mb) to orders of
magnitude for larger files (~10 Mb).
**VASP results**
Attributes:
ionic_steps (list): All ionic steps in the run as a list of {"structure": structure at end of run,
"electronic_steps": {All electronic step data in vasprun file}, "stresses": stress matrix}.
tdos (Dos): Total dos calculated at the end of run. Note that this is rounded to 4 decimal
places by VASP.
idos (Dos): Integrated dos calculated at the end of run. Rounded to 4 decimal places by VASP.
pdos (list): List of list of PDos objects. Access as pdos[atomindex][orbitalindex].
efermi (float): Fermi energy.
eigenvalues (dict): Final eigenvalues as a dict of {(spin, kpoint index):[[eigenvalue, occu]]}.
The kpoint index is 0-based (unlike the 1-based indexing in VASP).
projected_eigenvalues (dict): Final projected eigenvalues as a dict of {spin: nd-array}.
To access a particular value, you need to do
Vasprun.projected_eigenvalues[spin][kpoint index][band index][atom index][orbital_index].
The kpoint, band and atom indices are 0-based (unlike the 1-based indexing in VASP).
projected_magnetisation (np.array): Final projected magnetization as a numpy array with the
shape (nkpoints, nbands, natoms, norbitals, 3). Where the last axis is the contribution in the
3 Cartesian directions. This attribute is only set if spin-orbit coupling (LSORBIT = True) or
non-collinear magnetism (LNONCOLLINEAR = True) is turned on in the INCAR.
other_dielectric (dict): Dictionary, with the tag comment as key, containing other variants of
the real and imaginary part of the dielectric constant (e.g., computed by RPA) in function of
the energy (frequency). Optical properties (e.g. absorption coefficient) can be obtained through this.
The data is given as a tuple of 3 values containing each of them the energy, the real part tensor,
and the imaginary part tensor ([energies],[[real_partxx,real_partyy,real_partzz,real_partxy,
real_partyz,real_partxz]],[[imag_partxx,imag_partyy,imag_partzz,imag_partxy, imag_partyz, imag_partxz]]).
nionic_steps (int): The total number of ionic steps. This number is always equal to the total number
of steps in the actual run even if ionic_step_skip is used.
force_constants (np.array): Force constants computed in phonon DFPT run(IBRION = 8).
The data is a 4D numpy array of shape (natoms, natoms, 3, 3).
normalmode_eigenvals (np.array): Normal mode frequencies. 1D numpy array of size 3*natoms.
normalmode_eigenvecs (np.array): Normal mode eigen vectors. 3D numpy array of shape (3*natoms, natoms, 3).
md_data (list): Available only for ML MD runs, i.e., INCAR with ML_LMLFF = .TRUE. md_data is a list of
dict with the following format: [{'energy': {'e_0_energy': -525.07195568, 'e_fr_energy': -525.07195568,
'e_wo_entrp': -525.07195568, 'kinetic': 3.17809233, 'lattice kinetic': 0.0, 'nosekinetic': 1.323e-5,
'nosepot': 0.0, 'total': -521.89385012}, 'forces': [[0.17677989, 0.48309874, 1.85806696], ...],
'structure': Structure object}].
incar (Incar): Incar object for parameters specified in INCAR file.
parameters (Incar): Incar object with parameters that VASP actually used, including all defaults.
kpoints (Kpoints): Kpoints object for KPOINTS specified in run.
actual_kpoints (list): List of actual kpoints, e.g. [[0.25, 0.125, 0.08333333], [-0.25, 0.125, 0.08333333],
[0.25, 0.375, 0.08333333], ....].
actual_kpoints_weights (list): List of kpoint weights, e.g. [0.04166667, 0.04166667, 0.04166667, 0.04166667,
0.04166667, ....].
atomic_symbols (list): List of atomic symbols, e.g. ["Li", "Fe", "Fe", "P", "P", "P"].
potcar_symbols (list): List of POTCAR symbols. e.g. ["PAW_PBE Li 17Jan2003", "PAW_PBE Fe 06Sep2000", ..].
kpoints_opt_props (object): Object whose attributes are the data from KPOINTS_OPT (if present,
else None). Attributes of the same name have the same format and meaning as Vasprun (or they are
None if absent). Attributes are: tdos, idos, pdos, efermi, eigenvalues, projected_eigenvalues,
projected magnetisation, kpoints, actual_kpoints, actual_kpoints_weights, dos_has_errors.
Author: Shyue Ping Ong
"""
def __init__(
self,
filename: PathLike,
ionic_step_skip: int | None = None,
ionic_step_offset: int = 0,
parse_dos: bool = True,
parse_eigen: bool = True,
parse_projected_eigen: bool = False,
parse_potcar_file: PathLike | bool = True,
occu_tol: float = 1e-8,
separate_spins: bool = False,
exception_on_bad_xml: bool = True,
) -> None:
"""
Args:
filename (str): Filename to parse
ionic_step_skip (int): If ionic_step_skip is a number > 1,
only every ionic_step_skip ionic steps will be read for
structure and energies. This is very useful if you are parsing
very large vasprun.xml files and you are not interested in every
single ionic step. Note that the final energies may not be the
actual final energy in the vasprun.
ionic_step_offset (int): Used together with ionic_step_skip. If set,
the first ionic step read will be offset by the amount of
ionic_step_offset. For example, if you want to start reading
every 10th structure but only from the 3rd structure onwards,
set ionic_step_skip to 10 and ionic_step_offset to 3. Main use
case is when doing statistical structure analysis with
extremely long time scale multiple VASP calculations of
varying numbers of steps.
parse_dos (bool): Whether to parse the dos. Defaults to True. Set
to False to shave off significant time from the parsing if you
are not interested in getting those data.
Note that the DOS output from VASP is rounded to 4 decimal places,
which can give some slight inaccuracies.
parse_eigen (bool): Whether to parse the eigenvalues. Defaults to
True. Set to False to shave off significant time from the
parsing if you are not interested in getting those data.
parse_projected_eigen (bool): Whether to parse the projected
eigenvalues and magnetization. Defaults to False. Set to True to obtain
projected eigenvalues and magnetization. **Note that this can take an
extreme amount of time and memory.** So use this wisely.
parse_potcar_file (bool | PathLike): Whether to parse the potcar file to read
the potcar hashes for the potcar_spec attribute. Defaults to True,
where no hashes will be determined and the potcar_spec dictionaries
will read {"symbol": ElSymbol, "hash": None}. By Default, looks in
the same directory as the vasprun.xml, with same extensions as
Vasprun.xml. If a path is provided, look at that path.
occu_tol (float): Sets the minimum tol for the determination of the
vbm and cbm. Usually the default of 1e-8 works well enough,
but there may be pathological cases.
separate_spins (bool): Whether the band gap, CBM, and VBM should be
reported for each individual spin channel. Defaults to False,
which computes the eigenvalue band properties independent of
the spin orientation. If True, the calculation must be spin-polarized.
exception_on_bad_xml (bool): Whether to throw a ParseException if a
malformed XML is detected. Default to True, which ensures only
proper vasprun.xml are parsed. You can set to False if you want
partial results (e.g., if you are monitoring a calculation during a
run), but use the results with care. A warning is issued.
"""
self.filename = filename
self.ionic_step_skip = ionic_step_skip
self.ionic_step_offset = ionic_step_offset
self.occu_tol = occu_tol
self.separate_spins = separate_spins
self.exception_on_bad_xml = exception_on_bad_xml
with zopen(filename, mode="rt") as file:
if ionic_step_skip or ionic_step_offset:
# Remove parts of the xml file and parse the string
content: str = file.read()
steps: list[str] = content.split("<calculation>")
# The text before the first <calculation> is the preamble!
preamble: str = steps.pop(0)
self.nionic_steps: int = len(steps)
new_steps = steps[ionic_step_offset :: int(ionic_step_skip or 1)]
# Add the tailing information in the last step from the run
to_parse: str = "<calculation>".join(new_steps)
if steps[-1] != new_steps[-1]:
to_parse = f"{preamble}<calculation>{to_parse}{steps[-1].split('</calculation>')[-1]}"
else:
to_parse = f"{preamble}<calculation>{to_parse}"
self._parse(
StringIO(to_parse),
parse_dos=parse_dos,
parse_eigen=parse_eigen,
parse_projected_eigen=parse_projected_eigen,
)
else:
self._parse(
file,
parse_dos=parse_dos,
parse_eigen=parse_eigen,
parse_projected_eigen=parse_projected_eigen,
)
self.nionic_steps = len(self.ionic_steps)
if parse_potcar_file:
self.update_potcar_spec(parse_potcar_file)
self.update_charge_from_potcar(parse_potcar_file)
if self.incar.get("ALGO") not in {"CHI", "BSE"} and not self.converged and self.parameters.get("IBRION") != 0:
msg = f"{filename} is an unconverged VASP run.\n"
msg += f"Electronic convergence reached: {self.converged_electronic}.\n"
msg += f"Ionic convergence reached: {self.converged_ionic}."
warnings.warn(msg, UnconvergedVASPWarning)
def _parse(
self,
stream,
parse_dos: bool,
parse_eigen: bool,
parse_projected_eigen: bool,
) -> None:
self.efermi: float | None = None
self.eigenvalues: dict[Any, NDArray] | None = None
self.projected_eigenvalues: dict[Any, NDArray] | None = None
self.projected_magnetisation: NDArray | None = None
self.dielectric_data: dict[str, tuple] = {}
self.other_dielectric: dict[str, tuple] = {}
self.incar: dict[str, Any] = {}
self.kpoints_opt_props: KpointOptProps | None = None
ionic_steps: list = []
md_data: list[dict] = []
parsed_header: bool = False
in_kpoints_opt: bool = False
try:
# When parsing XML, start tags tell us when we have entered a block
# while end tags are when we have actually read the data.
# To know if a particular tag is nested within another block,
# we have to read the start tags (otherwise we will only learn of
# the nesting after we have left the data behind).
# When parsing KPOINTS_OPT data, some of the tags in vasprun.xml
# can only be distinguished from their regular counterparts by
# whether they are nested within another block. This is why we
# must read both start and end tags and have flags to tell us
# when we have entered or left a block. (2024-01-26)
for event, elem in ET.iterparse(stream, events=["start", "end"]):
tag = elem.tag
if event == "start":
# The start event tells us when we have entered blocks
if tag == "calculation":
parsed_header = True
elif tag in ("eigenvalues_kpoints_opt", "projected_kpoints_opt"):
in_kpoints_opt = True
else: # event == "end":
# The end event happens when we have read a block, so have
# its data.
if not parsed_header:
if tag == "generator":
self.generator = self._parse_params(elem)
elif tag == "incar":
self.incar = self._parse_params(elem)
elif tag == "kpoints":
if not hasattr(self, "kpoints"):
self.kpoints, self.actual_kpoints, self.actual_kpoints_weights = self._parse_kpoints(
elem
)
elif tag == "parameters":
self.parameters = self._parse_params(elem)
elif tag == "structure" and elem.attrib.get("name") == "initialpos":
self.initial_structure = self._parse_structure(elem)
self.final_structure = self.initial_structure
elif tag == "atominfo":
self.atomic_symbols, self.potcar_symbols = self._parse_atominfo(elem)
self.potcar_spec: list[dict] = [
{"titel": titel, "hash": None, "summary_stats": {}} for titel in self.potcar_symbols
]
if tag == "calculation":
parsed_header = True
if not self.parameters.get("LCHIMAG", False):
ionic_steps.append(self._parse_ionic_step(elem))
else:
ionic_steps.extend(self._parse_chemical_shielding(elem))
elif parse_dos and tag == "dos":
if elem.get("comment") == "kpoints_opt":
kpoints_opt_props = self.kpoints_opt_props = self.kpoints_opt_props or KpointOptProps()
try:
kpoints_opt_props.tdos, kpoints_opt_props.idos, kpoints_opt_props.pdos = (
self._parse_dos(elem)
)
kpoints_opt_props.efermi = kpoints_opt_props.tdos.efermi
kpoints_opt_props.dos_has_errors = False
except Exception:
kpoints_opt_props.dos_has_errors = True
else:
try:
self.tdos, self.idos, self.pdos = self._parse_dos(elem)
self.efermi = self.tdos.efermi
self.dos_has_errors = False
except Exception:
self.dos_has_errors = True
elif parse_eigen and tag == "eigenvalues" and not in_kpoints_opt:
self.eigenvalues = self._parse_eigen(elem)
elif parse_projected_eigen and tag == "projected" and not in_kpoints_opt:
self.projected_eigenvalues, self.projected_magnetisation = self._parse_projected_eigen(elem)
elif tag in ("eigenvalues_kpoints_opt", "projected_kpoints_opt"):
in_kpoints_opt = False
if self.kpoints_opt_props is None:
self.kpoints_opt_props = KpointOptProps()
if parse_eigen:
# projected_kpoints_opt includes occupation information whereas
# eigenvalues_kpoints_opt doesn't.
self.kpoints_opt_props.eigenvalues = self._parse_eigen(elem.find("eigenvalues"))
if tag == "eigenvalues_kpoints_opt":
(
self.kpoints_opt_props.kpoints,
self.kpoints_opt_props.actual_kpoints,
self.kpoints_opt_props.actual_kpoints_weights,
) = self._parse_kpoints(elem.find("kpoints"))
elif parse_projected_eigen: # and tag == "projected_kpoints_opt": (implied)
(
self.kpoints_opt_props.projected_eigenvalues,
self.kpoints_opt_props.projected_magnetisation,
) = self._parse_projected_eigen(elem)
elif tag == "dielectricfunction":
if (
"comment" not in elem.attrib
or elem.attrib["comment"]
== "INVERSE MACROSCOPIC DIELECTRIC TENSOR (including local field effects in RPA (Hartree))"
):
if "density" not in self.dielectric_data:
self.dielectric_data["density"] = self._parse_diel(elem)
elif "velocity" not in self.dielectric_data:
# "velocity-velocity" is also named
# "current-current" in OUTCAR
self.dielectric_data["velocity"] = self._parse_diel(elem)
else:
raise NotImplementedError("This vasprun.xml has >2 unlabelled dielectric functions")
else:
comment = elem.attrib["comment"]
# VASP 6+ has labels for the density and current
# derived dielectric constants
if comment == "density-density":
self.dielectric_data["density"] = self._parse_diel(elem)
elif comment == "current-current":
self.dielectric_data["velocity"] = self._parse_diel(elem)
else:
self.other_dielectric[comment] = self._parse_diel(elem)
elif tag == "varray" and elem.attrib.get("name") == "opticaltransitions":
self.optical_transition = np.array(_parse_vasp_array(elem))
elif tag == "structure" and elem.attrib.get("name") == "finalpos":
self.final_structure = self._parse_structure(elem)
elif tag == "dynmat":
hessian, eigenvalues, eigenvectors = self._parse_dynmat(elem)
# n_atoms is not the total number of atoms, only those for which force constants were calculated
# https://github.com/materialsproject/pymatgen/issues/3084
n_atoms = len(hessian) // 3
hessian = np.array(hessian)
self.force_constants = np.zeros((n_atoms, n_atoms, 3, 3), dtype="double")
for ii in range(n_atoms):
for jj in range(n_atoms):
self.force_constants[ii, jj] = hessian[ii * 3 : (ii + 1) * 3, jj * 3 : (jj + 1) * 3] # type: ignore[call-overload]
phonon_eigenvectors = []
for ev in eigenvectors:
phonon_eigenvectors.append(np.array(ev).reshape(n_atoms, 3))
self.normalmode_eigenvals = np.array(eigenvalues)
self.normalmode_eigenvecs = np.array(phonon_eigenvectors)
elif self.incar.get("ML_LMLFF"):
if tag == "structure" and elem.attrib.get("name") is None:
md_data.append({})
md_data[-1]["structure"] = self._parse_structure(elem)
elif tag == "varray" and elem.attrib.get("name") == "forces":
md_data[-1]["forces"] = _parse_vasp_array(elem)
elif tag == "varray" and elem.attrib.get("name") == "stress":
md_data[-1]["stress"] = _parse_vasp_array(elem)
elif tag == "energy":
d = {i.attrib["name"]: float(i.text) for i in elem.findall("i")}
if "kinetic" in d:
md_data[-1]["energy"] = {i.attrib["name"]: float(i.text) for i in elem.findall("i")}
except ET.ParseError:
if self.exception_on_bad_xml:
raise
warnings.warn(
"XML is malformed. Parsing has stopped but partial data is available.",
UserWarning,
)
self.ionic_steps = ionic_steps
self.md_data = md_data
self.vasp_version = self.generator["version"]
@property
def structures(self) -> list[Structure]:
"""List of Structures for each ionic step."""
return [step["structure"] for step in self.ionic_steps]
@property
def epsilon_static(self) -> list[float]:
"""The static part of the dielectric constant.
Present only when it's a DFPT run (LEPSILON=TRUE).
"""
return self.ionic_steps[-1].get("epsilon", [])
@property
def epsilon_static_wolfe(self) -> list[float]:
"""The static part of the dielectric constant without any local
field effects. Present only when it's a DFPT run (LEPSILON=TRUE).
"""
return self.ionic_steps[-1].get("epsilon_rpa", [])
@property
def epsilon_ionic(self) -> list[float]:
"""The ionic part of the static dielectric constant.
Present when it's a DFPT run (LEPSILON=TRUE) and IBRION=5, 6, 7 or 8.
"""
return self.ionic_steps[-1].get("epsilon_ion", [])
@property
def dielectric(self) -> tuple[list, list, list]:
"""The real and imaginary part of the dielectric constant (e.g.,
computed by RPA) in function of the energy (frequency).
Optical properties (e.g. absorption coefficient) can be obtained through this.
Returns:
The data is given as a tuple of 3 for the energy, the real part
tensor, and the imaginary part tensor:
([energies], [[real_partxx, real_partyy, real_partzz, real_partxy,
real_partyz, real_partxz]], [[imag_partxx, imag_partyy, imag_partzz,
imag_partxy, imag_partyz, imag_partxz]]).
"""
return self.dielectric_data["density"]
@property
def optical_absorption_coeff(self) -> list[float] | None:
"""The optical absorption coefficient from the dielectric constants.
Note that this method is only implemented for optical properties
calculated with GGA and BSE.
"""
if self.dielectric_data["density"]:
real_avg = [
sum(self.dielectric_data["density"][1][i][:3]) / 3
for i in range(len(self.dielectric_data["density"][0]))
]
imag_avg = [
sum(self.dielectric_data["density"][2][i][:3]) / 3
for i in range(len(self.dielectric_data["density"][0]))
]
def optical_absorb_coeff(freq: float, real: float, imag: float) -> float:
"""Calculate optical absorption coefficient,
the unit is cm^-1.
"""
hc = 1.23984 * 1e-4 # plank constant times speed of light, in the unit of eV*cm
return 2 * 3.14159 * np.sqrt(np.sqrt(real**2 + imag**2) - real) * np.sqrt(2) / hc * freq
return list(
itertools.starmap(
optical_absorb_coeff, zip(self.dielectric_data["density"][0], real_avg, imag_avg, strict=True)
)
)
return None
@property
def converged_electronic(self) -> bool:
"""Whether electronic step converged in the final ionic step."""
final_elec_steps = (
self.ionic_steps[-1]["electronic_steps"] if self.incar.get("ALGO", "").lower() != "chi" else 0
)
# In a response function run there is no ionic steps, there is no scf step
if self.incar.get("LEPSILON"):
idx = 1
to_check = {"e_wo_entrp", "e_fr_energy", "e_0_energy"}
while set(final_elec_steps[idx]) == to_check:
idx += 1
return idx + 1 != self.parameters["NELM"]
return len(final_elec_steps) < self.parameters["NELM"]
@property
def converged_ionic(self) -> bool:
"""Whether ionic step convergence has been reached, i.e. VASP
exited before reaching the max ionic steps for a relaxation run.
In case IBRION=0 (MD) or EDIFFG=0, returns True if the max ionic
steps are reached.
"""
nsw = self.parameters.get("NSW", 0)
ibrion = self.parameters.get("IBRION", -1 if nsw in (-1, 0) else 0)
if ibrion == 0:
return nsw <= 1 or self.md_n_steps == nsw
# Context re EDIFFG: the use case for EDIFFG=0 is to ensure a relaxation runs for
# NSW steps (the non-AIMD way to generate a relaxation trajectory with DFT). In
# that case, user isn't worried about convergence w.r.t. forces or energy. The
# next if statement prevents custodian from trying to correct the calc because
# Vasprun.converged_ionic = False.
ediffg = self.parameters.get("EDIFFG", 1)
if ibrion in {1, 2} and ediffg == 0:
return nsw <= 1 or nsw == len(self.ionic_steps)
return nsw <= 1 or len(self.ionic_steps) < nsw
@property
def converged(self) -> bool:
"""Whether a relaxation run has both ionically and
electronically converged.
"""
return self.converged_electronic and self.converged_ionic
@property
@unitized("eV")
def final_energy(self) -> float:
"""Final energy from the VASP run."""
try:
final_istep = self.ionic_steps[-1]
total_energy = final_istep["e_0_energy"]
# Fix a bug in vasprun.xml.
# See https://www.vasp.at/forum/viewtopic.php?f=3&t=16942
final_estep = final_istep["electronic_steps"][-1]
electronic_energy_diff = final_estep["e_0_energy"] - final_estep["e_fr_energy"]
total_energy_bugfix = np.round(electronic_energy_diff + final_istep["e_fr_energy"], 8)
if np.abs(total_energy - total_energy_bugfix) > 1e-7:
return total_energy_bugfix
return total_energy
except (IndexError, KeyError):
warnings.warn(
"Calculation does not have a total energy. "
"Possibly a GW or similar kind of run. "
"Infinity is returned."
)
return float("inf")
@property
def complete_dos(self) -> CompleteDos:
"""A CompleteDos object which incorporates the total DOS and all projected DOS."""
final_struct = self.final_structure
pdoss = {final_struct[i]: pdos for i, pdos in enumerate(self.pdos)}
return CompleteDos(self.final_structure, self.tdos, pdoss)
@property
def complete_dos_normalized(self) -> CompleteDos:
"""A CompleteDos object which incorporates the total DOS and all projected DOS.
Normalized by the volume of the unit cell with units of states/eV/unit cell
volume.
"""
final_struct = self.final_structure
pdoss = {final_struct[i]: pdos for i, pdos in enumerate(self.pdos)}
return CompleteDos(self.final_structure, self.tdos, pdoss, normalize=True)
@property
def hubbards(self) -> dict[str, float]:
"""Hubbard U values used for a GGA+U run, otherwise an empty dict."""
symbols = [s.split()[1] for s in self.potcar_symbols]
symbols = [re.split(r"_", s)[0] for s in symbols]
if not self.incar.get("LDAU", False):
return {}
us = self.incar.get("LDAUU", self.parameters.get("LDAUU"))
js = self.incar.get("LDAUJ", self.parameters.get("LDAUJ"))
if len(js) != len(us):
js = [0] * len(us)
if len(us) == len(symbols):
return {symbols[idx]: us[idx] - js[idx] for idx in range(len(symbols))}
if sum(us) == 0 and sum(js) == 0:
return {}
raise VaspParseError("Length of U value parameters and atomic symbols are mismatched")
@property
def run_type(self) -> str:
"""The run type. Currently detects GGA, metaGGA, HF, HSE, B3LYP,
and hybrid functionals based on relevant INCAR tags. LDA is assigned if
PAW POTCARs are used and no other functional is detected.
Hubbard U terms and vdW corrections are detected automatically as well.
"""
# Care should be taken: if a GGA tag is not specified, VASP will default
# to the functional specified by the POTCAR. It is not clear how
# VASP handles the "--" value.
GGA_TYPES = {
"RE": "revPBE",
"PE": "PBE",
"PS": "PBEsol",
"RP": "revPBE+Padé",
"AM": "AM05",
"OR": "optPBE",
"BO": "optB88",
"MK": "optB86b",
"--": "GGA",
}
METAGGA_TYPES = {
"TPSS": "TPSS",
"RTPSS": "revTPSS",
"M06L": "M06-L",
"MBJ": "modified Becke-Johnson",
"SCAN": "SCAN",
"R2SCAN": "R2SCAN",
"RSCAN": "RSCAN",
"MS0": "MadeSimple0",
"MS1": "MadeSimple1",
"MS2": "MadeSimple2",
}
IVDW_TYPES = {
0: "no-correction",
1: "DFT-D2",
10: "DFT-D2",
11: "DFT-D3",
12: "DFT-D3-BJ",
2: "TS",
20: "TS",
21: "TS-H",
202: "MBD",
4: "dDsC",
}
if self.parameters.get("AEXX", 1.00) == 1.00:
run_type = "HF"
elif self.parameters.get("HFSCREEN", 0.30) == 0.30:
run_type = "HSE03"
elif self.parameters.get("HFSCREEN", 0.20) == 0.20:
run_type = "HSE06"
elif self.parameters.get("AEXX", 0.20) == 0.20:
run_type = "B3LYP"
elif self.parameters.get("LHFCALC", True):
run_type = "PBEO or other Hybrid Functional"
elif self.incar.get("METAGGA") and self.incar.get("METAGGA") not in {
"--",
"None",
}:
incar_tag = self.incar.get("METAGGA", "").strip().upper()
run_type = METAGGA_TYPES.get(incar_tag, incar_tag)
elif self.parameters.get("GGA"):
incar_tag = self.parameters.get("GGA", "").strip().upper()
run_type = GGA_TYPES.get(incar_tag, incar_tag)
elif self.potcar_symbols[0].split()[0] == "PAW":
run_type = "LDA"
else:
run_type = "unknown"
warnings.warn("Unknown run type!")
if self.is_hubbard or self.parameters.get("LDAU", True):
run_type += "+U"
if self.parameters.get("LUSE_VDW", False):
run_type += "+rVV10"
elif self.incar.get("IVDW") in IVDW_TYPES:
run_type += f"+vdW-{IVDW_TYPES[self.incar.get('IVDW', 0)]}"
elif self.incar.get("IVDW"):
run_type += "+vdW-unknown"
return run_type
@property
def is_hubbard(self) -> bool:
"""Whether is a DFT+U run."""
if len(self.hubbards) == 0:
return False
return sum(self.hubbards.values()) > 1e-8
@property
def is_spin(self) -> bool:
"""Whether is spin-polarized."""
return self.parameters.get("ISPIN", 1) == 2
@property
def md_n_steps(self) -> int:
"""Number of steps for MD runs.
Count all the actual MD steps if ML enabled.
"""
return len(self.md_data) if self.md_data else self.nionic_steps
def get_computed_entry(
self,
inc_structure: bool = True,
parameters: list[str] | None = None,
data: dict | None = None,
entry_id: str | None = None,
) -> ComputedStructureEntry | ComputedEntry:
"""Get a ComputedEntry or ComputedStructureEntry from the Vasprun.
Args:
inc_structure (bool): Whether to return ComputedStructureEntries
instead of ComputedEntries.
parameters (list): Input parameters to include. It has to be one of
the properties supported by the Vasprun object. If is None,
a default set of parameters that are
necessary for typical post-processing will be set.
data (dict): Output data to include. Have to be the properties
supported by the Vasprun object.
entry_id (str): An entry id for the ComputedEntry.
Defaults to "vasprun-{current datetime}"
Returns:
ComputedStructureEntry/ComputedEntry
"""
if entry_id is None:
entry_id = f"vasprun-{datetime.now(tz=timezone.utc)}"
param_names = {
"is_hubbard",
"hubbards",
"potcar_symbols",
"potcar_spec",
"run_type",
}
if parameters is not None and len(parameters) > 0:
param_names.update(parameters)
params = {param: getattr(self, param) for param in param_names}
data = {} if data is None else {param: getattr(self, param) for param in data}
if inc_structure:
return ComputedStructureEntry(
self.final_structure, self.final_energy, parameters=params, data=data, entry_id=entry_id
)
return ComputedEntry(
self.final_structure.composition, self.final_energy, parameters=params, data=data, entry_id=entry_id
)
def get_band_structure(
self,
kpoints_filename: str | None = None,
efermi: float | Literal["smart"] | None = None,
line_mode: bool = False,
force_hybrid_mode: bool = False,
ignore_kpoints_opt: bool = False,
) -> BandStructureSymmLine | BandStructure:
"""Get the band structure.
Args:
kpoints_filename: Full path of the KPOINTS file from which
the band structure is generated.
If None is provided, the code will try to intelligently
determine the appropriate KPOINTS file by substituting the
filename of the vasprun.xml with KPOINTS (or KPOINTS_OPT).
The latter is the default behavior.
efermi: The Fermi energy associated with the bandstructure, in eV. By
default (None), uses the value reported by VASP in vasprun.xml. To
manually set the Fermi energy, pass a float. Pass 'smart' to use the
`calculate_efermi()` method, which calculates the Fermi level by first
checking whether it lies within a small tolerance (by default 0.001 eV)
of a band edge) If it does, the Fermi level is placed in the center of
the bandgap. Otherwise, the value is identical to the value reported by
VASP.
line_mode: Force the band structure to be considered as
a run along symmetry lines. (Default: False)
force_hybrid_mode: Makes it possible to read in self-consistent band
structure calculations for every type of functional. (Default: False)
ignore_kpoints_opt: Normally, if KPOINTS_OPT data exists, it has
the band structure data. Set this flag to ignore it. (Default: False)
Returns:
BandStructure (or more specifically a BandStructureSymmLine object if the run
is detected to be a run along symmetry lines)
Two types of runs along symmetry lines are accepted: non-sc with
Line-Mode in the KPOINT file or hybrid, self-consistent with a
uniform grid+a few kpoints along symmetry lines (explicit KPOINTS
file) (it's not possible to run a non-sc band structure with hybrid
functionals). The explicit KPOINTS file needs to have data on the
kpoint label as commentary.
If VASP was run with KPOINTS_OPT, it reads the data from that
file unless told otherwise. This overrides hybrid mode.
"""
use_kpoints_opt = not ignore_kpoints_opt and (getattr(self, "kpoints_opt_props", None) is not None)
if not kpoints_filename:
kpts_path = os.path.join(os.path.dirname(self.filename), "KPOINTS_OPT" if use_kpoints_opt else "KPOINTS")
kpoints_filename = zpath(kpts_path)
if kpoints_filename and not os.path.isfile(kpoints_filename) and line_mode:
name = "KPOINTS_OPT" if use_kpoints_opt else "KPOINTS"
raise VaspParseError(f"{name} not found but needed to obtain band structure along symmetry lines.")
# Note that we're using the Fermi energy of the self-consistent grid
# run even if we're reading bands from KPOINTS_OPT.
if efermi == "smart":
e_fermi: float | None = self.calculate_efermi()
elif efermi is None:
e_fermi = self.efermi
else:
e_fermi = efermi
if e_fermi is None:
raise ValueError("e_fermi is None.")
kpoint_file: Kpoints | None = None
if kpoints_filename and os.path.isfile(kpoints_filename):
kpoint_file = Kpoints.from_file(kpoints_filename)
lattice_new = Lattice(self.final_structure.lattice.reciprocal_lattice.matrix)
if use_kpoints_opt:
if self.kpoints_opt_props is None or self.kpoints_opt_props.actual_kpoints is None:
raise RuntimeError("KPOINTS_opt or actual_kpoints is None.")
kpoints = [np.array(kpt) for kpt in self.kpoints_opt_props.actual_kpoints]
else:
if self.actual_kpoints is None:
raise RuntimeError("actual_kpoints is None.")
kpoints = [np.array(kpt) for kpt in self.actual_kpoints]
p_eig_vals: defaultdict[Spin, list] = defaultdict(list)
eigenvals: defaultdict[Spin, list] = defaultdict(list)
n_kpts = len(kpoints)
if use_kpoints_opt:
if self.kpoints_opt_props is None:
raise RuntimeError("KPOINTS_opt is None.")
eig_vals = self.kpoints_opt_props.eigenvalues
projected_eig_vals = self.kpoints_opt_props.projected_eigenvalues
else:
eig_vals = self.eigenvalues
projected_eig_vals = self.projected_eigenvalues
if eig_vals is None:
raise ValueError("Eigenvalues are None.")
for spin, val in eig_vals.items():
val = np.swapaxes(val, 0, 1)
eigenvals[spin] = val[:, :, 0]
if projected_eig_vals:
proj_eig_vals = projected_eig_vals[spin]
# Original axes for self.projected_eigenvalues are kpoints, band, ion, orb.
# For BS input, we need band, kpoints, orb, ion.
proj_eig_vals = np.swapaxes(proj_eig_vals, 0, 1) # Swap kpoint and band axes
proj_eig_vals = np.swapaxes(proj_eig_vals, 2, 3) # Swap ion and orb axes
p_eig_vals[spin] = proj_eig_vals
# Check if we have an hybrid band structure computation
# for this we look at the presence of the LHFCALC tag
# (but hybrid mode is redundant if using kpoints_opt)
hybrid_band = False
if self.parameters.get("LHFCALC", False) or 0.0 in self.actual_kpoints_weights:
hybrid_band = True
if kpoint_file is not None and kpoint_file.style == Kpoints.supported_modes.Line_mode:
line_mode = True
if line_mode:
labels_dict = {}
if kpoint_file is None:
raise RuntimeError("Kpoint file cannot be None for line mode.")
if (hybrid_band or force_hybrid_mode) and not use_kpoints_opt:
start_bs_index = 0
for i in range(len(self.actual_kpoints)):