-
Notifications
You must be signed in to change notification settings - Fork 874
/
Copy pathcommon.py
416 lines (353 loc) · 16.4 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
"""Module for defining common data used and produced by atomistic simulation packages."""
from __future__ import annotations
import itertools
import json
import warnings
from copy import deepcopy
from typing import TYPE_CHECKING
import numpy as np
from monty.io import zopen
from monty.json import MSONable
from scipy.interpolate import RegularGridInterpolator
from pymatgen.core import Element, Site, Structure
from pymatgen.core.units import ang_to_bohr, bohr_to_angstrom
from pymatgen.electronic_structure.core import Spin
if TYPE_CHECKING:
from pathlib import Path
from typing_extensions import Self
class VolumetricData(MSONable):
    """
    Simple volumetric object. Used to read LOCPOT/CHGCAR files produced by
    VASP as well as cube files produced by other codes.

    Attributes:
        structure (Structure): Structure associated with the volumetric data.
        is_spin_polarized (bool): True if ``data`` holds two or more entries.
        is_soc (bool): True if ``data`` holds four or more entries
            (presumably a spin-orbit-coupling run; the constructor only
            checks the number of entries).
        dim (tuple): Shape of ``data["total"]``, i.e. the number of grid
            points in each direction (nx, ny, nz).
        data (dict): Actual data as a dict of {string: np.array}. The keys
            are "total" and "diff", in accordance with the output format of
            VASP LOCPOT and CHGCAR files where the total spin density is
            written first, followed by the difference spin density.
        data_aug (dict): Extra information associated with the volumetric
            data (typically augmentation charges). Empty if none was given.
        ngridpts (int): Total number of grid points in the volumetric data.
        xpoints, ypoints, zpoints (np.ndarray): Fractional sample positions
            along each axis used by ``interpolator``.
        interpolator (RegularGridInterpolator): Interpolator over
            ``data["total"]``, used by ``value_at``.
        name (str): Label written to HDF5 output; "VolumetricData" here.
    """

    def __init__(
        self,
        structure: Structure,
        data: dict[str, np.ndarray],
        distance_matrix: dict | None = None,
        data_aug: dict | None = None,
    ) -> None:
        """
        Typically, this constructor is not used directly and the
        from_file-style constructors are used instead. This constructor is
        designed to allow summation and other operations between
        VolumetricData objects.

        Args:
            structure (Structure): Structure associated with the volumetric data.
            data (dict[str, np.array]): Actual volumetric data. Must contain
                the key "total"; values are converted to numpy arrays.
            distance_matrix (dict): A pre-computed distance-matrix cache if
                available, keyed by atom index. Useful to pass distance
                matrices between sums, short-circuiting an otherwise
                expensive operation.
            data_aug (dict): Any extra information associated with the
                volumetric data (typically augmentation charges).
        """
        self.structure = structure
        self.is_spin_polarized = len(data) >= 2
        self.is_soc = len(data) >= 4

        # Convert data to numpy arrays in case they were jsanitized as lists.
        self.data = {key: np.array(values) for key, values in data.items()}

        # Explicit ``is None`` checks: ``value or {}`` raises ValueError when
        # the value is a numpy array with more than one element.
        self.data_aug = {} if data_aug is None else data_aug
        self._distance_matrix = {} if distance_matrix is None else distance_matrix

        self.name = "VolumetricData"
        self._refresh_grid_state()

    def _refresh_grid_state(self) -> None:
        """Rebuild every attribute that is derived from ``self.data``.

        Must be called whenever ``self.data`` is replaced or modified so that
        ``dim``, ``ngridpts``, the lazily computed spin data and the
        interpolator never describe stale values.
        """
        self.dim = self.data["total"].shape
        self.ngridpts = self.dim[0] * self.dim[1] * self.dim[2]

        # Lazily initialised by the ``spin_data`` property, since it is not
        # always needed.
        self._spin_data: dict[Spin, np.ndarray] = {}

        # NOTE(review): np.linspace includes the endpoint, so the last sample
        # of each axis sits at fractional coordinate 1.0, whereas
        # get_axis_grid spaces samples at i / n. Confirm which convention
        # callers of value_at expect.
        self.xpoints = np.linspace(0.0, 1.0, num=self.dim[0])
        self.ypoints = np.linspace(0.0, 1.0, num=self.dim[1])
        self.zpoints = np.linspace(0.0, 1.0, num=self.dim[2])
        self.interpolator = RegularGridInterpolator(
            (self.xpoints, self.ypoints, self.zpoints),
            self.data["total"],
            bounds_error=True,
        )

    @property
    def spin_data(self):
        """The data decomposed into actual spin data as {spin: data}.

        Essentially, this provides the actual Spin.up and Spin.down data
        instead of the total and diff. A missing "diff" entry is treated as
        zero, so a non-spin-polarized run has Spin.up data == Spin.down data.
        The result is computed on first access and cached.
        """
        if not self._spin_data:
            total = self.data["total"]
            diff = self.data.get("diff", 0)
            self._spin_data = {
                Spin.up: 0.5 * (total + diff),
                Spin.down: 0.5 * (total - diff),
            }
        return self._spin_data

    def get_axis_grid(self, ind):
        """Get the grid for a particular axis.

        Args:
            ind (int): Axis index.

        Returns:
            List of positions ``i / n * length`` for ``i`` in ``range(n)``,
            where ``n`` is the number of grid points along the axis and
            ``length`` the corresponding lattice parameter.
        """
        num_pts = self.dim[ind]
        length = self.structure.lattice.abc[ind]
        return [i / num_pts * length for i in range(num_pts)]

    def __add__(self, other):
        """Return ``self + other`` via ``linear_add`` with scale factor 1."""
        return self.linear_add(other, 1.0)

    def __sub__(self, other):
        """Return ``self - other`` via ``linear_add`` with scale factor -1."""
        return self.linear_add(other, -1.0)

    def copy(self) -> Self:
        """Make a copy of VolumetricData object.

        The data arrays are copied; the structure, the distance-matrix cache
        and ``data_aug`` are passed to the new object by reference.
        """
        return VolumetricData(
            self.structure,
            {key: values.copy() for key, values in self.data.items()},
            distance_matrix=self._distance_matrix,
            data_aug=self.data_aug,
        )

    def linear_add(self, other, scale_factor=1.0):
        """
        Method to do a linear sum of volumetric objects. Used by + and -
        operators as well. Returns a VolumetricData object containing the
        linear sum.

        Args:
            other (VolumetricData): Another VolumetricData object
            scale_factor (float): Factor to scale the other data by.

        Returns:
            VolumetricData corresponding to self + scale_factor * other.
            Its ``data_aug`` is reset to an empty dict.

        Raises:
            ValueError: If the two objects do not have the same data keys in
                the same order.
        """
        if self.structure != other.structure:
            warnings.warn("Structures are different. Make sure you know what you are doing...")
        if list(self.data) != list(other.data):
            raise ValueError("Data have different keys! Maybe one is spin-polarized and the other is not?")

        # To add checks
        summed = {key: self.data[key] + scale_factor * other.data[key] for key in self.data}

        # Deep-copy so that every other attribute (including those added by
        # subclasses) carries over to the result.
        new = deepcopy(self)
        new.data = summed
        new.data_aug = {}
        # The deep copy still holds the interpolator and any cached spin data
        # of ``self``; rebuild them so they describe the summed data.
        new._refresh_grid_state()
        return new

    def scale(self, factor):
        """Scale the data in place by a factor.

        Args:
            factor (float): Multiplier applied to every entry of ``self.data``.
        """
        for key in self.data:
            self.data[key] = np.multiply(self.data[key], factor)
        # np.multiply allocates new arrays, so the interpolator and cached
        # spin data would otherwise keep referring to the unscaled values.
        self._refresh_grid_state()

    def value_at(self, x, y, z):
        """Get a data value from self.data at a given point (x, y, z) in terms
        of fractional lattice parameters. Will be interpolated using a
        RegularGridInterpolator on self.data["total"] if (x, y, z) is not in
        the original set of data points.

        Args:
            x (float): Fraction of lattice vector a.
            y (float): Fraction of lattice vector b.
            z (float): Fraction of lattice vector c.

        Returns:
            Value from self.data["total"] (potentially interpolated)
            corresponding to the point (x, y, z). The interpolator is built
            with ``bounds_error=True``, so points outside [0, 1] raise.
        """
        return self.interpolator([x, y, z])[0]

    def linear_slice(self, p1, p2, n=100):
        """Get a linear slice of the volumetric data with n data points from
        point p1 to point p2, in the form of a list.

        Args:
            p1 (list): 3-element list containing fractional coordinates of the first point.
            p2 (list): 3-element list containing fractional coordinates of the second point.
            n (int): Number of data points to collect, defaults to 100.

        Returns:
            List of n data points (mostly interpolated) representing a linear slice of the
            data from point p1 to point p2.

        Raises:
            TypeError: If p1 or p2 is neither a list nor a np.ndarray.
            ValueError: If p1 or p2 does not have exactly three elements.
        """
        for label, point in (("p1", p1), ("p2", p2)):
            # isinstance (rather than an exact type match) also admits
            # subclasses of list / np.ndarray.
            if not isinstance(point, (list, np.ndarray)):
                raise TypeError(f"type of {label} should be list or np.ndarray, got {type(point).__name__}")
            if len(point) != 3:
                raise ValueError(f"length of {label} should be 3, got {len(point)}")

        x_pts, y_pts, z_pts = (np.linspace(p1[axis], p2[axis], num=n) for axis in range(3))
        return [self.value_at(x, y, z) for x, y, z in zip(x_pts, y_pts, z_pts)]

    def get_integrated_diff(self, ind, radius, nbins=1):
        """Get integrated difference of atom index ind up to radius. This can be
        an extremely computationally intensive process, depending on how many
        grid points are in the VolumetricData.

        Args:
            ind (int): Index of atom.
            radius (float): Radius of integration.
            nbins (int): Number of bins. Defaults to 1. This allows one to
                obtain the charge integration up to a list of the cumulative
                charge integration values for radii for [radius/nbins,
                2 * radius/nbins, ....].

        Returns:
            Differential integrated charge as a np array of [[radius, value],
            ...]. Format is for ease of plotting. e.g. plt.plot(data[:,0],
            data[:,1])
        """
        # For non-spin-polarized runs, this is zero by definition.
        if not self.is_spin_polarized:
            data = np.zeros((nbins, 2))
            data[:, 0] = [radius / nbins * (i + 1) for i in range(nbins)]
            return data

        struct = self.structure
        dim = self.dim

        # The neighbour search is cached per atom and only redone when a
        # larger radius than the cached one is requested.
        if ind not in self._distance_matrix or self._distance_matrix[ind]["max_radius"] < radius:
            coords = [[x / dim[0], y / dim[1], z / dim[2]] for x, y, z in itertools.product(*map(range, dim))]
            sites_dist = struct.lattice.get_points_in_sphere(coords, struct[ind].coords, radius)
            self._distance_matrix[ind] = {
                "max_radius": radius,
                "data": np.array(sites_dist, dtype=object),
            }

        data = self._distance_matrix[ind]["data"]

        # Use boolean indexing to find all charges within the desired distance.
        # An empty neighbour list cannot be indexed column-wise, so it is
        # handled separately and simply contributes nothing.
        if data.size:
            within = data[:, 1] <= radius
            dists = data[within, 1]
            frac_coords = list(data[within, 0])
        else:
            dists, frac_coords = [], []

        if len(frac_coords) > 0:
            # Map fractional coordinates back to grid indices. The final
            # modulo wraps an index that rounds up to ``dim`` back to 0
            # (periodic boundary) instead of running off the end of the grid.
            grid_inds = np.rint(np.mod(frac_coords, 1) * dim).astype(int) % dim
            vals = self.data["diff"][tuple(grid_inds.T)]
        else:
            vals = []

        hist, edges = np.histogram(dists, bins=nbins, range=[0, radius], weights=vals)
        data = np.zeros((nbins, 2))
        data[:, 0] = edges[1:]
        data[:, 1] = np.cumsum(hist) / self.ngridpts
        return data

    def get_average_along_axis(self, ind):
        """Get the averaged total of the volumetric data a certain axis direction.
        For example, useful for visualizing Hartree Potentials from a LOCPOT
        file.

        Args:
            ind (int): Index of axis. 0 and 1 select the first and second
                axis; any other value selects the third axis.

        Returns:
            Average total along axis
        """
        total_spin_dens = self.data["total"]
        ng = self.dim
        # Sum over the two axes other than ``ind``; after the first reduction
        # the remaining axes are renumbered, hence the second axis argument.
        if ind == 0:
            total = np.sum(np.sum(total_spin_dens, axis=1), 1)
        elif ind == 1:
            total = np.sum(np.sum(total_spin_dens, axis=0), 1)
        else:
            total = np.sum(np.sum(total_spin_dens, axis=0), 0)
        return total / ng[(ind + 1) % 3] / ng[(ind + 2) % 3]

    def to_hdf5(self, filename):
        """Write the VolumetricData to a HDF5 format, which is a highly optimized
        format for reading storing large data. The mapping of the VolumetricData
        to this file format is as follows:

        VolumetricData.data -> f["vdata"]
        VolumetricData.structure ->
            f["Z"]: Sequence of atomic numbers
            f["fcoords"]: Fractional coords
            f["lattice"]: Lattice in the pymatgen.core.Lattice matrix
                format
            f["species"]: Sequence of species strings
            f.attrs["structure_json"]: String of JSON representation
        VolumetricData.name -> f.attrs["name"]

        NOTE: ``data_aug`` is not written by this method.

        Args:
            filename (str): Filename to output to.
        """
        import h5py

        structure = self.structure
        species = structure.species

        with h5py.File(filename, mode="w") as file:
            ds = file.create_dataset("lattice", (3, 3), dtype="float")
            ds[...] = structure.lattice.matrix

            ds = file.create_dataset("Z", (len(species),), dtype="i")
            ds[...] = np.array([sp.Z for sp in species])

            ds = file.create_dataset("fcoords", structure.frac_coords.shape, dtype="float")
            ds[...] = structure.frac_coords

            # Variable-length string dtype for the species labels.
            dt = h5py.special_dtype(vlen=str)
            ds = file.create_dataset("species", (len(species),), dtype=dt)
            ds[...] = [str(sp) for sp in species]

            grp = file.create_group("vdata")
            for key, values in self.data.items():
                ds = grp.create_dataset(key, values.shape, dtype="float")
                ds[...] = values

            file.attrs["name"] = self.name
            file.attrs["structure_json"] = json.dumps(structure.as_dict())

    @classmethod
    def from_hdf5(cls, filename: str, **kwargs) -> Self:
        """
        Reads VolumetricData from HDF5 file.

        Args:
            filename: Filename
            **kwargs: Extra keyword arguments forwarded to the constructor.

        Returns:
            VolumetricData built from the "vdata" group, the optional
            "vdata_aug" group and the "structure_json" attribute.
        """
        import h5py

        with h5py.File(filename, mode="r") as file:
            data = {key: np.array(values) for key, values in file["vdata"].items()}
            data_aug = None
            if "vdata_aug" in file:
                data_aug = {key: np.array(values) for key, values in file["vdata_aug"].items()}
            structure = Structure.from_dict(json.loads(file.attrs["structure_json"]))
            return cls(structure, data=data, data_aug=data_aug, **kwargs)

    def to_cube(self, filename, comment: str = ""):
        """Write the total volumetric data to a cube file format, which consists of two comment lines,
        a header section defining the structure IN BOHR, and the data.

        Args:
            filename (str): Name of the cube file to be written.
            comment (str): If provided, this will be added to the second comment line
        """
        with zopen(filename, mode="wt") as file:
            file.write(f"# Cube file for {self.structure.formula} generated by Pymatgen\n")
            file.write(f"# {comment}\n")
            # Number of atoms followed by the origin, which is always written as zero.
            file.write(f"\t {len(self.structure)} 0.000000 0.000000 0.000000\n")

            # One line per axis: number of voxels and the voxel vector in Bohr.
            for idx in range(3):
                lattice_matrix = self.structure.lattice.matrix[idx] / self.dim[idx] * ang_to_bohr
                file.write(
                    f"\t {self.dim[idx]} {lattice_matrix[0]:.6f} {lattice_matrix[1]:.6f} {lattice_matrix[2]:.6f}\n"
                )

            # One line per atom: atomic number, a zero charge column and the
            # Cartesian coordinates in Bohr.
            for site in self.structure:
                file.write(
                    f"\t {Element(site.species_string).Z} 0.000000 "
                    f"{ang_to_bohr * site.coords[0]} "
                    f"{ang_to_bohr * site.coords[1]} "
                    f"{ang_to_bohr * site.coords[2]} \n"
                )

            # Flattened data values, six per line.
            for idx, dat in enumerate(self.data["total"].flatten(), start=1):
                file.write(f"{' ' if dat > 0 else ''}{dat:.6e} ")
                if idx % 6 == 0:
                    file.write("\n")

    @classmethod
    def from_cube(cls, filename: str | Path) -> Self:
        """
        Build a VolumetricData object from a cube file. The grid values are
        stored under the "total" key.

        Args:
            filename (str): of the cube to read
        """
        # Read inside a context manager so the file handle is always closed.
        with zopen(filename, mode="rt") as file:
            # skip header lines
            file.readline()
            file.readline()

            # number of atoms followed by the position of the origin of the volumetric data
            # (the origin is not used)
            tokens = file.readline().split()
            n_atoms = int(tokens[0])

            # The number of voxels along each axis (x, y, z) followed by the axis vector.
            voxel_counts = []
            voxel_vectors = []
            for _ in range(3):
                tokens = file.readline().split()
                voxel_counts.append(int(tokens[0]))
                voxel_vectors.append(np.array([bohr_to_angstrom * float(val) for val in tokens[1:]]))

            # The last section in the header is one line for each atom consisting of 5 numbers,
            # the first is the atom number, second is charge,
            # the last three are the x,y,z coordinates of the atom center.
            sites = []
            for _ in range(n_atoms):
                tokens = file.readline().split()
                sites.append(Site(tokens[0], np.multiply(bohr_to_angstrom, list(map(float, tokens[2:])))))

            # Everything that remains is volumetric data.
            raw_values = file.read().split()

        structure = Structure(
            lattice=[vector * count for vector, count in zip(voxel_vectors, voxel_counts)],
            species=[site.specie for site in sites],
            coords=[site.coords for site in sites],
            coords_are_cartesian=True,
        )

        # Volumetric data
        data = np.reshape(np.array(raw_values).astype(float), tuple(voxel_counts))
        return cls(structure=structure, data={"total": data})