Skip to content

Commit

Permalink
Avoid bader_caller from altering compressed file in place (#3660)
Browse files Browse the repository at this point in the history
* add type hint and fix mypy error

* fix calling of class

* format cleanups

* add type hint and format tweak

* remove unnecessary var

* simplify Bader executable fetch

* clean up __init__

* revert change of BaderAnalysis

* avoid changing compressed file in place

* revert removal of arg bader_path

* revise docstring

* fix chgcar_path possibly unbound pyright error

* format tweaks

---------

Co-authored-by: Janosh Riebesell <[email protected]>
  • Loading branch information
DanielYang59 and janosh authored Mar 1, 2024
1 parent d07164f commit 83806df
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 77 deletions.
200 changes: 124 additions & 76 deletions pymatgen/command_line/bader_caller.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
from __future__ import annotations

import os
import shutil
import subprocess
import warnings
from datetime import datetime
from glob import glob
from pathlib import Path
from shutil import which
from tempfile import TemporaryDirectory
from typing import TYPE_CHECKING, Any
Expand All @@ -42,11 +44,8 @@
__date__ = "4/5/13"


BADER_EXE = which("bader") or which("bader.exe")


class BaderAnalysis:
"""Performs Bader analysis for Cube files and VASP outputs.
"""Performs Bader charge analysis for Cube files or VASP outputs.
Attributes:
data (list[dict]): Atomic data parsed from bader analysis.
Expand All @@ -57,21 +56,20 @@ class BaderAnalysis:
nelectrons (int): Number of electrons of the Bader analysis.
chgcar (Chgcar): Chgcar object associated with input CHGCAR file.
atomic_densities (list[dict]): List of charge densities for each
atom centered on the atom. Excess 0's are removed from the array
to reduce its size. Each dictionary has the keys:
atom centered on the atom. Each dictionary has the keys:
"data", "shift", "dim", where "data" is the charge density array,
"shift" is the shift used to center the atomic charge density, and
"dim" is the dimension of the original charge density map.
"dim" is the dimension of the original charge density.
"""

def __init__(
self,
chgcar_filename: str = "",
potcar_filename: str = "",
chgref_filename: str = "",
parse_atomic_densities: bool = False,
cube_filename: str = "",
bader_exe_path: str | None = BADER_EXE,
bader_path: str | None = None,
parse_atomic_densities: bool = False,
) -> None:
"""Initializes the Bader caller.
Expand All @@ -80,44 +78,66 @@ def __init__(
potcar_filename (str): The filename of the POTCAR.
chgref_filename (str): The filename of the
reference charge density.
parse_atomic_densities (bool, optional): Enable atomic partition
of the charge density. Charge densities are atom centered.
Defaults to False.
cube_filename (str, optional): The filename of the cube file.
bader_exe_path (str, optional): The path to the bader executable.
bader_path (str, optional): The path to the bader executable.
parse_atomic_densities (bool, optional): Enable atomic partition of the
charge density. Charge densities are atom centered. Defaults to False.
"""
bader_exe = which(bader_exe_path or "")
if bader_exe is None:

def temp_decompress(file: str | Path, target_dir: str = ".") -> str:
"""Utility function to copy a compressed file to a target directory (ScratchDir)
and decompress it, to avoid modifying files in place.
Parameters:
file (str | Path): The path to the compressed file to be decompressed.
target_dir (str, optional): The target directory where the decompressed file will be stored.
Defaults to "." (current directory).
Returns:
str: The path to the decompressed file if successful.
"""
file = Path(file)

if file.suffix.lower() in {".bz2", ".gz", ".z"}:
shutil.copy(file, f"{target_dir}/{file.name}")
out_file = decompress_file(f"{target_dir}/{file.name}")

if file: # to avoid mypy error
return str(out_file)

raise FileNotFoundError(f"File {out_file} not found.")

return str(file)

# Get and check Bader executable path
if bader_path is None:
bader_path = which("bader") or which("bader.exe")

if bader_path is None:
raise RuntimeError(
"BaderAnalysis requires bader or bader.exe to be in the PATH "
"the absolute path to the binary to be specified "
f"via {bader_exe_path=}. Download the binary at "
"https://theory.cm.utexas.edu/henkelman/code/bader."
"Requires bader or bader.exe to be in the PATH or the absolute path to "
f"the binary to be specified via {bader_path=}.\n"
"Download from https://theory.cm.utexas.edu/henkelman/code/bader."
)

# Check input cube/CHGCAR files
if not (cube_filename or chgcar_filename):
raise ValueError("You must provide either a cube file or a CHGCAR")
raise ValueError("You must provide either a Cube file or a CHGCAR.")
if cube_filename and chgcar_filename:
raise ValueError(
f"You cannot parse a cube and a CHGCAR at the same time. \n{cube_filename=}\n{chgcar_filename=}"
)
raise ValueError("Cannot parse cube and CHGCAR at the same time.")

self.parse_atomic_densities = parse_atomic_densities

with ScratchDir("."):
if chgcar_filename:
self.is_vasp = True
filepath = chgcar_fpath = temp_decompress(chgcar_filename)

# Decompress the file if compressed
fpath = chgcar_fpath = decompress_file(filepath=chgcar_filename) or chgcar_filename
self.chgcar = Chgcar.from_file(chgcar_fpath)
self.structure = self.chgcar.structure

self.potcar = Potcar.from_file(potcar_filename) if potcar_filename else None
self.natoms = self.chgcar.poscar.natoms

chgref_fpath = decompress_file(filepath=chgref_filename) or chgref_filename
self.reference_used = bool(chgref_filename)

# List of nelects for each atom from potcar
potcar_indices = []
for idx, val in enumerate(self.natoms):
Expand All @@ -128,80 +148,101 @@ def __init__(
else []
)

# Parse from Cube file
else:
self.is_vasp = False
fpath = cube_fpath = decompress_file(filepath=cube_filename) or cube_filename
filepath = cube_fpath = temp_decompress(cube_filename)

self.cube = VolumetricData.from_cube(cube_fpath)
self.structure = self.cube.structure
self.nelects = []
chgref_fpath = decompress_file(filepath=chgref_filename) or chgref_filename
self.reference_used = bool(chgref_filename)

args = [bader_exe, fpath]
# Prepare CHGCAR reference file
chgref_fpath = temp_decompress(chgref_filename)
self.reference_used = bool(chgref_filename)

bader_args = [bader_path, filepath]

if chgref_fpath:
args += ["-ref", chgref_fpath]
if self.reference_used:
bader_args += ["-ref", chgref_fpath]

if parse_atomic_densities:
args += ["-p", "all_atom"]
bader_args += ["-p", "all_atom"]

with subprocess.Popen(
args,
bader_args,
stdout=subprocess.PIPE,
stdin=subprocess.PIPE,
close_fds=True,
) as proc:
stdout, stderr = proc.communicate()
if proc.returncode != 0:
raise RuntimeError(
f"{bader_exe} exit code: {proc.returncode}, "
f"error message: {stderr!s}.\nstdout: {stdout!s}"
"Please check your bader installation."
f"{bader_path} exit code: {proc.returncode}, error: {stderr!s}."
f"\nstdout: {stdout!s}. Please check your bader installation."
)

# Determine Bader version
try:
self.version = float(stdout.split()[5])
except ValueError:
self.version = -1 # Unknown

if self.version < 1.0:
warnings.warn(
"Your installed version of Bader is outdated, calculation of vacuum charge may be incorrect.",
UserWarning,
"Your installed version of Bader is outdated, calculation of vacuum charge may be incorrect."
)

data = []
with open("ACF.dat") as file:
lines = file.readlines()
headers = ("x", "y", "z", "charge", "min_dist", "atomic_vol")
lines.pop(0)
lines.pop(0)
while True:
line = lines.pop(0).strip()
if line.startswith("-"):
break
vals = map(float, line.split()[1:])
data.append(dict(zip(headers, vals)))
for line in lines:
tokens = line.strip().split(":")
if tokens[0] == "VACUUM CHARGE":
self.vacuum_charge = float(tokens[1])
elif tokens[0] == "VACUUM VOLUME":
self.vacuum_volume = float(tokens[1])
elif tokens[0] == "NUMBER OF ELECTRONS":
self.nelectrons = float(tokens[1])
self.data = data
# Parse ACF.dat file
self.data = self._parse_acf()

# Parse atomic densities
if self.parse_atomic_densities:
self._parse_atomic_densities()
self.atomic_densities = self._parse_atomic_densities()

def _parse_acf(self) -> list[dict]:
"""Parse Bader output file ACF.dat."""
with open("ACF.dat", encoding="us-ascii") as file:
lines = file.readlines()

# Skip header lines
headers = ("x", "y", "z", "charge", "min_dist", "atomic_vol")
lines.pop(0)
lines.pop(0)

data = []
while True:
line = lines.pop(0).strip()
if line.startswith("-"):
break
vals = map(float, line.split()[1:])
data.append(dict(zip(headers, vals)))

for line in lines:
tokens = line.strip().split(":")
if tokens[0] == "VACUUM CHARGE":
self.vacuum_charge = float(tokens[1])
elif tokens[0] == "VACUUM VOLUME":
self.vacuum_volume = float(tokens[1])
elif tokens[0] == "NUMBER OF ELECTRONS":
self.nelectrons = float(tokens[1])

return data

@deprecated(
message=(
"parse_atomic_densities was deprecated on 2024-02-26 "
"and will be removed on 2025-02-26. See https://"
"and will be removed on 2025-02-26.\nSee https://"
"github.com/materialsproject/pymatgen/issues/3652 for details."
)
)
def _parse_atomic_densities(self):
def _parse_atomic_densities(self) -> list[dict]:
"""Parse atom-centered charge densities with excess zeros removed.
Each dictionary has the keys:
"data", "shift", "dim", where "data" is the charge density array,
"shift" is the shift used to center the atomic charge density, and
"dim" is the dimension of the original charge density.
"""
# Deprecation tracker
if (
datetime(2025, 2, 26) < datetime.now()
Expand All @@ -210,7 +251,7 @@ def _parse_atomic_densities(self):
):
raise RuntimeError("This method should have been removed, see #3656.")

def slice_from_center(data, x_width, y_width, z_width):
def slice_from_center(data: np.ndarray, x_width: int, y_width: int, z_width: int) -> np.ndarray:
"""Slices a central window from the data array."""
x, y, z = data.shape
start_x = x // 2 - (x_width // 2)
Expand All @@ -222,7 +263,7 @@ def slice_from_center(data, x_width, y_width, z_width):
start_z : start_z + z_width,
]

def find_encompassing_vol(data: np.ndarray):
def find_encompassing_vol(data: np.ndarray) -> np.ndarray | None:
"""Find the central encompassing volume which
holds all the data within a precision.
"""
Expand Down Expand Up @@ -260,7 +301,8 @@ def find_encompassing_vol(data: np.ndarray):
"dim": self.chgcar.dim,
}
atomic_densities.append(dct)
self.atomic_densities = atomic_densities

return atomic_densities

def get_charge(self, atom_index: int) -> float:
"""Convenience method to get the charge on a particular atom. This is the "raw"
Expand Down Expand Up @@ -416,7 +458,7 @@ def from_path(cls, path: str, suffix: str = "") -> BaderAnalysis:
def _get_filepath(filename):
name_pattern = f"{filename}{suffix}*" if filename != "POTCAR" else f"{filename}*"
paths = glob(f"{path}/{name_pattern}")
fpath = ""
filepath = ""
if len(paths) >= 1:
# using reverse=True because, if multiple files are present, they likely
# have suffixes 'static', 'relax', 'relax2', etc. and this would give
Expand All @@ -425,15 +467,15 @@ def _get_filepath(filename):
paths.sort(reverse=True)
if len(paths) > 1:
warnings.warn(f"Multiple files detected, using {paths[0]}")
fpath = paths[0]
filepath = paths[0]
else:
msg = f"Could not find {filename!r}"
if filename in ("AECCAR0", "AECCAR2"):
msg += ", interpret Bader results with severe caution."
elif filename == "POTCAR":
msg += ", cannot calculate charge transfer."
warnings.warn(msg)
return fpath
return filepath

chgcar_filename = _get_filepath("CHGCAR")
if chgcar_filename is None:
Expand All @@ -455,7 +497,7 @@ def _get_filepath(filename):
)


def bader_analysis_from_path(path, suffix=""):
def bader_analysis_from_path(path: str, suffix: str = ""):
"""Convenience method to run Bader analysis on a folder containing
typical VASP output files.
Expand Down Expand Up @@ -489,7 +531,8 @@ def _get_filepath(filename: str, msg: str = "") -> str | None:
return paths[0]

chgcar_path = _get_filepath("CHGCAR", "Could not find CHGCAR!")
chgcar = Chgcar.from_file(chgcar_path)
if chgcar_path is not None:
chgcar = Chgcar.from_file(chgcar_path)

aeccar0_path = _get_filepath("AECCAR0")
if not aeccar0_path:
Expand All @@ -509,7 +552,12 @@ def _get_filepath(filename: str, msg: str = "") -> str | None:
return bader_analysis_from_objects(chgcar, potcar, aeccar0, aeccar2)


def bader_analysis_from_objects(chgcar, potcar=None, aeccar0=None, aeccar2=None):
def bader_analysis_from_objects(
chgcar: Chgcar,
potcar: Potcar | None = None,
aeccar0: Chgcar | None = None,
aeccar2: Chgcar | None = None,
):
"""Convenience method to run Bader analysis from a set
of pymatgen Chgcar and Potcar objects.
Expand Down
2 changes: 1 addition & 1 deletion tests/command_line/test_bader_caller.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,4 @@ def test_missing_file_bader_exe_path(self):
RuntimeError, match="BaderAnalysis requires the executable bader be in the PATH or the full path "
),
):
BaderAnalysis(chgcar_filename=f"{TEST_FILES_DIR}/CHGCAR.Fe3O4", bader_exe_path="")
BaderAnalysis(chgcar_filename=f"{TEST_FILES_DIR}/CHGCAR.Fe3O4", bader_path="")

0 comments on commit 83806df

Please sign in to comment.