materialsproject · janosh · Mar 1, 2024 · Feb 28, 2024 · Feb 28, 2024 · Feb 28, 2024
@@ -80,18 +80,16 @@ def __init__(
                 reference charge density.
             cube_filename (str, optional): The filename of the cube file.
             bader_path (str, optional): The path to the bader executable.
-            parse_atomic_densities (bool, optional): Enable atomic partition
-                of the charge density. Charge densities are atom centered.
-                Defaults to False.
+            parse_atomic_densities (bool, optional): Enable atomic partition of the
+                charge density. Charge densities are atom centered. Defaults to False.
         """
 
         def temp_decompress(file: str | Path, target_dir: str = ".") -> str:
-            """
-            Utility function to copy a compressed file to a target directory (ScratchDir)
+            """Utility function to copy a compressed file to a target directory (ScratchDir)
             and decompress it, to avoid modifying files in place.
 
             Parameters:
-                file (str or Path): The path to the compressed file to be decompressed.
+                file (str | Path): The path to the compressed file to be decompressed.
                 target_dir (str, optional): The target directory where the decompressed file will be stored.
                     Defaults to "." (current directory).
 
@@ -100,14 +98,14 @@ def temp_decompress(file: str | Path, target_dir: str = ".") -> str:
             """
             file = Path(file)
 
-            if file.suffix.lower() in [".bz2", ".gz", ".z"]:
+            if file.suffix.lower() in (".bz2", ".gz", ".z"):
                 shutil.copy(file, f"{target_dir}/{file.name}")
-                outputfile = decompress_file(f"{target_dir}/{file.name}")
+                out_file = decompress_file(f"{target_dir}/{file.name}")
 
                 if file:  # to avoid mypy error
-                    return str(outputfile)
+                    return str(out_file)
 
-                raise FileNotFoundError(f"File {outputfile} not found.")
+                raise FileNotFoundError(f"File {out_file} not found.")
 
             return str(file)
 
@@ -117,22 +115,22 @@ def temp_decompress(file: str | Path, target_dir: str = ".") -> str:
 
         if bader_path is None:
             raise RuntimeError(
-                "Requires bader or bader.exe to be in the PATH.\n"
+                "Requires bader or bader.exe to be in the PATH or the absolute path to "
+                f"the binary to be specified via {bader_path=}.\n"
                 "Download from https://theory.cm.utexas.edu/henkelman/code/bader."
             )
 
         # Check input cube/CHGCAR files
         if not (cube_filename or chgcar_filename):
-            raise ValueError("You must provide either a cube file or a CHGCAR.")
+            raise ValueError("You must provide either a Cube file or a CHGCAR.")
         if cube_filename and chgcar_filename:
             raise ValueError("Cannot parse cube and CHGCAR at the same time.")
 
-        # Flag for parse atomic partitioned charge density
         self.parse_atomic_densities = parse_atomic_densities
 
         with ScratchDir("."):
             if chgcar_filename:
-                fpath = chgcar_fpath = temp_decompress(chgcar_filename)
+                filepath = chgcar_fpath = temp_decompress(chgcar_filename)
 
                 self.chgcar = Chgcar.from_file(chgcar_fpath)
                 self.structure = self.chgcar.structure
@@ -150,20 +148,19 @@ def temp_decompress(file: str | Path, target_dir: str = ".") -> str:
                     else []
                 )
 
-            # Parse from cube file
+            # Parse from Cube file
             else:
-                fpath = cube_fpath = temp_decompress(cube_filename)
+                filepath = cube_fpath = temp_decompress(cube_filename)
 
                 self.cube = VolumetricData.from_cube(cube_fpath)
                 self.structure = self.cube.structure
                 self.nelects = []
 
-            # Compile CHGCAR reference file
+            # prepare CHGCAR file
             chgref_fpath = temp_decompress(chgref_filename)
             self.reference_used = bool(chgref_filename)
 
-            # Compile Bader args
-            bader_args = [bader_path, fpath]
+            bader_args = [bader_path, filepath]
 
             if self.reference_used:
                 bader_args += ["-ref", chgref_fpath]
@@ -180,9 +177,8 @@ def temp_decompress(file: str | Path, target_dir: str = ".") -> str:
                 stdout, stderr = proc.communicate()
                 if proc.returncode != 0:
                     raise RuntimeError(
-                        f"{bader_path} exit code: {proc.returncode}, "
-                        f"error message: {stderr!s}.\nstdout: {stdout!s}"
-                        "Please check your bader installation."
+                        f"{bader_path} exit code: {proc.returncode}, error: {stderr!s}."
+                        f"\nstdout: {stdout!s}. Please check your bader installation."
                     )
 
             # Determine Bader version
@@ -458,7 +454,7 @@ def from_path(cls, path: str, suffix: str = "") -> BaderAnalysis:
         def _get_filepath(filename):
             name_pattern = f"{filename}{suffix}*" if filename != "POTCAR" else f"{filename}*"
             paths = glob(f"{path}/{name_pattern}")
-            fpath = ""
+            filepath = ""
             if len(paths) >= 1:
                 # using reverse=True because, if multiple files are present, they likely
                 # have suffixes 'static', 'relax', 'relax2', etc. and this would give
@@ -467,15 +463,15 @@ def _get_filepath(filename):
                 paths.sort(reverse=True)
                 if len(paths) > 1:
                     warnings.warn(f"Multiple files detected, using {paths[0]}")
-                fpath = paths[0]
+                filepath = paths[0]
             else:
                 msg = f"Could not find {filename!r}"
                 if filename in ("AECCAR0", "AECCAR2"):
                     msg += ", interpret Bader results with severe caution."
                 elif filename == "POTCAR":
                     msg += ", cannot calculate charge transfer."
                 warnings.warn(msg)
-            return fpath
+            return filepath
 
         chgcar_filename = _get_filepath("CHGCAR")
         if chgcar_filename is None:
@@ -531,8 +527,9 @@ def _get_filepath(filename: str, msg: str = "") -> str | None:
         return paths[0]
 
     chgcar_path = _get_filepath("CHGCAR", "Could not find CHGCAR!")
-    if chgcar_path is not None:
-        chgcar = Chgcar.from_file(chgcar_path)
+    if chgcar_path is None:
+        raise FileNotFoundError("Could not find CHGCAR!")
     @classmethod 
     def from_file(cls, filename: str): 
         """Read a CHGCAR file. 
         Args: 
             filename (str): Path to CHGCAR file. 
         Returns: 
             Chgcar 
         """ 
         poscar, data, data_aug = VolumetricData.parse_file(filename) 
         return cls(poscar, data, data_aug=data_aug) 
     @staticmethod 
     def parse_file(filename: str) -> tuple[Poscar, dict, dict]: 
         """ 
         Convenience method to parse a generic volumetric data file in the vasp 
         like format. Used by subclasses for parsing file. 
         Args: 
             filename (str): Path of file to parse 
         Returns: 
             tuple[Poscar, dict, dict]: Poscar object, data dict, data_aug dict 
         """ 
         poscar_read = False 
         poscar_string: list[str] = [] 
         dataset: np.ndarray = np.zeros((1, 1, 1)) 
         all_dataset: list[np.ndarray] = [] 
         # for holding any strings in input that are not Poscar 
         # or VolumetricData (typically augmentation charges) 
         all_dataset_aug: dict[int, list[str]] = {} 
         dim: list[int] = [] 
         dimline = "" 
         read_dataset = False 
         ngrid_pts = 0 
         data_count = 0 
         poscar = None 
         with zopen(filename, mode="rt") as file: 
             for line in file: 
                 original_line = line 
                 line = line.strip() 
                 if read_dataset: 
                     for tok in line.split(): 
                         if data_count < ngrid_pts: 
                             # This complicated procedure is necessary because 
                             # vasp outputs x as the fastest index, followed by y 
                             # then z. 
                             no_x = data_count // dim[0] 
                             dataset[data_count % dim[0], no_x % dim[1], no_x // dim[1]] = float(tok) 
                             data_count += 1 
                     if data_count >= ngrid_pts: 
                         read_dataset = False 
                         data_count = 0 
                         all_dataset.append(dataset) 
                 elif not poscar_read: 
                     if line != "" or len(poscar_string) == 0: 
                         poscar_string.append(line) 
                     elif line == "": 
                         poscar = Poscar.from_str("\n".join(poscar_string)) 
                         poscar_read = True 
                 elif not dim: 
                     dim = [int(i) for i in line.split()] 
                     ngrid_pts = dim[0] * dim[1] * dim[2] 
                     dimline = line 
                     read_dataset = True 
                     dataset = np.zeros(dim) 
                 elif line == dimline: 
                     # when line == dimline, expect volumetric data to follow 
                     # so set read_dataset to True 
                     read_dataset = True 
                     dataset = np.zeros(dim) 
                 else: 
                     # store any extra lines that were not part of the 
                     # volumetric data so we know which set of data the extra 
                     # lines are associated with 
                     key = len(all_dataset) - 1 
                     if key not in all_dataset_aug: 
                         all_dataset_aug[key] = [] 
                     all_dataset_aug[key].append(original_line) 
             if len(all_dataset) == 4: 
                 data = { 
                     "total": all_dataset[0], 
                     "diff_x": all_dataset[1], 
                     "diff_y": all_dataset[2], 
                     "diff_z": all_dataset[3], 
                 } 
                 data_aug = { 
                     "total": all_dataset_aug.get(0), 
                     "diff_x": all_dataset_aug.get(1), 
                     "diff_y": all_dataset_aug.get(2), 
                     "diff_z": all_dataset_aug.get(3), 
                 } 
                 # construct a "diff" dict for scalar-like magnetization density, 
                 # referenced to an arbitrary direction (using same method as 
                 # pymatgen.electronic_structure.core.Magmom, see 
                 # Magmom documentation for justification for this) 
                 # TODO: re-examine this, and also similar behavior in 
                 # Magmom - @mkhorton 
                 # TODO: does CHGCAR change with different SAXIS? 
                 diff_xyz = np.array([data["diff_x"], data["diff_y"], data["diff_z"]]) 
                 diff_xyz = diff_xyz.reshape((3, dim[0] * dim[1] * dim[2])) 
                 ref_direction = np.array([1.01, 1.02, 1.03]) 
                 ref_sign = np.sign(np.dot(ref_direction, diff_xyz)) 
                 diff = np.multiply(np.linalg.norm(diff_xyz, axis=0), ref_sign) 
                 data["diff"] = diff.reshape((dim[0], dim[1], dim[2])) 
             elif len(all_dataset) == 2: 
                 data = {"total": all_dataset[0], "diff": all_dataset[1]} 
                 data_aug = { 
                     "total": all_dataset_aug.get(0), 
                     "diff": all_dataset_aug.get(1), 
                 } 
             else: 
                 data = {"total": all_dataset[0]} 
                 data_aug = {"total": all_dataset_aug.get(0)} 
             return poscar, data, data_aug  # type: ignore[return-value] 
     @classmethod 
     def from_file(cls, filename: str): 
         """Read a CHGCAR file. 
  
         Args: 
             filename (str): Path to CHGCAR file. 
  
         Returns: 
             Chgcar 
         """ 
         poscar, data, data_aug = VolumetricData.parse_file(filename) 
         return cls(poscar, data, data_aug=data_aug) 
     @staticmethod 
     def parse_file(filename: str) -> tuple[Poscar, dict, dict]: 
         """ 
         Convenience method to parse a generic volumetric data file in the vasp 
         like format. Used by subclasses for parsing file. 
  
         Args: 
             filename (str): Path of file to parse 
  
         Returns: 
             tuple[Poscar, dict, dict]: Poscar object, data dict, data_aug dict 
         """ 
         poscar_read = False 
         poscar_string: list[str] = [] 
         dataset: np.ndarray = np.zeros((1, 1, 1)) 
         all_dataset: list[np.ndarray] = [] 
         # for holding any strings in input that are not Poscar 
         # or VolumetricData (typically augmentation charges) 
         all_dataset_aug: dict[int, list[str]] = {} 
         dim: list[int] = [] 
         dimline = "" 
         read_dataset = False 
         ngrid_pts = 0 
         data_count = 0 
         poscar = None 
         with zopen(filename, mode="rt") as file: 
             for line in file: 
                 original_line = line 
                 line = line.strip() 
                 if read_dataset: 
                     for tok in line.split(): 
                         if data_count < ngrid_pts: 
                             # This complicated procedure is necessary because 
                             # vasp outputs x as the fastest index, followed by y 
                             # then z. 
                             no_x = data_count // dim[0] 
                             dataset[data_count % dim[0], no_x % dim[1], no_x // dim[1]] = float(tok) 
                             data_count += 1 
                     if data_count >= ngrid_pts: 
                         read_dataset = False 
                         data_count = 0 
                         all_dataset.append(dataset) 
                 elif not poscar_read: 
                     if line != "" or len(poscar_string) == 0: 
                         poscar_string.append(line) 
                     elif line == "": 
                         poscar = Poscar.from_str("\n".join(poscar_string)) 
                         poscar_read = True 
                 elif not dim: 
                     dim = [int(i) for i in line.split()] 
                     ngrid_pts = dim[0] * dim[1] * dim[2] 
                     dimline = line 
                     read_dataset = True 
                     dataset = np.zeros(dim) 
                 elif line == dimline: 
                     # when line == dimline, expect volumetric data to follow 
                     # so set read_dataset to True 
                     read_dataset = True 
                     dataset = np.zeros(dim) 
                 else: 
                     # store any extra lines that were not part of the 
                     # volumetric data so we know which set of data the extra 
                     # lines are associated with 
                     key = len(all_dataset) - 1 
                     if key not in all_dataset_aug: 
                         all_dataset_aug[key] = [] 
                     all_dataset_aug[key].append(original_line) 
             if len(all_dataset) == 4: 
                 data = { 
                     "total": all_dataset[0], 
                     "diff_x": all_dataset[1], 
                     "diff_y": all_dataset[2], 
                     "diff_z": all_dataset[3], 
                 } 
                 data_aug = { 
                     "total": all_dataset_aug.get(0), 
                     "diff_x": all_dataset_aug.get(1), 
                     "diff_y": all_dataset_aug.get(2), 
                     "diff_z": all_dataset_aug.get(3), 
                 } 
  
                 # construct a "diff" dict for scalar-like magnetization density, 
                 # referenced to an arbitrary direction (using same method as 
                 # pymatgen.electronic_structure.core.Magmom, see 
                 # Magmom documentation for justification for this) 
                 # TODO: re-examine this, and also similar behavior in 
                 # Magmom - @mkhorton 
                 # TODO: does CHGCAR change with different SAXIS? 
                 diff_xyz = np.array([data["diff_x"], data["diff_y"], data["diff_z"]]) 
                 diff_xyz = diff_xyz.reshape((3, dim[0] * dim[1] * dim[2])) 
                 ref_direction = np.array([1.01, 1.02, 1.03]) 
                 ref_sign = np.sign(np.dot(ref_direction, diff_xyz)) 
                 diff = np.multiply(np.linalg.norm(diff_xyz, axis=0), ref_sign) 
                 data["diff"] = diff.reshape((dim[0], dim[1], dim[2])) 
  
             elif len(all_dataset) == 2: 
                 data = {"total": all_dataset[0], "diff": all_dataset[1]} 
                 data_aug = { 
                     "total": all_dataset_aug.get(0), 
                     "diff": all_dataset_aug.get(1), 
                 } 
             else: 
                 data = {"total": all_dataset[0]} 
                 data_aug = {"total": all_dataset_aug.get(0)} 
             return poscar, data, data_aug  # type: ignore[return-value] 
+    chgcar = Chgcar.from_file(chgcar_path)
 
     aeccar0_path = _get_filepath("AECCAR0")
     if not aeccar0_path:
@@ -554,9 +551,9 @@ def _get_filepath(filename: str, msg: str = "") -> str | None:
 
 def bader_analysis_from_objects(
     chgcar: Chgcar,
-    potcar: Potcar = None,
-    aeccar0: Chgcar = None,
-    aeccar2: Chgcar = None,
+    potcar: Potcar | None = None,
+    aeccar0: Chgcar | None = None,
+    aeccar2: Chgcar | None = None,
 ):
     """Convenience method to run Bader analysis from a set
     of pymatgen Chgcar and Potcar objects.