Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zopen: explicit binary/text mode, and explicit encoding as UTF-8 #4219

Merged
merged 14 commits into from
Dec 11, 2024
2 changes: 1 addition & 1 deletion src/pymatgen/apps/borg/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ def _get_transformation_history(path: PathLike):
"""Check for a transformations.json* file and return the history."""
if trans_json := glob(f"{path!s}/transformations.json*"):
try:
with zopen(trans_json[0]) as file:
with zopen(trans_json[0], mode="rt") as file:
return json.load(file)["history"]
except Exception:
return None
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/core/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -2994,7 +2994,7 @@ def to(self, filename: PathLike = "", fmt: FileFormats = "", **kwargs) -> str:

geom_in = AimsGeometryIn.from_structure(self)
if filename:
with zopen(filename, mode="w") as file:
with zopen(filename, mode="wt") as file:
file.write(geom_in.get_header(filename))
file.write(geom_in.content)
file.write("\n")
Expand Down Expand Up @@ -4009,7 +4009,7 @@ def from_file(cls, filename: PathLike) -> Self | None:
"""
filename = str(filename)

with zopen(filename) as file:
with zopen(filename, mode="rt") as file:
contents = file.read()
fname = filename.lower()
if fnmatch(fname, "*.xyz*"):
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -1761,5 +1761,5 @@ def write_file(
mode: Literal["w", "a", "wt", "at"] = "w",
Copy link
Contributor Author

@DanielYang59 DanielYang59 Dec 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would fail if `filename` names a compressed file (e.g. `file.gz`): in that case `file` would be a `BinaryIO`, and we cannot write a `str` into it:

def write_file(
self,
filename: str | Path,
mode: Literal["w", "a", "wt", "at"] = "w",
) -> None:
"""Write the CIF file."""
with zopen(filename, mode=mode) as file:
file.write(str(self))

The `w` and `a` modes have been removed in 4fccd59, and the default has been changed to `wt`.

) -> None:
"""Write the CIF file."""
with zopen(filename, mode=mode) as file:
with zopen(filename, mode=mode) as file: # DEBUG: bad default value
file.write(str(self))
2 changes: 1 addition & 1 deletion src/pymatgen/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def __getitem__(self, item):
f"No parser defined for {item}. Contents are returned as a string.",
UserWarning,
)
with zopen(fpath, "rt") as f:
with zopen(fpath, mode="rt") as f:
return f.read()

def get_files_by_name(self, name: str) -> dict[str, Any]:
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/cp2k/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def preprocessor(data: str, dir: str = ".") -> str: # noqa: A002
raise ValueError(f"length of inc should be 2, got {len(inc)}")
inc = inc[1].strip("'")
inc = inc.strip('"')
with zopen(os.path.join(dir, inc)) as file:
with zopen(os.path.join(dir, inc), mode="rt") as file:
data = re.sub(rf"{incl}", file.read(), data)
variable_sets = re.findall(r"(@SET.+)", data, re.IGNORECASE)
for match in variable_sets:
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/feff/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def header_string_from_file(filename: str = "feff.inp"):
Returns:
Reads header string.
"""
with zopen(filename, mode="r") as file:
with zopen(filename, mode="rt") as file:
lines = file.readlines()
feff_header_str = []
ln = 0
Expand Down
6 changes: 3 additions & 3 deletions src/pymatgen/io/feff/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def from_file(cls, feff_inp_file: str = "feff.inp", ldos_file: str = "ldos") ->
dos_index = 1
begin = 0

with zopen(pot_inp, mode="r") as potfile:
with zopen(pot_inp, mode="rt") as potfile:
for line in potfile:
if len(pot_read_end.findall(line)) > 0:
break
Expand All @@ -95,7 +95,7 @@ def from_file(cls, feff_inp_file: str = "feff.inp", ldos_file: str = "ldos") ->
dicts = Potential.pot_dict_from_str(pot_string)
pot_dict = dicts[0]

with zopen(f"{ldos_file}00.dat", mode="r") as file:
with zopen(f"{ldos_file}00.dat", mode="rt") as file:
lines = file.readlines()
e_fermi = float(lines[0].split()[4])

Expand Down Expand Up @@ -172,7 +172,7 @@ def charge_transfer_from_file(feff_inp_file, ldos_file):
pot_inp = re.sub(r"feff.inp", r"pot.inp", feff_inp_file)
pot_readstart = re.compile(".*iz.*lmaxsc.*xnatph.*xion.*folp.*")
pot_readend = re.compile(".*ExternalPot.*switch.*")
with zopen(pot_inp, mode="r") as potfile:
with zopen(pot_inp, mode="rt") as potfile:
for line in potfile:
if len(pot_readend.findall(line)) > 0:
break
Expand Down
10 changes: 5 additions & 5 deletions src/pymatgen/io/fiesta.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def __init__(self, filename):
"""
self.filename = filename

with zopen(filename) as file:
with zopen(filename, mode="rt") as file:
basis_set = file.read()

self.data = self._parse_file(basis_set)
Expand Down Expand Up @@ -533,7 +533,7 @@ def write_file(self, filename: str | Path) -> None:
Args:
filename: Filename.
"""
with zopen(filename, mode="w") as file:
with zopen(filename, mode="wt") as file:
file.write(str(self))

def as_dict(self):
Expand Down Expand Up @@ -712,7 +712,7 @@ def from_file(cls, filename: str | Path) -> Self:
Returns:
FiestaInput object
"""
with zopen(filename) as file:
with zopen(filename, mode="rt") as file:
return cls.from_str(file.read())


Expand All @@ -730,7 +730,7 @@ def __init__(self, filename):
"""
self.filename = filename

with zopen(filename) as file:
with zopen(filename, mode="rt") as file:
data = file.read()

chunks = re.split(r"GW Driver iteration", data)
Expand Down Expand Up @@ -821,7 +821,7 @@ def __init__(self, filename):
"""
self.filename = filename

with zopen(filename) as file:
with zopen(filename, mode="rt") as file:
log_bse = file.read()

# self.job_info = self._parse_preamble(preamble)
Expand Down
10 changes: 5 additions & 5 deletions src/pymatgen/io/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ def from_file(cls, filename: str | Path) -> Self:
Returns:
GaussianInput object
"""
with zopen(filename, mode="r") as file:
with zopen(filename, mode="rt") as file:
return cls.from_str(file.read())

def get_zmatrix(self):
Expand Down Expand Up @@ -447,9 +447,9 @@ def para_dict_to_str(para, joiner=" "):
def write_file(self, filename, cart_coords=False):
"""Write the input string into a file.

Option: see __str__ method
Option: see `__str__` method
"""
with zopen(filename, mode="w") as file:
with zopen(filename, mode="wt") as file:
file.write(self.to_str(cart_coords))

def as_dict(self):
Expand Down Expand Up @@ -1102,7 +1102,7 @@ def read_scan(self):
data = {"energies": [], "coords": {}}

# read in file
with zopen(self.filename, mode="r") as file:
with zopen(self.filename, mode="rt") as file:
line = file.readline()

while line != "":
Expand Down Expand Up @@ -1188,7 +1188,7 @@ def read_excitation_energies(self):
transitions = []

# read in file
with zopen(self.filename, mode="r") as file:
with zopen(self.filename, mode="rt") as file:
line = file.readline()
td = False
while line != "":
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/lammps/generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def get_input_set(self, structure: Structure | LammpsData | CombinedData) -> Lam
data: LammpsData = LammpsData.from_structure(structure) if isinstance(structure, Structure) else structure

# Load the template
with zopen(self.template, mode="r") as file:
with zopen(self.template, mode="rt") as file:
template_str = file.read()

# Replace all variables
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/lobster/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ def _get_potcar_symbols(POTCAR_input: PathLike) -> list[str]:
raise ValueError("Lobster only works with PAW! Use different POTCARs")

# Warning about a bug in LOBSTER-4.1.0
with zopen(POTCAR_input, mode="r") as file:
with zopen(POTCAR_input, mode="rt") as file:
data = file.read()

if isinstance(data, bytes):
Expand Down
6 changes: 3 additions & 3 deletions src/pymatgen/io/nwchem.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ def write_file(self, filename):
Args:
filename (str): Filename.
"""
with zopen(filename, mode="w") as file:
with zopen(filename, mode="wt") as file:
file.write(str(self))

def as_dict(self):
Expand Down Expand Up @@ -531,7 +531,7 @@ def from_file(cls, filename: str | Path) -> Self:
Returns:
NwInput object
"""
with zopen(filename) as file:
with zopen(filename, mode="rt") as file:
return cls.from_str(file.read())


Expand All @@ -554,7 +554,7 @@ def __init__(self, filename):
"""
self.filename = filename

with zopen(filename) as file:
with zopen(filename, mode="rt") as file:
data = file.read()

chunks = re.split(r"NWChem Input Module", data)
Expand Down
2 changes: 2 additions & 0 deletions src/pymatgen/io/qchem/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(self, filename: str):
self.data["errors"] = []
self.data["warnings"] = {}
self.text = ""
# TODO: why not utf-8 encoding?
with zopen(filename, mode="rt", encoding="ISO-8859-1") as file:
self.text = file.read()

Expand Down Expand Up @@ -2951,6 +2952,7 @@ def nbo_parser(filename: str) -> dict[str, list[pd.DataFrame]]:
RuntimeError: If a section cannot be found.
"""
# Open the lines
# TODO: why not utf-8 encoding?
with zopen(filename, mode="rt", encoding="ISO-8859-1") as file:
lines = file.readlines()

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/io/res.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def _parse_str(cls, source: str) -> Res:
def _parse_file(cls, filename: str | Path) -> Res:
"""Parse the res file as a file."""
self = cls()
with zopen(filename, mode="r") as file:
with zopen(filename, mode="rt") as file:
self.source = file.read()
return self._parse_txt()

Expand Down Expand Up @@ -335,7 +335,7 @@ def string(self) -> str:

def write(self, filename: str) -> None:
"""Write the res data to a file."""
with zopen(filename, mode="w") as file:
with zopen(filename, mode="wt") as file:
file.write(str(self))


Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def get_input_set(
self.filename = str(filename)

# Load the template
with zopen(self.template, mode="r") as file:
with zopen(self.template, mode="rt") as file:
template_str = file.read()

# Replace all variables
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/io/vasp/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5394,7 +5394,7 @@ def __init__(
self.occu_tol = occu_tol
self.separate_spins = separate_spins

with zopen(filename, mode="r") as file:
with zopen(filename, mode="rt") as file:
self.ispin = int(file.readline().split()[-1])

# Remove useless header information
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/io/zeopp.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def from_file(cls, filename: str | Path) -> Self:
Returns:
ZeoCssr object.
"""
with zopen(filename, mode="r") as file:
with zopen(filename, mode="rt") as file:
return cls.from_str(file.read())


Expand Down Expand Up @@ -200,7 +200,7 @@ def from_file(cls, filename: str | Path) -> Self:
Returns:
XYZ object
"""
with zopen(filename) as file:
with zopen(filename, mode="rt") as file:
return cls.from_str(file.read())

def __str__(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion tests/electronic_structure/test_dos.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class TestCompleteDos(TestCase):
def setUp(self):
with open(f"{TEST_DIR}/complete_dos.json") as file:
self.dos = CompleteDos.from_dict(json.load(file))
with zopen(f"{TEST_DIR}/pdag3_complete_dos.json.gz") as file:
with zopen(f"{TEST_DIR}/pdag3_complete_dos.json.gz", mode="rt") as file:
self.dos_pdag3 = CompleteDos.from_dict(json.load(file))

def test_get_gap(self):
Expand Down
1 change: 1 addition & 0 deletions tests/io/qchem/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def test_process_parsed_hess(self):
binary = file.read()
data_132 = [struct.unpack("d", binary[ii * 8 : (ii + 1) * 8])[0] for ii in range(len(binary) // 8)]

# TODO: why not utf-8 encoding?
with zopen(f"{TEST_DIR}/parse_hess/HESS", mode="rt", encoding="ISO-8859-1") as file:
data_hess = file.readlines()

Expand Down
2 changes: 1 addition & 1 deletion tests/io/vasp/test_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2148,7 +2148,7 @@ def test_consistency(self):
wder_ref = np.loadtxt(f"{VASP_OUT_DIR}/WAVEDERF.Si.gz", skiprows=1)

def _check(wder):
with zopen(f"{VASP_OUT_DIR}/WAVEDERF.Si.gz") as file:
with zopen(f"{VASP_OUT_DIR}/WAVEDERF.Si.gz", mode="rt") as file:
first_line = [int(a) for a in file.readline().split()]
assert wder.nkpoints == first_line[1]
assert wder.nbands == first_line[2]
Expand Down
Loading