Skip to content

Commit

Permalink
Merge pull request #733 from DanielYang59/encoding-warning-follow-pep597
Browse files Browse the repository at this point in the history
Encoding warning use PEP 597 env var `PYTHONWARNDEFAULTENCODING`
  • Loading branch information
shyuep authored Jan 10, 2025
2 parents e3db9c0 + 275ca4c commit 35ffd2d
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 27 deletions.
3 changes: 2 additions & 1 deletion src/monty/dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def deprecated(
replacement (Callable | str): A replacement class or function.
message (str): A warning message to be displayed.
deadline (Optional[tuple[int, int, int]]): Optional deadline for removal
of the old function/class, in format (yyyy, MM, dd).
of the old function/class, in format (yyyy, MM, dd). A CI warning would
be raised after this date if is running in code owner' repo.
category (Warning): Choose the category of the warning to issue. Defaults
to FutureWarning. Another choice can be DeprecationWarning. Note that
FutureWarning is meant for end users and is always shown unless silenced.
Expand Down
27 changes: 15 additions & 12 deletions src/monty/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,20 @@ def zopen(
This function wraps around `[bz2/gzip/lzma].open` and `open`
to deal intelligently with compressed or uncompressed files.
Supports context manager:
`with zopen(filename, mode="rt", ...)`.
`with zopen(filename, mode="rt", ...)`
Important Notes:
- Default `mode` should not be used, and would not be allow
in future versions.
- Always explicitly specify binary/text in `mode`, i.e.
always pass `t` or `b` in `mode`, implicit binary/text
mode would not be allow in future versions.
- Always provide an explicit `encoding` in text mode.
- Always provide an explicit `encoding` in text mode, it would
be set to UTF-8 by default otherwise.
Args:
filename (str | Path): The file to open.
mode (str): The mode in which the file is opened, you MUST
mode (str): The mode in which the file is opened, you should
explicitly specify "b" for binary or "t" for text.
**kwargs: Additional keyword arguments to pass to `open`.
Expand Down Expand Up @@ -79,14 +80,16 @@ def zopen(
stacklevel=2,
)

# Warn against default `encoding` in text mode
# Warn against default `encoding` in text mode if
# `PYTHONWARNDEFAULTENCODING` environment variable is set (PEP 597)
if "t" in mode and kwargs.get("encoding", None) is None:
warnings.warn(
"We strongly encourage explicit `encoding`, "
"and we would use UTF-8 by default as per PEP 686",
category=EncodingWarning,
stacklevel=2,
)
if os.getenv("PYTHONWARNDEFAULTENCODING", False):
warnings.warn(
"We strongly encourage explicit `encoding`, "
"and we would use UTF-8 by default as per PEP 686",
category=EncodingWarning,
stacklevel=2,
)
kwargs["encoding"] = "utf-8"

_name, ext = os.path.splitext(filename)
Expand Down Expand Up @@ -141,7 +144,7 @@ def _get_line_ending(
If file is empty, "\n" would be used as default.
"""
if isinstance(file, (str, Path)):
with zopen(file, "rb") as f:
with zopen(file, mode="rb") as f:
first_line = f.readline()
elif isinstance(file, io.TextIOWrapper):
first_line = file.buffer.readline() # type: ignore[attr-defined]
Expand Down Expand Up @@ -187,7 +190,7 @@ def reverse_readfile(
l_end = _get_line_ending(filename)
len_l_end = len(l_end)

with zopen(filename, "rb") as file:
with zopen(filename, mode="rb") as file:
if isinstance(file, (gzip.GzipFile, bz2.BZ2File)):
for line in reversed(file.readlines()):
# "readlines" would keep the line end character
Expand Down
2 changes: 1 addition & 1 deletion src/monty/re.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def regrep(
gen = (
reverse_readfile(filename)
if reverse
else zopen(filename, "rt", encoding="utf-8")
else zopen(filename, mode="rt", encoding="utf-8")
)
for i, line in enumerate(gen):
for k, p in compiled.items():
Expand Down
32 changes: 22 additions & 10 deletions src/monty/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,15 @@

if TYPE_CHECKING:
from pathlib import Path
from typing import Any, Optional, TextIO, Union
from typing import Any, Literal, TextIO, Union


def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) -> Any:
def loadfn(
fn: Union[str, Path],
*args,
fmt: Literal["json", "yaml", "mpk"] | None = None,
**kwargs,
) -> Any:
"""
Loads json/yaml/msgpack directly from a filename instead of a
File-like object. File may also be a BZ2 (".BZ2") or GZIP (".GZ", ".Z")
Expand All @@ -39,9 +44,8 @@ def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) ->
Args:
fn (str/Path): filename or pathlib.Path.
*args: Any of the args supported by json/yaml.load.
fmt (string): If specified, the fmt specified would be used instead
of autodetection from filename. Supported formats right now are
"json", "yaml" or "mpk".
fmt ("json" | "yaml" | "mpk"): If specified, the fmt specified would
be used instead of autodetection from filename.
**kwargs: Any of the kwargs supported by json/yaml.load.
Returns:
Expand All @@ -64,10 +68,10 @@ def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) ->
)
if "object_hook" not in kwargs:
kwargs["object_hook"] = object_hook
with zopen(fn, "rb") as fp:
with zopen(fn, mode="rb") as fp:
return msgpack.load(fp, *args, **kwargs) # pylint: disable=E1101
else:
with zopen(fn, "rt", encoding="utf-8") as fp:
with zopen(fn, mode="rt", encoding="utf-8") as fp:
if fmt == "yaml":
if YAML is None:
raise RuntimeError("Loading of YAML files requires ruamel.yaml.")
Expand All @@ -81,7 +85,13 @@ def loadfn(fn: Union[str, Path], *args, fmt: Optional[str] = None, **kwargs) ->
raise TypeError(f"Invalid format: {fmt}")


def dumpfn(obj: object, fn: Union[str, Path], *args, fmt=None, **kwargs) -> None:
def dumpfn(
obj: object,
fn: Union[str, Path],
*args,
fmt: Literal["json", "yaml", "mpk"] | None = None,
**kwargs,
) -> None:
"""
Dump to a json/yaml directly by filename instead of a
File-like object. File may also be a BZ2 (".BZ2") or GZIP (".GZ", ".Z")
Expand All @@ -95,6 +105,8 @@ def dumpfn(obj: object, fn: Union[str, Path], *args, fmt=None, **kwargs) -> None
Args:
obj (object): Object to dump.
fn (str/Path): filename or pathlib.Path.
fmt ("json" | "yaml" | "mpk"): If specified, the fmt specified would
be used instead of autodetection from filename.
*args: Any of the args supported by json/yaml.dump.
**kwargs: Any of the kwargs supported by json/yaml.dump.
Expand All @@ -117,10 +129,10 @@ def dumpfn(obj: object, fn: Union[str, Path], *args, fmt=None, **kwargs) -> None
)
if "default" not in kwargs:
kwargs["default"] = default
with zopen(fn, "wb") as fp:
with zopen(fn, mode="wb") as fp:
msgpack.dump(obj, fp, *args, **kwargs) # pylint: disable=E1101
else:
with zopen(fn, "wt", encoding="utf-8") as fp:
with zopen(fn, mode="wt", encoding="utf-8") as fp:
fp = cast(TextIO, fp)

if fmt == "yaml":
Expand Down
4 changes: 2 additions & 2 deletions src/monty/shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def compress_file(
else:
compressed_file = f"{str(filepath)}.{compression}"

with open(filepath, "rb") as f_in, zopen(compressed_file, "wb") as f_out:
with open(filepath, "rb") as f_in, zopen(compressed_file, mode="wb") as f_out:
f_out.writelines(f_in)

os.remove(filepath)
Expand Down Expand Up @@ -157,7 +157,7 @@ def decompress_file(
else:
decompressed_file = str(filepath).removesuffix(file_ext)

with zopen(filepath, "rb") as f_in, open(decompressed_file, "wb") as f_out:
with zopen(filepath, mode="rb") as f_in, open(decompressed_file, "wb") as f_out:
f_out.writelines(f_in)

os.remove(filepath)
Expand Down
17 changes: 16 additions & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,18 +433,33 @@ def test_lzw_files(self):
f.read()

@pytest.mark.parametrize("extension", [".txt", ".bz2", ".gz", ".xz", ".lzma"])
def test_warnings(self, extension):
def test_warnings(self, extension, monkeypatch):
filename = f"test_warning{extension}"
content = "Test warning"

with ScratchDir("."):
monkeypatch.setenv("PYTHONWARNDEFAULTENCODING", "1")

# Default `encoding` warning
with (
pytest.warns(EncodingWarning, match="use UTF-8 by default"),
zopen(filename, "wt") as f,
):
f.write(content)

# No encoding warning if `PYTHONWARNDEFAULTENCODING` not set
monkeypatch.delenv("PYTHONWARNDEFAULTENCODING", raising=False)

with warnings.catch_warnings():
warnings.filterwarnings(
"error",
"We strongly encourage explicit `encoding`",
EncodingWarning,
)

with zopen(filename, "wt") as f:
f.write(content)

# Implicit text/binary `mode` warning
warnings.filterwarnings(
"ignore", category=EncodingWarning, message="argument not specified"
Expand Down

0 comments on commit 35ffd2d

Please sign in to comment.