Skip to content

Commit

Permalink
Adopt more UTF-8 (#4309)
Browse files Browse the repository at this point in the history
  • Loading branch information
abravalheri authored Apr 22, 2024
2 parents d756377 + 1c91ac8 commit 1ed7591
Show file tree
Hide file tree
Showing 18 changed files with 199 additions and 92 deletions.
7 changes: 7 additions & 0 deletions newsfragments/4309.removal.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Further adoption of UTF-8 in ``setuptools``.
This change mostly concerns files produced and consumed during the build process
(e.g. metadata files, script wrappers, automatically updated config files, etc.).
Although precautions were taken to minimize disruptions, some edge cases might
be subject to backwards incompatibility.

Support for ``"locale"`` encoding is now **deprecated**.
44 changes: 37 additions & 7 deletions pkg_resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1524,8 +1524,7 @@ def run_script(self, script_name, namespace):
script_filename = self._fn(self.egg_info, script)
namespace['__file__'] = script_filename
if os.path.exists(script_filename):
with open(script_filename) as fid:
source = fid.read()
source = _read_utf8_with_fallback(script_filename)
code = compile(source, script_filename, 'exec')
exec(code, namespace, namespace)
else:
Expand Down Expand Up @@ -2175,11 +2174,10 @@ def non_empty_lines(path):
"""
Yield non-empty lines from file at path
"""
with open(path) as f:
for line in f:
line = line.strip()
if line:
yield line
for line in _read_utf8_with_fallback(path).splitlines():
line = line.strip()
if line:
yield line


def resolve_egg_link(path):
Expand Down Expand Up @@ -3323,3 +3321,35 @@ def _initialize_master_working_set():
# match order
list(map(working_set.add_entry, sys.path))
globals().update(locals())


# ---- Ported from ``setuptools`` to avoid introducing an import inter-dependency ----
# ``encoding="locale"`` is only passed to ``open()`` on Python 3.10+; older
# versions get ``None`` instead.
LOCALE_ENCODING = "locale" if sys.version_info >= (3, 10) else None


def _read_utf8_with_fallback(file: str, fallback_encoding=LOCALE_ENCODING) -> str:
    """Return the text of ``file``, decoded as UTF-8 with a deprecated fallback.

    See setuptools.unicode_utils._read_utf8_with_fallback

    :param file: path of the text file to read.
    :param fallback_encoding: encoding used for a second attempt when the
        file is not valid UTF-8 (defaults to ``LOCALE_ENCODING``).
    :returns: the full decoded contents of the file.
    :raises UnicodeDecodeError: if the fallback encoding cannot decode the
        file either.

    When the fallback is used, a ``PkgResourcesDeprecationWarning`` is issued
    asking for the file to be re-encoded as UTF-8.
    """
    try:
        with open(file, "r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError:  # pragma: no cover
        msg = f"""\
********************************************************************************
`encoding="utf-8"` fails with {file!r}, trying `encoding={fallback_encoding!r}`.
This fallback behaviour is considered **deprecated** and future versions of
`setuptools/pkg_resources` may not implement it.
Please encode {file!r} with "utf-8" to ensure future builds will succeed.
If this file was produced by `setuptools` itself, cleaning up the cached files
and re-building/re-installing the package with a newer version of `setuptools`
(e.g. by updating `build-system.requires` in its `pyproject.toml`)
might solve the problem.
********************************************************************************
"""
        # TODO: Add a deadline?
        # See comment in setuptools.unicode_utils._Utf8EncodingNeeded
        # NOTE: the function is ``warnings.warn``; ``warnings.warns`` does not
        # exist and raised AttributeError before the fallback read could run.
        warnings.warn(msg, PkgResourcesDeprecationWarning, stacklevel=2)
        with open(file, "r", encoding=fallback_encoding) as f:
            return f.read()
5 changes: 3 additions & 2 deletions setuptools/_imp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
import importlib.util
import importlib.machinery
import tokenize

from importlib.util import module_from_spec

Expand Down Expand Up @@ -60,13 +61,13 @@ def find_module(module, paths=None):

if suffix in importlib.machinery.SOURCE_SUFFIXES:
kind = PY_SOURCE
file = tokenize.open(path)
elif suffix in importlib.machinery.BYTECODE_SUFFIXES:
kind = PY_COMPILED
file = open(path, 'rb')
elif suffix in importlib.machinery.EXTENSION_SUFFIXES:
kind = C_EXTENSION

if kind in {PY_SOURCE, PY_COMPILED}:
file = open(path, mode)
else:
path = None
suffix = mode = ''
Expand Down
14 changes: 6 additions & 8 deletions setuptools/command/bdist_egg.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __bootstrap__():
__bootstrap__()
"""
).lstrip()
with open(pyfile, 'w') as f:
with open(pyfile, 'w', encoding="utf-8") as f:
f.write(_stub_template % resource)


Expand Down Expand Up @@ -200,10 +200,9 @@ def run(self): # noqa: C901 # is too complex (14) # FIXME
log.info("writing %s", native_libs)
if not self.dry_run:
ensure_directory(native_libs)
libs_file = open(native_libs, 'wt')
libs_file.write('\n'.join(all_outputs))
libs_file.write('\n')
libs_file.close()
with open(native_libs, 'wt', encoding="utf-8") as libs_file:
libs_file.write('\n'.join(all_outputs))
libs_file.write('\n')
elif os.path.isfile(native_libs):
log.info("removing %s", native_libs)
if not self.dry_run:
Expand Down Expand Up @@ -350,9 +349,8 @@ def write_safety_flag(egg_dir, safe):
if safe is None or bool(safe) != flag:
os.unlink(fn)
elif safe is not None and bool(safe) == flag:
f = open(fn, 'wt')
f.write('\n')
f.close()
with open(fn, 'wt', encoding="utf-8") as f:
f.write('\n')


safety_flags = {
Expand Down
8 changes: 3 additions & 5 deletions setuptools/command/build_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,8 @@ def _write_stub_file(self, stub_file: str, ext: Extension, compile=False):
if compile and os.path.exists(stub_file):
raise BaseError(stub_file + " already exists! Please delete.")
if not self.dry_run:
f = open(stub_file, 'w')
f.write(
'\n'.join([
with open(stub_file, 'w', encoding="utf-8") as f:
content = '\n'.join([
"def __bootstrap__():",
" global __bootstrap__, __file__, __loader__",
" import sys, os, pkg_resources, importlib.util" + if_dl(", dl"),
Expand All @@ -368,8 +367,7 @@ def _write_stub_file(self, stub_file: str, ext: Extension, compile=False):
"__bootstrap__()",
"", # terminal \n
])
)
f.close()
f.write(content)
if compile:
self._compile_and_remove_stub(stub_file)

Expand Down
16 changes: 10 additions & 6 deletions setuptools/command/develop.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from setuptools import namespaces
import setuptools

from ..unicode_utils import _read_utf8_with_fallback


class develop(namespaces.DevelopInstaller, easy_install):
"""Set up package for development"""
Expand Down Expand Up @@ -119,7 +121,7 @@ def install_for_development(self):
# create an .egg-link in the installation dir, pointing to our egg
log.info("Creating %s (link to %s)", self.egg_link, self.egg_base)
if not self.dry_run:
with open(self.egg_link, "w") as f:
with open(self.egg_link, "w", encoding="utf-8") as f:
f.write(self.egg_path + "\n" + self.setup_path)
# postprocess the installed distro, fixing up .pth, installing scripts,
# and handling requirements
Expand All @@ -128,9 +130,12 @@ def install_for_development(self):
def uninstall_link(self):
if os.path.exists(self.egg_link):
log.info("Removing %s (link to %s)", self.egg_link, self.egg_base)
egg_link_file = open(self.egg_link)
contents = [line.rstrip() for line in egg_link_file]
egg_link_file.close()

contents = [
line.rstrip()
for line in _read_utf8_with_fallback(self.egg_link).splitlines()
]

if contents not in ([self.egg_path], [self.egg_path, self.setup_path]):
log.warn("Link points to %s: uninstall aborted", contents)
return
Expand All @@ -156,8 +161,7 @@ def install_egg_scripts(self, dist):
for script_name in self.distribution.scripts or []:
script_path = os.path.abspath(convert_path(script_name))
script_name = os.path.basename(script_path)
with open(script_path) as strm:
script_text = strm.read()
script_text = _read_utf8_with_fallback(script_path)
self.install_script(dist, script_name, script_text, script_path)

return None
Expand Down
20 changes: 10 additions & 10 deletions setuptools/command/easy_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,9 @@ def write_script(self, script_name, contents, mode="t", blockers=()):
ensure_directory(target)
if os.path.exists(target):
os.unlink(target)
with open(target, "w" + mode) as f: # TODO: is it safe to use utf-8?

encoding = None if "b" in mode else "utf-8"
with open(target, "w" + mode, encoding=encoding) as f:
f.write(contents)
chmod(target, 0o777 - mask)

Expand Down Expand Up @@ -1017,12 +1019,11 @@ def install_exe(self, dist_filename, tmpdir):

# Write EGG-INFO/PKG-INFO
if not os.path.exists(pkg_inf):
f = open(pkg_inf, 'w') # TODO: probably it is safe to use utf-8
f.write('Metadata-Version: 1.0\n')
for k, v in cfg.items('metadata'):
if k != 'target_version':
f.write('%s: %s\n' % (k.replace('_', '-').title(), v))
f.close()
with open(pkg_inf, 'w', encoding="utf-8") as f:
f.write('Metadata-Version: 1.0\n')
for k, v in cfg.items('metadata'):
if k != 'target_version':
f.write('%s: %s\n' % (k.replace('_', '-').title(), v))
script_dir = os.path.join(_egg_info, 'scripts')
# delete entry-point scripts to avoid duping
self.delete_blockers([
Expand Down Expand Up @@ -1088,9 +1089,8 @@ def process(src, dst):
if locals()[name]:
txt = os.path.join(egg_tmp, 'EGG-INFO', name + '.txt')
if not os.path.exists(txt):
f = open(txt, 'w') # TODO: probably it is safe to use utf-8
f.write('\n'.join(locals()[name]) + '\n')
f.close()
with open(txt, 'w', encoding="utf-8") as f:
f.write('\n'.join(locals()[name]) + '\n')

def install_wheel(self, wheel_path, tmpdir):
wheel = Wheel(wheel_path)
Expand Down
6 changes: 3 additions & 3 deletions setuptools/command/install_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ def write_script(self, script_name, contents, mode="t", *ignored):
target = os.path.join(self.install_dir, script_name)
self.outfiles.append(target)

encoding = None if "b" in mode else "utf-8"
mask = current_umask()
if not self.dry_run:
ensure_directory(target)
f = open(target, "w" + mode)
f.write(contents)
f.close()
with open(target, "w" + mode, encoding=encoding) as f:
f.write(contents)
chmod(target, 0o777 - mask)
8 changes: 5 additions & 3 deletions setuptools/command/setopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import os
import configparser

from setuptools import Command
from .. import Command
from ..unicode_utils import _cfg_read_utf8_with_fallback

__all__ = ['config_file', 'edit_config', 'option_base', 'setopt']

Expand Down Expand Up @@ -36,7 +37,8 @@ def edit_config(filename, settings, dry_run=False):
log.debug("Reading configuration from %s", filename)
opts = configparser.RawConfigParser()
opts.optionxform = lambda x: x
opts.read([filename])
_cfg_read_utf8_with_fallback(opts, filename)

for section, options in settings.items():
if options is None:
log.info("Deleting section [%s] from %s", section, filename)
Expand All @@ -62,7 +64,7 @@ def edit_config(filename, settings, dry_run=False):

log.info("Writing %s", filename)
if not dry_run:
with open(filename, 'w') as f:
with open(filename, 'w', encoding="utf-8") as f:
opts.write(f)


Expand Down
2 changes: 1 addition & 1 deletion setuptools/dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ def get_egg_cache_dir(self):
os.mkdir(egg_cache_dir)
windows_support.hide_file(egg_cache_dir)
readme_txt_filename = os.path.join(egg_cache_dir, 'README.txt')
with open(readme_txt_filename, 'w') as f:
with open(readme_txt_filename, 'w', encoding="utf-8") as f:
f.write(
'This directory contains eggs that were downloaded '
'by setuptools to build, test, and run plug-ins.\n\n'
Expand Down
15 changes: 8 additions & 7 deletions setuptools/package_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
from setuptools.wheel import Wheel
from setuptools.extern.more_itertools import unique_everseen

from .unicode_utils import _read_utf8_with_fallback, _cfg_read_utf8_with_fallback


EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
Expand Down Expand Up @@ -419,9 +421,9 @@ def scan_egg_links(self, search_path):
list(itertools.starmap(self.scan_egg_link, egg_links))

def scan_egg_link(self, path, entry):
with open(os.path.join(path, entry)) as raw_lines:
# filter non-empty lines
lines = list(filter(None, map(str.strip, raw_lines)))
content = _read_utf8_with_fallback(os.path.join(path, entry))
# filter non-empty lines
lines = list(filter(None, map(str.strip, content.splitlines())))

if len(lines) != 2:
# format is not recognized; punt
Expand Down Expand Up @@ -714,7 +716,7 @@ def gen_setup(self, filename, fragment, tmpdir):
shutil.copy2(filename, dst)
filename = dst

with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
with open(os.path.join(tmpdir, 'setup.py'), 'w', encoding="utf-8") as file:
file.write(
"from setuptools import setup\n"
"setup(name=%r, version=%r, py_modules=[%r])\n"
Expand Down Expand Up @@ -1011,7 +1013,7 @@ def __init__(self):

rc = os.path.join(os.path.expanduser('~'), '.pypirc')
if os.path.exists(rc):
self.read(rc)
_cfg_read_utf8_with_fallback(self, rc)

@property
def creds_by_repository(self):
Expand Down Expand Up @@ -1114,8 +1116,7 @@ def local_open(url):
for f in os.listdir(filename):
filepath = os.path.join(filename, f)
if f == 'index.html':
with open(filepath, 'r') as fp:
body = fp.read()
body = _read_utf8_with_fallback(filepath)
break
elif os.path.isdir(filepath):
f += '/'
Expand Down
3 changes: 2 additions & 1 deletion setuptools/tests/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class VirtualEnv(jaraco.envs.VirtualEnv):

def run(self, cmd, *args, **kwargs):
cmd = [self.exe(cmd[0])] + cmd[1:]
kwargs = {"cwd": self.root, **kwargs} # Allow overriding
kwargs = {"cwd": self.root, "encoding": "utf-8", **kwargs} # Allow overriding
# In some environments (eg. downstream distro packaging), where:
# - tox isn't used to run tests and
# - PYTHONPATH is set to point to a specific setuptools codebase and
Expand Down Expand Up @@ -76,6 +76,7 @@ def run_setup_py(cmd, pypath=None, path=None, data_stream=0, env=None):
stderr=_PIPE,
shell=shell,
env=env,
encoding="utf-8",
)

if isinstance(data_stream, tuple):
Expand Down
4 changes: 2 additions & 2 deletions setuptools/tests/test_build_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,14 +941,14 @@ def test_legacy_editable_install(venv, tmpdir, tmpdir_cwd):

# First: sanity check
cmd = ["pip", "install", "--no-build-isolation", "-e", "."]
output = str(venv.run(cmd, cwd=tmpdir), "utf-8").lower()
output = venv.run(cmd, cwd=tmpdir).lower()
assert "running setup.py develop for myproj" not in output
assert "created wheel for myproj" in output

# Then: real test
env = {**os.environ, "SETUPTOOLS_ENABLE_FEATURES": "legacy-editable"}
cmd = ["pip", "install", "--no-build-isolation", "-e", "."]
output = str(venv.run(cmd, cwd=tmpdir, env=env), "utf-8").lower()
output = venv.run(cmd, cwd=tmpdir, env=env).lower()
assert "running setup.py develop for myproj" in output


Expand Down
2 changes: 1 addition & 1 deletion setuptools/tests/test_easy_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ def test_many_pth_distributions_merge_together(self, tmpdir):

@pytest.fixture
def setup_context(tmpdir):
with (tmpdir / 'setup.py').open('w') as f:
with (tmpdir / 'setup.py').open('w', encoding="utf-8") as f:
f.write(SETUP_PY)
with tmpdir.as_cwd():
yield tmpdir
Expand Down
Loading

0 comments on commit 1ed7591

Please sign in to comment.