diff --git a/python/pkg/rules.bzl b/python/pkg/rules.bzl index d981dfa..bcacb0d 100644 --- a/python/pkg/rules.bzl +++ b/python/pkg/rules.bzl @@ -4,64 +4,66 @@ load("@rules_128tech//exec_wrapper:rules.bzl", "exec_wrapper") load("@rules_128tech//python:env.bzl", "get_python_env") load("@subpar//:subpar.bzl", "par_binary") load("@rules_pkg//:pkg.bzl", "pkg_tar") - -def pkg_python_app( - name, - tar, - entrypoint = None, - bindir = None, - libdir = None, - env = None, - use_exec_wrapper = True, - zip_safe = False, - mode = "0755", - tar_visibility = None, - **kwargs): +load( + "@rules_128tech//python/py_unzip:rules.bzl", + "py_unzip", + "py_unzip_exec_path", + "py_unzip_package_dir", +) + +def pkg_python_app(name, tar, bindir, libdir, use_py_unzip = False, **kwargs): """ Create and package a entire python application into a single tar. Args: name: (str) The name of the python binary application. - tar: (str) The name of the tar containing all built artifacts. + bindir: (str) The directory where the entry point will be placed. + libdir: (str) The directory where the binary will be placed. + use_py_unzip: Switch between par_binary and py_unzip as the packaging strategy. + **kwargs: pass anything to py_binary. - entrypoint: (str) The name of the entry point to execute the python application. - Defaults to the `name` of the application - - bindir: (str) The directory where the binary entry point will be placed. - - libdir: (str) The directory where the par_binary will be placed. Note: Two - sub-folders will be used. - - /par/: The par_binary archive will be placed here. - - /unpar/: The par_binary archive will unpack here on first invocating. + Outputs: + %{name}: py_binary containing the python application. - env: (str_dict) Specify custom environment variables that should be set. - [default get_python_env()] + %{name}.par: binary wrapping the py_binary into a stand-alone app. Iff + use_py_unzip is False. - use_exec_wrapper: (bool) Whether or not to create an exec_wrapper to run the app - [default: True] + %{name}_exec_wrapper: The entry point script. Iff use_exec_wrapper is True. - zip_safe: (bool) Whether the binary is zip safe. See par_binary for more info. - - mode: (str) The mode of the files in the tar. - - tar_visibility: (str_list) The visibility of the tar archive. - - **kwargs: pass anything to par_binary - - Creates: - : py_binary containing the python application. - - .par: par_binary wrapping the py_binary into a stand-alone app. - - _exec_wrapper: The entry point script. - - : pkg_tar containing the par_binary and entry point with the correct + %{tar}: pkg_tar containing the binary and entry point with the correct path structure. """ - entrypoint = entrypoint or name + if use_py_unzip: + _pkg_py_unzip_app( + name, + tar = tar, + bindir = bindir, + libdir = libdir, + **kwargs + ) + else: + _pkg_par_binary_app( + name = name, + tar = tar, + bindir = bindir, + libdir = libdir, + **kwargs + ) +def _pkg_par_binary_app( + name, + tar, + entrypoint = None, + bindir = None, + libdir = None, + env = None, + use_exec_wrapper = True, + zip_safe = False, + mode = "0755", + tar_visibility = None, + **kwargs): if use_exec_wrapper: exec_wrapper( name = "%s_exec_wrapper" % name, @@ -85,6 +87,7 @@ def pkg_python_app( remap_paths = {"/%s.par" % name: "%s/par/%s.par" % (libdir, name)} if use_exec_wrapper: + entrypoint = entrypoint or name srcs.append(":%s_exec_wrapper" % name) remap_paths["/%s_exec_wrapper" % name] = "%s/%s" % (bindir, entrypoint) @@ -96,3 +99,44 @@ def pkg_python_app( remap_paths = remap_paths, visibility = tar_visibility, ) + +def _pkg_py_unzip_app( + name, + tar, + bindir, + libdir, + entrypoint = None, + mode = "0755", + env = None, + visibility = None, + **kwargs): + if kwargs.pop("tar_visibility", None): + fail("use 'visibility' not 'tar_visibility'") + if kwargs.pop("zip_safe", None): + fail("'zip_safe' must be false when using py_unzip") + + exec_wrapper_name = "%s_exec_wrapper" % name + exec_wrapper( + name = exec_wrapper_name, + env = env or get_python_env(), + exe = py_unzip_exec_path(libdir, name), + ) + + py_unzip( + name = name, + package_dir = py_unzip_package_dir(libdir, name), + visibility = visibility, + **kwargs + ) + + entrypoint = entrypoint or name + + pkg_tar( + name = tar, + srcs = [exec_wrapper_name], + deps = [name + ".tar"], + mode = mode, + strip_prefix = ".", + remap_paths = {"/%s_exec_wrapper" % name: "%s/%s" % (bindir, entrypoint)}, + visibility = visibility, + ) diff --git a/python/py_unzip/BUILD.bazel b/python/py_unzip/BUILD.bazel new file mode 100644 index 0000000..dc0de1a --- /dev/null +++ b/python/py_unzip/BUILD.bazel @@ -0,0 +1,9 @@ +load("@rules_python//python:defs.bzl", "py_binary") + +exports_files(["__main__.py.tmpl"]) + +py_binary( + name = "rezipper", + srcs = ["rezipper.py"], + visibility = ["//visibility:public"], +) diff --git a/python/py_unzip/__main__.py.tmpl b/python/py_unzip/__main__.py.tmpl new file mode 100644 index 0000000..0ca37fd --- /dev/null +++ b/python/py_unzip/__main__.py.tmpl @@ -0,0 +1,128 @@ +%shebang% + +# Copyright 2019 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Main module for python applications generated using py_unzip. + +This is based on the bazel-generated __main__.py. +https://bazel.googlesource.com/bazel/+/refs/heads/release-7.0.0-pre.20230128.3rc1/tools/python/python_bootstrap_template.txt +""" + +import sys + +# The Python interpreter unconditionally prepends the directory containing this +# script (following symlinks) to the import path. This is the cause of +# bazelbuild/bazel/#9239, and is a special case of bazelbuild/bazel/#7091. We therefore +# explicitly delete that entry. TODO(bazelbuild/bazel/#7091): Remove this hack when no +# longer necessary. +del sys.path[0] + +import os +import pathlib +import shutil + + +def FindPythonBinary() -> str: + """Finds the real Python binary if it's not a normal absolute path.""" + python_binary = "%python_binary%" + if os.path.isabs(python_binary): + return python_binary + + prog = shutil.which(python_binary) + if prog is None: + raise AssertionError(f"Could not find python binary: {python_binary}") + return prog + + +def CreatePythonPathEntries( + python_imports: str, + module_space: pathlib.Path, +) -> "list[str]": + return [str(module_space)] + [ + f"{module_space}/{path}" for path in python_imports.split(":") + ] + + +def GetRepositoriesImports(module_space: pathlib.Path) -> "list[str]": + return [str(d) for d in sorted(module_space.iterdir()) if d.is_dir()] + + +def Deduplicate(items): + """Efficiently filter out duplicates, keeping the first element only.""" + seen = set() + for it in items: + if it not in seen: + seen.add(it) + yield it + + +def Main(): + args = sys.argv[1:] + module_space = FindModuleSpace() + + python_imports = '%imports%' + python_path_entries = CreatePythonPathEntries(python_imports, module_space) + python_path_entries += GetRepositoriesImports(module_space) + # Remove duplicates to avoid overly long PYTHONPATH (bazelbuild/bazel#10977). + # Preserve order, keep first occurrence only. + python_path = ":".join([d.strip() for d in Deduplicate(python_path_entries)]) + + try: + old_python_path = os.environ["PYTHONPATH"] + except KeyError: + pass + else: + python_path = f"{python_path}:{old_python_path}" + + os.environ["PYTHONPATH"] = python_path + # Now look for my main python source file. + # The magic string percent-main-percent is replaced with the filename of the + # main file of the Python binary in BazelPythonSemantics.java. + rel_path = "%main%".strip() + + main_filename = module_space / rel_path + assert main_filename.exists(), f"Cannot exec() {main_filename!r}: file not found." + assert os.access( + main_filename, os.R_OK + ), f"Cannot exec() {main_filename!r}: file not readable." + + python_program = FindPythonBinary() + + args = [python_program, str(main_filename)] + args + + try: + sys.stdout.flush() + os.execv(args[0], args) + except OSError as err: + # This exception occurs when os.execv() fails for some reason. + if not getattr(err, "filename", None): + err.filename = python_program # Add info to error message + raise + + +def FindModuleSpace() -> pathlib.Path: + stub_filename = pathlib.Path(sys.argv[0]) + if not stub_filename.is_absolute(): + stub_filename = os.getcwd() / stub_filename + + # If a directory contains a __main__.py then 'python dir' is equivalent + # to 'python dir/__main__.py'. + dir_name = stub_filename if stub_filename.is_dir() else stub_filename.parent + return dir_name / "runfiles" + + +if __name__ == "__main__": + Main() diff --git a/python/py_unzip/rezipper.py b/python/py_unzip/rezipper.py new file mode 100644 index 0000000..48b14c7 --- /dev/null +++ b/python/py_unzip/rezipper.py @@ -0,0 +1,71 @@ +""" +Repack a bazel python_zip_file (the same thing generated by specifying 'bazel build +--build_python_zip') into a tarfile that extracts into a runnable application. +""" + +import argparse +import io +import pathlib +import tarfile +import zipfile + + +def main(): + args = _parse_args() + run(src=args.src, dst=args.dst, package_dir=args.package_dir, main=args.main) + + +def _parse_args(): + parser = argparse.ArgumentParser(description=__doc__) + + parser.add_argument( + "--src", + help="Path to the source .zip file.", + required=True, + ) + parser.add_argument( + "--dst", + help="Path to the output .tar file.", + required=True, + ) + parser.add_argument( + "--package-dir", + help="The directory in which to expand the specified files.", + type=pathlib.Path, + required=True, + ) + parser.add_argument( + "--main", + help="The __main__.py for the output tar.", + type=pathlib.Path, + required=True, + ) + + return parser.parse_args() + + +def run(src: str, dst: str, package_dir: pathlib.Path, main: pathlib.Path): + with zipfile.ZipFile(src, "r") as z_in: + with tarfile.TarFile(dst, "w") as z_out: + addfile = _make_addfile(z_out, package_dir) + + for info in z_in.infolist(): + # Override the __main__.py from the python_zip_file. + if info.filename == "__main__.py": + addfile(info.filename, main.read_bytes(), mode=0o755) + else: + addfile(info.filename, z_in.read(info.filename), mode=0o644) + + +def _make_addfile(z_out: tarfile.TarFile, package_dir: pathlib.Path): + def addfile(filename: str, data: bytes, mode: int) -> None: + tarinfo = tarfile.TarInfo(str(package_dir / filename)) + tarinfo.size = len(data) + tarinfo.mode = mode + z_out.addfile(tarinfo, io.BytesIO(data)) + + return addfile + + +if __name__ == "__main__": + main() diff --git a/python/py_unzip/rules.bzl b/python/py_unzip/rules.bzl new file mode 100644 index 0000000..85567ca --- /dev/null +++ b/python/py_unzip/rules.bzl @@ -0,0 +1,176 @@ +"""Create an archive that can be expanded into a standalone, runnable python application.""" + +load("@rules_python//python:defs.bzl", "PyInfo", "py_binary") + +def py_unzip_package_dir(libdir, app_name): + return "%s/unzip/%s" % (libdir.rstrip("/"), app_name.strip("/")) + +def py_unzip_exec_path(libdir, app_name): + return "%s/__main__.py" % py_unzip_package_dir(libdir, app_name) + +def py_unzip( + name, + main = None, + srcs = [], + tags = [], + visibility = None, + testonly = False, + package_dir = None, + **kwargs): + """ + Create an archive that can be expanded into a standalone, runnable python application. + + Args: + name: (str) The name of the py_binary. + main: (str) The main entry point for the py_binary. + srcs: (str_list) A list of python source files. + tags: (str_list) A list of tags to apply to the target. + visibility: (str_list) The visibility of the target. + testonly: (bool) If True, only testonly targets (such as tests) can depend on this target. + package_dir: The directory in which to expand the specified files, defaulting to '/'. + **kwargs: Pass additional keyword arguments to the underlying py_binary. + + Outputs: + "%{name}": A py_binary that can be `bazel run` if needed. + "%{name}.tar: A tarfile that can be extracted into a runnable python application. + + """ + py_binary( + # TODO: add 'main = _determine_main(main, srcs)' otherise the main argument is + # mandatory due to the underscore in the name. + name = "_" + name, + tags = tags, + main = main, + srcs = srcs, + visibility = visibility, + testonly = testonly, + **kwargs + ) + + native.alias( + name = name, + actual = "_" + name, + testonly = testonly, + tags = tags, + visibility = visibility, + ) + + _py_unzip( + name = name + ".tar", + src = "_" + name, + main = main, + srcs = srcs, + tags = tags + ["py_unzip"], + visibility = visibility, + testonly = testonly, + package_dir = package_dir, + ) + +def _py_unzip_impl(ctx): + main_file = _generate_main(ctx) + zip_file = _get_zip_file(ctx) + + ctx.actions.run( + outputs = [ctx.outputs.executable], + mnemonic = "ReZipper", + inputs = [zip_file, main_file], + executable = ctx.executable._rezipper, + arguments = [ + "--src", + zip_file.path, + "--dst", + ctx.outputs.executable.path, + "--package-dir", + _get_package_dir(ctx), + "--main", + main_file.path, + ], + progress_message = "Repacking %s into %s" % (zip_file.short_path, ctx.outputs.executable.short_path), + ) + + return [ + # TODO: This isn't actually execuable. This is needed so that the outputs can + # be '%{name}' and '%{name}.tar'. + DefaultInfo(executable = ctx.outputs.executable), + ] + +def _generate_main(ctx): + py_runtime = ctx.toolchains["@bazel_tools//tools/python:toolchain_type"].py3_runtime + + main_file = ctx.actions.declare_file(ctx.label.name + ".__main__.py") + + ctx.actions.expand_template( + template = ctx.file._main_template, + output = main_file, + substitutions = { + "%imports%": ":".join(ctx.attr.src[PyInfo].imports.to_list()), + "%main%": ctx.workspace_name + "/" + _determine_main(ctx).path, + "%python_binary%": py_runtime.interpreter_path, + "%shebang%": py_runtime.stub_shebang, + }, + is_executable = True, + ) + + return main_file + +def removesuffix(string, suffix): + """https://www.python.org/dev/peps/pep-0616/""" + + # suffix='' should not call string[:-0]. + if suffix and string.endswith(suffix): + return string[:-len(suffix)] + return string[:] + +def _determine_main(ctx): + """https://github.com/bazelbuild/bazel/blob/1eda22fa4d8488e434a7bbe1c548b5ca7fb7b6e5/src/main/starlark/builtins_bzl/common/python/py_executable.bzl#L608""" + + # This doesn't need robust error-handling because the py_binary is instantiated + # first and will fail first. + if ctx.attr.main: + proposed_main = ctx.attr.main.label.name + else: + proposed_main = removesuffix(ctx.label.name, ".tar") + ".py" + + main_files = [src for src in ctx.files.srcs if _path_endswith(src.short_path, proposed_main)] + if len(main_files) != 1: + fail("failed to determine main", attr = main_files) + return main_files[0] + +def _path_endswith(path, endswith): + # Use slash to anchor each path to prevent e.g. + # "ab/c.py".endswith("b/c.py") from incorrectly matching. + return ("/" + path).endswith("/" + endswith) + +def _get_zip_file(ctx): + zip_file = ctx.attr.src[OutputGroupInfo].python_zip_file + inputs = zip_file.to_list() + if len(inputs) != 1: + fail("expected only one .zip file", attr = inputs) + return inputs[0] + +def _get_package_dir(ctx): + package_dir = ctx.attr.package_dir or "" + if package_dir.startswith("/"): + package_dir = package_dir[1:] + return package_dir + +_py_unzip = rule( + implementation = _py_unzip_impl, + attrs = { + "main": attr.label(allow_files = True), + "package_dir": attr.string(), + "src": attr.label(mandatory = True), + "srcs": attr.label_list(allow_files = True), + "_main_template": attr.label( + default = "//python/py_unzip:__main__.py.tmpl", + allow_single_file = True, + ), + "_rezipper": attr.label( + default = "//python/py_unzip:rezipper", + executable = True, + cfg = "exec", + ), + }, + toolchains = ["@bazel_tools//tools/python:toolchain_type"], + executable = True, +)