Skip to content

Commit

Permalink
py_unzip
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobbogdanov committed Jul 31, 2023
1 parent 0c3893d commit 8aa1bbd
Show file tree
Hide file tree
Showing 5 changed files with 472 additions and 44 deletions.
132 changes: 88 additions & 44 deletions python/pkg/rules.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -4,64 +4,66 @@ load("@rules_128tech//exec_wrapper:rules.bzl", "exec_wrapper")
load("@rules_128tech//python:env.bzl", "get_python_env")
load("@subpar//:subpar.bzl", "par_binary")
load("@rules_pkg//:pkg.bzl", "pkg_tar")

def pkg_python_app(
name,
tar,
entrypoint = None,
bindir = None,
libdir = None,
env = None,
use_exec_wrapper = True,
zip_safe = False,
mode = "0755",
tar_visibility = None,
**kwargs):
load(
"@rules_128tech//python/py_unzip:rules.bzl",
"py_unzip",
"py_unzip_exec_path",
"py_unzip_package_dir",
)

def pkg_python_app(name, tar, bindir, libdir, use_py_unzip = False, **kwargs):
    """
    Create and package an entire python application into a single tar.

    Args:
        name: (str) The name of the python binary application.
        tar: (str) The name of the tar containing all built artifacts.
        bindir: (str) The directory where the entry point will be placed.
        libdir: (str) The directory where the binary will be placed.
        use_py_unzip: (bool) Switch between par_binary and py_unzip as the
            packaging strategy. [default: False]
        **kwargs: forwarded unchanged to the selected packaging macro
            (_pkg_py_unzip_app when use_py_unzip is True, otherwise
            _pkg_par_binary_app). Both accept `entrypoint`, `env` and `mode`.

    Outputs:
        %{name}: the python application target.
        %{name}.par: binary wrapping the py_binary into a stand-alone app. Iff
            use_py_unzip is False.
        %{name}_exec_wrapper: The entry point script. Always created with
            py_unzip; with par_binary only when use_exec_wrapper is True.
        %{tar}: pkg_tar containing the binary and entry point with the correct
            path structure.
    """
    if use_py_unzip:
        _pkg_py_unzip_app(
            name = name,
            tar = tar,
            bindir = bindir,
            libdir = libdir,
            **kwargs
        )
    else:
        _pkg_par_binary_app(
            name = name,
            tar = tar,
            bindir = bindir,
            libdir = libdir,
            **kwargs
        )

def _pkg_par_binary_app(
name,
tar,
entrypoint = None,
bindir = None,
libdir = None,
env = None,
use_exec_wrapper = True,
zip_safe = False,
mode = "0755",
tar_visibility = None,
**kwargs):
if use_exec_wrapper:
exec_wrapper(
name = "%s_exec_wrapper" % name,
Expand All @@ -85,6 +87,7 @@ def pkg_python_app(
remap_paths = {"/%s.par" % name: "%s/par/%s.par" % (libdir, name)}

if use_exec_wrapper:
entrypoint = entrypoint or name
srcs.append(":%s_exec_wrapper" % name)
remap_paths["/%s_exec_wrapper" % name] = "%s/%s" % (bindir, entrypoint)

Expand All @@ -96,3 +99,44 @@ def pkg_python_app(
remap_paths = remap_paths,
visibility = tar_visibility,
)

def _pkg_py_unzip_app(
        name,
        tar,
        bindir,
        libdir,
        entrypoint = None,
        mode = "0755",
        env = None,
        visibility = None,
        **kwargs):
    """Package a py_unzip application together with its entry point script.

    Args:
        name: (str) Name of the py_unzip target; also the default entry point name.
        tar: (str) Name of the resulting pkg_tar target.
        bindir: (str) Directory inside the tar where the entry point is placed.
        libdir: (str) Passed to py_unzip_exec_path / py_unzip_package_dir to
            derive where the application lives.
        entrypoint: (str) File name of the entry point. Defaults to `name`.
        mode: (str) Mode handed to pkg_tar.
        env: (str_dict) Environment for the exec_wrapper. Defaults to
            get_python_env().
        visibility: Visibility applied to both the py_unzip and pkg_tar targets.
        **kwargs: Remaining arguments are forwarded to py_unzip.
    """

    # Options that only make sense for the par_binary flavour are rejected when
    # set to a truthy value; falsy values are silently dropped from kwargs.
    if kwargs.pop("tar_visibility", None):
        fail("use 'visibility' not 'tar_visibility'")
    if kwargs.pop("zip_safe", None):
        fail("'zip_safe' must be false when using py_unzip")

    wrapper_target = "%s_exec_wrapper" % name
    wrapper_env = env or get_python_env()
    wrapped_executable = py_unzip_exec_path(libdir, name)
    exec_wrapper(
        name = wrapper_target,
        env = wrapper_env,
        exe = wrapped_executable,
    )

    unpack_dir = py_unzip_package_dir(libdir, name)
    py_unzip(
        name = name,
        package_dir = unpack_dir,
        visibility = visibility,
        **kwargs
    )

    # Install the wrapper script under bindir using the entry point's name.
    installed_entrypoint = "%s/%s" % (bindir, entrypoint or name)
    wrapper_remap = {"/%s_exec_wrapper" % name: installed_entrypoint}

    pkg_tar(
        name = tar,
        srcs = [wrapper_target],
        deps = [name + ".tar"],
        mode = mode,
        strip_prefix = ".",
        remap_paths = wrapper_remap,
        visibility = visibility,
    )
9 changes: 9 additions & 0 deletions python/py_unzip/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
load("@rules_python//python:defs.bzl", "py_binary")

# Export the launcher template so that it can be referenced by label from
# other packages.
exports_files(["__main__.py.tmpl"])

# Tool that repacks a bazel python_zip_file into a tarfile that extracts into
# a runnable application (see rezipper.py).
py_binary(
    name = "rezipper",
    srcs = ["rezipper.py"],
    visibility = ["//visibility:public"],
)
128 changes: 128 additions & 0 deletions python/py_unzip/__main__.py.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
%shebang%

# Copyright 2019 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Main module for python applications generated using py_unzip.

This is based on the bazel-generated __main__.py.
https://bazel.googlesource.com/bazel/+/refs/heads/release-7.0.0-pre.20230128.3rc1/tools/python/python_bootstrap_template.txt
"""

import sys

# The Python interpreter unconditionally prepends the directory containing this
# script (following symlinks) to the import path. This is the cause of
# bazelbuild/bazel/#9239, and is a special case of bazelbuild/bazel/#7091. We therefore
# explicitly delete that entry. TODO(bazelbuild/bazel/#7091): Remove this hack when no
# longer necessary.
del sys.path[0]

import os
import pathlib
import shutil


def FindPythonBinary() -> str:
    """Return the path of the Python interpreter that runs the application.

    The interpreter name comes from the python_binary placeholder in this
    template. An absolute path is returned unchanged; anything else is looked
    up on PATH.

    Raises:
        AssertionError: if the interpreter cannot be located on PATH.
    """
    configured = "%python_binary%"
    if not os.path.isabs(configured):
        resolved = shutil.which(configured)
        if resolved is None:
            raise AssertionError(f"Could not find python binary: {configured}")
        return resolved
    return configured


def CreatePythonPathEntries(
    python_imports: str,
    module_space: pathlib.Path,
) -> "list[str]":
    """Build the PYTHONPATH entries rooted at the module space.

    Args:
        python_imports: colon-separated import paths, relative to
            ``module_space``.
        module_space: directory containing the application's files.

    Returns:
        ``module_space`` itself followed by one entry per non-empty import
        path, in the order given.
    """
    entries = [str(module_space)]
    # "".split(":") yields [""]; skipping empty segments avoids emitting a
    # redundant "<module_space>/" entry when there are no imports (or when the
    # list contains a stray separator).
    entries.extend(
        f"{module_space}/{path}" for path in python_imports.split(":") if path
    )
    return entries


def GetRepositoriesImports(module_space: pathlib.Path) -> "list[str]":
    """Return every immediate subdirectory of ``module_space``, sorted by path."""
    repository_dirs = []
    for child in sorted(module_space.iterdir()):
        if child.is_dir():
            repository_dirs.append(str(child))
    return repository_dirs


def Deduplicate(items):
    """Lazily yield each distinct item once, in order of first appearance."""
    already_emitted = set()
    for candidate in items:
        if candidate in already_emitted:
            continue
        already_emitted.add(candidate)
        yield candidate


def Main():
    """Prepare PYTHONPATH and exec the application's real main script.

    PYTHONPATH is built from the module space (see FindModuleSpace), the
    import paths substituted into this template and every immediate
    subdirectory of the module space. It is placed in front of any PYTHONPATH
    already present in the environment. The current process is then replaced
    by the Python interpreter running the main file, with the original
    command line arguments forwarded.

    Raises:
        AssertionError: if the interpreter cannot be found, or the main file
            does not exist or is not readable.
        OSError: if os.execv() fails; the interpreter path is attached to the
            error when it carries no filename of its own.
    """
    forwarded_args = sys.argv[1:]
    module_space = FindModuleSpace()

    python_imports = '%imports%'
    python_path_entries = CreatePythonPathEntries(python_imports, module_space)
    python_path_entries += GetRepositoriesImports(module_space)
    # Remove duplicates to avoid overly long PYTHONPATH (bazelbuild/bazel#10977).
    # Preserve order, keep first occurrence only.
    python_path = ":".join([d.strip() for d in Deduplicate(python_path_entries)])

    # Keep whatever the caller already exported, but search our entries first.
    old_python_path = os.environ.get("PYTHONPATH")
    if old_python_path is not None:
        python_path = f"{python_path}:{old_python_path}"

    os.environ["PYTHONPATH"] = python_path
    # Now look for my main python source file.
    # The magic string percent-main-percent is replaced with the filename of the
    # main file of the Python binary in BazelPythonSemantics.java.
    rel_path = "%main%".strip()

    main_filename = module_space / rel_path
    # Raised explicitly rather than through `assert` so that the checks still
    # run when the interpreter is started with -O.
    if not main_filename.exists():
        raise AssertionError(f"Cannot exec() {main_filename!r}: file not found.")
    if not os.access(main_filename, os.R_OK):
        raise AssertionError(
            f"Cannot exec() {main_filename!r}: file not readable."
        )

    python_program = FindPythonBinary()

    command = [python_program, str(main_filename)] + forwarded_args

    try:
        # Flush buffered output first: execv replaces the process image.
        sys.stdout.flush()
        os.execv(command[0], command)
    except OSError as err:
        # This exception occurs when os.execv() fails for some reason.
        if not getattr(err, "filename", None):
            err.filename = python_program  # Add info to error message
        raise


def FindModuleSpace() -> pathlib.Path:
    """Return the ``runfiles`` directory located next to this stub script.

    The stub location is taken from ``sys.argv[0]``; a relative path is
    interpreted against the current working directory.
    """
    stub = pathlib.Path(sys.argv[0])
    if not stub.is_absolute():
        stub = pathlib.Path(os.getcwd()) / stub

    # If a directory contains a __main__.py then 'python dir' is equivalent
    # to 'python dir/__main__.py', so argv[0] may already be the directory.
    if stub.is_dir():
        containing_dir = stub
    else:
        containing_dir = stub.parent
    return containing_dir / "runfiles"


# Run the launcher only when this file is executed as a script.
if __name__ == "__main__":
    Main()
71 changes: 71 additions & 0 deletions python/py_unzip/rezipper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
Repack a bazel python_zip_file (the same thing generated by specifying 'bazel build
--build_python_zip') into a tarfile that extracts into a runnable application.
"""

import argparse
import io
import pathlib
import tarfile
import zipfile


def main():
    """Command line entry point: parse the arguments and repack the archive."""
    options = _parse_args()
    run(
        src=options.src,
        dst=options.dst,
        package_dir=options.package_dir,
        main=options.main,
    )


def _parse_args():
    """Parse the command line; every option is required.

    Returns:
        argparse.Namespace with ``src`` and ``dst`` as strings and
        ``package_dir`` and ``main`` as ``pathlib.Path`` objects.
    """
    cli = argparse.ArgumentParser(description=__doc__)

    # (flag, extra add_argument keywords), registered in this order.
    option_table = (
        ("--src", {"help": "Path to the source .zip file."}),
        ("--dst", {"help": "Path to the output .tar file."}),
        (
            "--package-dir",
            {
                "help": "The directory in which to expand the specified files.",
                "type": pathlib.Path,
            },
        ),
        (
            "--main",
            {
                "help": "The __main__.py for the output tar.",
                "type": pathlib.Path,
            },
        ),
    )
    for flag, extra in option_table:
        cli.add_argument(flag, required=True, **extra)

    return cli.parse_args()


def run(src: str, dst: str, package_dir: pathlib.Path, main: pathlib.Path):
    """Repack the zip at ``src`` into an uncompressed tar at ``dst``.

    Every zip entry is written beneath ``package_dir``. The archive's own
    ``__main__.py`` is replaced by the contents of ``main`` and marked
    executable. Other files are stored as 0o755 when the zip records any
    executable bit for them, and as 0o644 otherwise. Directory entries are
    emitted as tar directories.

    Args:
        src: path to the source .zip file.
        dst: path of the .tar file to create.
        package_dir: directory prefix for every tar member.
        main: file whose contents become the ``__main__.py`` of the output.
    """
    with zipfile.ZipFile(src, "r") as z_in, tarfile.TarFile(dst, "w") as z_out:
        addfile = _make_addfile(z_out, package_dir)

        for info in z_in.infolist():
            if info.is_dir():
                # Writing a directory entry as a zero-byte regular file would
                # block extraction of everything beneath it.
                addfile(info.filename, b"", mode=0o755, is_dir=True)
            elif info.filename == "__main__.py":
                # Override the __main__.py from the python_zip_file.
                addfile(info.filename, main.read_bytes(), mode=0o755)
            else:
                # The Unix mode, when recorded, lives in the upper 16 bits of
                # external_attr; it is 0 when the zip carries no such info.
                unix_mode = info.external_attr >> 16
                mode = 0o755 if unix_mode & 0o111 else 0o644
                addfile(info.filename, z_in.read(info.filename), mode=mode)


def _make_addfile(z_out: tarfile.TarFile, package_dir: pathlib.Path):
    """Return a function that appends in-memory entries to ``z_out``.

    The returned ``addfile(filename, data, mode, is_dir=False)`` stores
    ``data`` as ``package_dir / filename`` with the given permission bits.
    With ``is_dir=True`` a directory member is written and ``data`` is ignored.
    """

    def addfile(filename: str, data: bytes, mode: int, is_dir: bool = False) -> None:
        tarinfo = tarfile.TarInfo(str(package_dir / filename))
        tarinfo.mode = mode
        if is_dir:
            tarinfo.type = tarfile.DIRTYPE
            z_out.addfile(tarinfo)
            return
        tarinfo.size = len(data)
        z_out.addfile(tarinfo, io.BytesIO(data))

    return addfile


# Run the repacking tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
Loading

0 comments on commit 8aa1bbd

Please sign in to comment.