From bbea6a932ccde0b71fb2bdcd4d49413581b7add2 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Thu, 9 Jan 2025 13:24:18 +0100 Subject: [PATCH] CLI-298: auto detect dockerignore for `Image.from_dockerfile()` and `Image.dockerfile_commands()` methods (#2729) --- modal/_utils/docker_utils.py | 36 +++++++++- modal/image.py | 108 ++++++++++++++++++---------- modal_version/__init__.py | 2 +- modal_version/_version_generated.py | 2 +- test/docker_utils_test.py | 93 +++++++++++++++++++++++- test/image_test.py | 31 ++++++++ 6 files changed, 231 insertions(+), 41 deletions(-) diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index 44fc242ef..2741b3370 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -1,7 +1,8 @@ # Copyright Modal Labs 2024 import re import shlex -from typing import Sequence +from pathlib import Path +from typing import Optional, Sequence from ..exception import InvalidError @@ -62,3 +63,36 @@ def extract_copy_command_patterns(dockerfile_lines: Sequence[str]) -> list[str]: current_command = "" return list(copy_source_patterns) + + +def find_dockerignore_file(context_directory: Path, dockerfile_path: Optional[Path] = None) -> Optional[Path]: + """ + Find dockerignore file relative to current context directory + and if dockerfile path is provided, check if specific .dockerignore + file exists in the same directory as + Finds the most specific dockerignore file that exists. + """ + + def valid_dockerignore_file(fp): + # fp has to exist + if not fp.exists(): + return False + # fp has to be subpath to current working directory + if not fp.is_relative_to(context_directory): + return False + + return True + + generic_name = ".dockerignore" + possible_locations = [] + if dockerfile_path: + specific_name = f"{dockerfile_path.name}.dockerignore" + # 1. check if specific .dockerignore file exists in the same directory as + possible_locations.append(dockerfile_path.parent / specific_name) + # 2. check if generic .dockerignore file exists in the same directory as + possible_locations.append(dockerfile_path.parent / generic_name) + + # 3. check if generic .dockerignore file exists in current working directory + possible_locations.append(context_directory / generic_name) + + return next((e for e in possible_locations if valid_dockerignore_file(e)), None) diff --git a/modal/image.py b/modal/image.py index 9f3fc73f2..ba5d93e07 100644 --- a/modal/image.py +++ b/modal/image.py @@ -33,6 +33,7 @@ from ._utils.deprecation import deprecation_error, deprecation_warning from ._utils.docker_utils import ( extract_copy_command_patterns, + find_dockerignore_file, ) from ._utils.function_utils import FunctionInfo from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors @@ -71,6 +72,17 @@ CONTAINER_REQUIREMENTS_PATH = "/modal_requirements.txt" +class _AutoDockerIgnoreSentinel: + def __repr__(self) -> str: + return f"{__name__}.AUTO_DOCKERIGNORE" + + def __call__(self, _: Path) -> bool: + raise NotImplementedError("This is only a placeholder. Do not call") + + +AUTO_DOCKERIGNORE = _AutoDockerIgnoreSentinel() + + def _validate_python_version( python_version: Optional[str], builder_version: ImageBuilderVersion, allow_micro_granularity: bool = True ) -> str: @@ -266,6 +278,47 @@ def ignore_with_include(source: Path) -> bool: return _Mount._add_local_dir(Path("./"), PurePosixPath("/"), ignore=ignore_with_include) +def _create_context_mount_function( + ignore: Union[Sequence[str], Callable[[Path], bool]], + dockerfile_cmds: list[str] = [], + dockerfile_path: Optional[Path] = None, + context_mount: Optional[_Mount] = None, +): + if dockerfile_path and dockerfile_cmds: + raise InvalidError("Cannot provide both dockerfile and docker commands") + + if context_mount: + if ignore is not AUTO_DOCKERIGNORE: + raise InvalidError("Cannot set both `context_mount` and `ignore`") + + def identity_context_mount_fn() -> Optional[_Mount]: + return context_mount + + return identity_context_mount_fn + elif ignore is AUTO_DOCKERIGNORE: + + def auto_created_context_mount_fn() -> Optional[_Mount]: + context_dir = Path.cwd() + dockerignore_file = find_dockerignore_file(context_dir, dockerfile_path) + ignore_fn = ( + FilePatternMatcher(*dockerignore_file.read_text("utf8").splitlines()) + if dockerignore_file + else _ignore_fn(()) + ) + + cmds = dockerfile_path.read_text("utf8").splitlines() if dockerfile_path else dockerfile_cmds + return _create_context_mount(cmds, ignore_fn=ignore_fn, context_dir=context_dir) + + return auto_created_context_mount_fn + + def auto_created_context_mount_fn() -> Optional[_Mount]: + # use COPY commands and ignore patterns to construct implicit context mount + cmds = dockerfile_path.read_text("utf8").splitlines() if dockerfile_path else dockerfile_cmds + return _create_context_mount(cmds, ignore_fn=_ignore_fn(ignore), context_dir=Path.cwd()) + + return auto_created_context_mount_fn + + class _ImageRegistryConfig: """mdmd:hidden""" @@ -1221,7 +1274,7 @@ def dockerfile_commands( # modal.Mount with local files to supply as build context for COPY commands context_mount: Optional[_Mount] = None, force_build: bool = False, # Ignore cached builds, similar to 'docker build --no-cache' - ignore: Union[Sequence[str], Callable[[Path], bool]] = (), + ignore: Union[Sequence[str], Callable[[Path], bool]] = AUTO_DOCKERIGNORE, ) -> "_Image": """ Extend an image with arbitrary Dockerfile-like commands. @@ -1232,6 +1285,11 @@ def dockerfile_commands( from pathlib import Path from modal import FilePatternMatcher + # By default a .dockerignore file is used if present in the current working directory + image = modal.Image.debian_slim().dockerfile_commands( + ["COPY data /data"], + ) + image = modal.Image.debian_slim().dockerfile_commands( ["COPY data /data"], ignore=["*.venv"], @@ -1264,22 +1322,6 @@ def dockerfile_commands( if not cmds: return self - if context_mount: - if ignore: - raise InvalidError("Cannot set both `context_mount` and `ignore`") - - def identity_context_mount_fn() -> Optional[_Mount]: - return context_mount - - context_mount_function = identity_context_mount_fn - else: - - def auto_created_context_mount_fn() -> Optional[_Mount]: - # use COPY commands and ignore patterns to construct implicit context mount - return _create_context_mount(cmds, ignore_fn=_ignore_fn(ignore), context_dir=Path.cwd()) - - context_mount_function = auto_created_context_mount_fn - def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: return DockerfileSpec(commands=["FROM base", *cmds], context_files=context_files) @@ -1288,7 +1330,9 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: dockerfile_function=build_dockerfile, secrets=secrets, gpu_config=parse_gpu_config(gpu), - context_mount_function=context_mount_function, + context_mount_function=_create_context_mount_function( + ignore=ignore, dockerfile_cmds=cmds, context_mount=context_mount + ), force_build=self.force_build or force_build, ) @@ -1653,7 +1697,7 @@ def from_dockerfile( secrets: Sequence[_Secret] = [], gpu: GPU_T = None, add_python: Optional[str] = None, - ignore: Union[Sequence[str], Callable[[Path], bool]] = (), + ignore: Union[Sequence[str], Callable[[Path], bool]] = AUTO_DOCKERIGNORE, ) -> "_Image": """Build a Modal image from a local Dockerfile. @@ -1666,6 +1710,12 @@ def from_dockerfile( from pathlib import Path from modal import FilePatternMatcher + # By default a .dockerignore file is used if present in the current working directory + image = modal.Image.from_dockerfile( + "./Dockerfile", + add_python="3.12", + ) + image = modal.Image.from_dockerfile( "./Dockerfile", add_python="3.12", @@ -1700,22 +1750,6 @@ def from_dockerfile( ``` """ - if context_mount: - if ignore: - raise InvalidError("Cannot set both `context_mount` and `ignore`") - - def identity_context_mount_fn() -> Optional[_Mount]: - return context_mount - - context_mount_function = identity_context_mount_fn - else: - - def auto_created_context_mount_fn() -> Optional[_Mount]: - lines = Path(path).read_text("utf8").splitlines() - return _create_context_mount(lines, ignore_fn=_ignore_fn(ignore), context_dir=Path.cwd()) - - context_mount_function = auto_created_context_mount_fn - # --- Build the base dockerfile def build_dockerfile_base(version: ImageBuilderVersion) -> DockerfileSpec: @@ -1726,7 +1760,9 @@ def build_dockerfile_base(version: ImageBuilderVersion) -> DockerfileSpec: gpu_config = parse_gpu_config(gpu) base_image = _Image._from_args( dockerfile_function=build_dockerfile_base, - context_mount_function=context_mount_function, + context_mount_function=_create_context_mount_function( + ignore=ignore, dockerfile_path=Path(path), context_mount=context_mount + ), gpu_config=gpu_config, secrets=secrets, ) diff --git a/modal_version/__init__.py b/modal_version/__init__.py index a94d38ea3..896011e5c 100644 --- a/modal_version/__init__.py +++ b/modal_version/__init__.py @@ -7,7 +7,7 @@ major_number = 0 # Bump this manually on breaking changes, then reset the number in _version_generated.py -minor_number = 71 +minor_number = 72 # Right now, automatically increment the patch number in CI __version__ = f"{major_number}.{minor_number}.{max(build_number, 0)}" diff --git a/modal_version/_version_generated.py b/modal_version/_version_generated.py index 1dc1e4f69..870b0b006 100644 --- a/modal_version/_version_generated.py +++ b/modal_version/_version_generated.py @@ -1,4 +1,4 @@ # Copyright Modal Labs 2025 # Note: Reset this value to -1 whenever you make a minor `0.X` release of the client. -build_number = 13 # git: 1c4cfc0 +build_number = -1 # git: 1c4cfc0 diff --git a/test/docker_utils_test.py b/test/docker_utils_test.py index a7c97517d..3ec2d16e8 100644 --- a/test/docker_utils_test.py +++ b/test/docker_utils_test.py @@ -1,8 +1,9 @@ # Copyright Modal Labs 2024 import pytest +from pathlib import Path from tempfile import NamedTemporaryFile, TemporaryDirectory -from modal._utils.docker_utils import extract_copy_command_patterns +from modal._utils.docker_utils import extract_copy_command_patterns, find_dockerignore_file @pytest.mark.parametrize( @@ -48,7 +49,7 @@ def test_extract_copy_command_patterns(copy_commands, expected_patterns): assert copy_command_sources == expected_patterns -@pytest.mark.usefixture("tmp_cwd") +@pytest.mark.usefixtures("tmp_cwd") def test_image_dockerfile_copy_messy(): with TemporaryDirectory(dir="./") as tmp_dir: dockerfile = NamedTemporaryFile("w", delete=False) @@ -96,3 +97,91 @@ def test_image_dockerfile_copy_messy(): f"{tmp_dir}/file2.txt", ] ) + + +@pytest.mark.usefixtures("tmp_cwd") +def test_find_generic_cwd_dockerignore_file(): + test_cwd = Path.cwd() + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + dockerfile_path = dir1 / "Dockerfile" + dockerignore_path = tmp_path / ".dockerignore" + dockerignore_path.write_text("**/*") + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) == dockerignore_path + + +@pytest.mark.usefixtures("tmp_cwd") +def test_dont_find_specific_dockerignore_file(): + test_cwd = Path.cwd() + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + dockerfile_path = dir1 / "foo" + dockerignore_path = tmp_path / "foo.dockerignore" + dockerignore_path.write_text("**/*") + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) is None + + +@pytest.mark.usefixtures("tmp_cwd") +def test_prefer_specific_cwd_dockerignore_file(): + test_cwd = Path.cwd() + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + dockerfile_path = tmp_path / "Dockerfile" + generic_dockerignore_path = tmp_path / ".dockerignore" + generic_dockerignore_path.write_text("**/*.py") + specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" + specific_dockerignore_path.write_text("**/*") + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) == specific_dockerignore_path + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) != generic_dockerignore_path + + +@pytest.mark.usefixtures("tmp_cwd") +def test_dont_find_nested_dockerignore_file(): + test_cwd = Path.cwd() + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + dir2 = dir1 / "dir2" + dir2.mkdir() + + dockerfile_path = dir1 / "Dockerfile" + dockerfile_path.write_text("COPY . /dummy") + + # should ignore parent ones + generic_dockerignore_path = tmp_path / ".dockerignore" + generic_dockerignore_path.write_text("**/*") + specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" + specific_dockerignore_path.write_text("**/*") + + # should ignore nested ones + nested_generic_dockerignore_path = dir2 / ".dockerignore" + nested_generic_dockerignore_path.write_text("**/*") + nested_specific_dockerignore_path = dir2 / "Dockerfile.dockerignore" + nested_specific_dockerignore_path.write_text("**/*") + + assert find_dockerignore_file(dir1, dockerfile_path) is None + + +@pytest.mark.usefixtures("tmp_cwd") +def test_find_next_to_dockerfile_dockerignore_file(): + test_cwd = Path.cwd() + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + dockerfile_path = dir1 / "Dockerfile" + dockerignore_path = tmp_path / ".dockerignore" + dockerignore_path.write_text("**/*") + + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) == dockerignore_path diff --git a/test/image_test.py b/test/image_test.py index a407e125c..3c84702d8 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -829,6 +829,37 @@ def test_image_docker_command_copy( Path(maybe_dockerfile.name).unlink() +@pytest.mark.parametrize("use_dockerfile", (True, False)) +@pytest.mark.usefixtures("tmp_cwd") +def test_image_dockerfile_copy_auto_dockerignore(builder_version, servicer, client, use_dockerfile): + rel_top_dir = Path("top") + rel_top_dir.mkdir() + (rel_top_dir / "data.txt").write_text("world") + (rel_top_dir / "file.py").write_text("world") + dockerfile = rel_top_dir / "Dockerfile" + dockerfile.write_text(f"COPY {rel_top_dir} /dummy\n") + + # automatically find the .dockerignore file + dockerignore = Path(".") / ".dockerignore" + dockerignore.write_text("**/*.txt") + + app = App() + if use_dockerfile: + image = Image.debian_slim().from_dockerfile(dockerfile) + layer = 1 + else: + image = Image.debian_slim().dockerfile_commands([f"COPY {rel_top_dir} /dummy"]) + layer = 0 + app.function(image=image)(dummy) + + with app.run(client=client): + layers = get_image_layers(image.object_id, servicer) + assert f"COPY {rel_top_dir} /dummy" in layers[layer].dockerfile_commands + mount_id = layers[layer].context_mount_id + files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) + assert files == {Path("/") / rel_top_dir / "Dockerfile", Path("/") / rel_top_dir / "file.py"} + + @pytest.mark.parametrize("use_dockerfile", (True, False)) @pytest.mark.usefixtures("tmp_cwd") def test_image_dockerfile_copy_ignore_from_file(builder_version, servicer, client, use_dockerfile):