diff --git a/synthtool/gcp/common.py b/synthtool/gcp/common.py
index b996fcc52..6298a36a9 100644
--- a/synthtool/gcp/common.py
+++ b/synthtool/gcp/common.py
@@ -205,6 +205,12 @@ def py_samples_override(
         overridden_samples_kwargs["subdir"] = override_path
         return self._generic_library("python_samples", **overridden_samples_kwargs)
 
+    def python_notebooks(self, **kwargs) -> Path:
+        # kwargs["metadata"] is required to load values from .repo-metadata.json
+        if "metadata" not in kwargs:
+            kwargs["metadata"] = {}
+        return self._generic_library("python_notebooks", **kwargs)
+
     def py_library(self, **kwargs) -> Path:
         # kwargs["metadata"] is required to load values from .repo-metadata.json
         if "metadata" not in kwargs:
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/CheckPythonVersion.py b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/CheckPythonVersion.py
new file mode 100644
index 000000000..06fc6e4ae
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/CheckPythonVersion.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+
+MINIMUM_MAJOR_VERSION = 3
+MINIMUM_MINOR_VERSION = 5
+
+if (sys.version_info.major, sys.version_info.minor) >= (
+    MINIMUM_MAJOR_VERSION,
+    MINIMUM_MINOR_VERSION,
+):
+    print(f"Python version acceptable: {sys.version}")
+    sys.exit(0)
+else:
+    print(
+        f"Error: Python version less than {MINIMUM_MAJOR_VERSION}.{MINIMUM_MINOR_VERSION}"
+    )
+    sys.exit(1)
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/ExecuteChangedNotebooks.py b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/ExecuteChangedNotebooks.py
new file mode 100644
index 000000000..53974f26a
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/ExecuteChangedNotebooks.py
@@ -0,0 +1,304 @@
+#!/usr/bin/env python
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
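+
+# This script finds the notebooks that changed relative to a base branch
+# (or all notebooks, when no base branch is given), executes each one, and
+# prints a pass/fail summary table.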
+import argparse
+import dataclasses
+import datetime
+import functools
+import pathlib
+import os
+import subprocess
+from pathlib import Path
+from typing import List, Optional
+import concurrent.futures
+from tabulate import tabulate
+
+import ExecuteNotebook
+
+
+def str2bool(v):
+    if isinstance(v, bool):
+        return v
+    if v.lower() in ("yes", "true", "t", "y", "1"):
+        return True
+    elif v.lower() in ("no", "false", "f", "n", "0"):
+        return False
+    else:
+        raise argparse.ArgumentTypeError("Boolean value expected.")
+
+
+def format_timedelta(delta: datetime.timedelta) -> str:
+    """Formats a timedelta duration to [N days] %H:%M:%S format"""
+    seconds = int(delta.total_seconds())
+
+    secs_in_a_day = 86400
+    secs_in_an_hour = 3600
+    secs_in_a_min = 60
+
+    days, seconds = divmod(seconds, secs_in_a_day)
+    hours, seconds = divmod(seconds, secs_in_an_hour)
+    minutes, seconds = divmod(seconds, secs_in_a_min)
+
+    time_fmt = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+
+    if days > 0:
+        suffix = "s" if days > 1 else ""
+        return f"{days} day{suffix} {time_fmt}"
+
+    return time_fmt
+
+
+@dataclasses.dataclass
+class NotebookExecutionResult:
+    notebook: str
+    duration: datetime.timedelta
+    is_pass: bool
+    error_message: Optional[str]
+
+
+def execute_notebook(
+    artifacts_path: str,
+    variable_project_id: str,
+    variable_region: str,
+    should_log_output: bool,
+    should_use_new_kernel: bool,
+    notebook: str,
+) -> NotebookExecutionResult:
+    print(f"Running notebook: {notebook}")
+
+    result = NotebookExecutionResult(
+        notebook=notebook,
+        duration=datetime.timedelta(seconds=0),
+        is_pass=False,
+        error_message=None,
+    )
+
+    time_start = datetime.datetime.now()
+    try:
+        ExecuteNotebook.execute_notebook(
+            notebook_file_path=notebook,
+            output_file_folder=artifacts_path,
+            replacement_map={
+                "PROJECT_ID": variable_project_id,
+                "REGION": variable_region,
+            },
+            should_log_output=should_log_output,
+            should_use_new_kernel=should_use_new_kernel,
+        )
+        result.duration = datetime.datetime.now() - time_start
+        result.is_pass = True
+        print(f"{notebook} PASSED in {format_timedelta(result.duration)}.")
+    except Exception as error:
+        result.duration = datetime.datetime.now() - time_start
+        result.is_pass = False
+        result.error_message = str(error)
+        print(
+            f"{notebook} FAILED in {format_timedelta(result.duration)}: {result.error_message}"
+        )
+
+    return result
+
+
+def run_changed_notebooks(
+    test_paths_file: str,
+    base_branch: Optional[str],
+    output_folder: str,
+    variable_project_id: str,
+    variable_region: str,
+    should_parallelize: bool,
+    should_use_separate_kernels: bool,
+):
+    """
+    Run the notebooks that exist under the folders defined in the test_paths_file.
+    It only runs notebooks that have differences from the Git base_branch.
+    The executed notebooks are saved in the output_folder.
+    Variables such as variable_project_id and variable_region are also injected into the notebooks.
+
+    Args:
+        test_paths_file (str):
+            Required. The newline-delimited file listing the folders and files that need checking.
+            Folders are checked recursively.
+        base_branch (str):
+            Optional. If provided, only the files that have changed from the base_branch will be checked.
+            If not provided, all files will be checked.
+        output_folder (str):
+            Required. The folder to write executed notebooks to.
+        variable_project_id (str):
+            Required. The value for PROJECT_ID to inject into notebooks.
+        variable_region (str):
+            Required. The value for REGION to inject into notebooks.
+        should_parallelize (bool):
+            Required. Whether to run notebooks in parallel using a thread pool as opposed to in sequence.
+        should_use_separate_kernels (bool):
+            Required. Whether to run each notebook in a separate and independent virtual environment.
+            Note: Dependencies don't install correctly when this is set to True.
+            See https://github.com/nteract/papermill/issues/625
+    """
+
+    test_paths = []
+    with open(test_paths_file) as file:
+        lines = [line.strip() for line in file.readlines()]
+        test_paths = [line for line in lines if len(line) > 0]
+
+    if len(test_paths) == 0:
+        raise RuntimeError("No test folders found.")
+
+    print(f"Checking folders: {test_paths}")
+
+    # Find notebooks
+    notebooks = []
+    if base_branch:
+        print(f"Looking for notebooks that changed from branch: {base_branch}")
+        notebooks = subprocess.check_output(
+            ["git", "diff", "--name-only", f"origin/{base_branch}..."] + test_paths
+        )
+    else:
+        print("Looking for all notebooks.")
+        notebooks = subprocess.check_output(["git", "ls-files"] + test_paths)
+
+    notebooks = notebooks.decode("utf-8").split("\n")
+    notebooks = [notebook for notebook in notebooks if notebook.endswith(".ipynb")]
+    notebooks = [notebook for notebook in notebooks if len(notebook) > 0]
+    notebooks = [notebook for notebook in notebooks if Path(notebook).exists()]
+
+    # Create paths
+    artifacts_path = Path(output_folder)
+    artifacts_path.mkdir(parents=True, exist_ok=True)
+    artifacts_path.joinpath("success").mkdir(parents=True, exist_ok=True)
+    artifacts_path.joinpath("failure").mkdir(parents=True, exist_ok=True)
+
+    notebook_execution_results: List[NotebookExecutionResult] = []
+
+    if len(notebooks) > 0:
+        print(f"Found {len(notebooks)} modified notebooks: {notebooks}")
+
+        if should_parallelize and len(notebooks) > 1:
+            print(
+                "Running notebooks in parallel, so no logs will be displayed. Please wait..."
+            )
+            with concurrent.futures.ThreadPoolExecutor(max_workers=None) as executor:
+                notebook_execution_results = list(
+                    executor.map(
+                        functools.partial(
+                            execute_notebook,
+                            artifacts_path,
+                            variable_project_id,
+                            variable_region,
+                            False,
+                            should_use_separate_kernels,
+                        ),
+                        notebooks,
+                    )
+                )
+        else:
+            notebook_execution_results = [
+                execute_notebook(
+                    artifacts_path=artifacts_path,
+                    variable_project_id=variable_project_id,
+                    variable_region=variable_region,
+                    notebook=notebook,
+                    should_log_output=True,
+                    should_use_new_kernel=should_use_separate_kernels,
+                )
+                for notebook in notebooks
+            ]
+    else:
+        print("No notebooks modified in this pull request.")
+
+    print("\n=== RESULTS ===\n")
+
+    notebooks_sorted = sorted(
+        notebook_execution_results,
+        key=lambda result: result.is_pass,
+        reverse=True,
+    )
+    # Print results
+    print(
+        tabulate(
+            [
+                [
+                    os.path.basename(os.path.normpath(result.notebook)),
+                    "PASSED" if result.is_pass else "FAILED",
+                    format_timedelta(result.duration),
+                    result.error_message or "--",
+                ]
+                for result in notebooks_sorted
+            ],
+            headers=["file", "status", "duration", "error"],
+        )
+    )
+
+    print("\n=== END RESULTS ===\n")
+
+
+parser = argparse.ArgumentParser(description="Run changed notebooks.")
+parser.add_argument(
+    "--test_paths_file",
+    type=pathlib.Path,
+    help="The path to the file that has newline-delimited folders of notebooks that should be tested.",
+    required=True,
+)
+parser.add_argument(
+    "--base_branch",
+    help="The base git branch to diff against to find changed files.",
+    required=False,
+)
+parser.add_argument(
+    "--output_folder",
+    type=pathlib.Path,
+    help="The path to the folder to store executed notebooks.",
+    required=True,
+)
+parser.add_argument(
+    "--variable_project_id",
+    type=str,
+    help="The GCP project id. This is used to inject a variable value into the notebook before running.",
+    required=True,
+)
+parser.add_argument(
+    "--variable_region",
+    type=str,
+    help="The GCP region. This is used to inject a variable value into the notebook before running.",
+    required=True,
+)
+
+parser.add_argument(
+    "--should_parallelize",
+    type=str2bool,
+    nargs="?",
+    const=True,
+    default=False,
+    help="Should run notebooks in parallel.",
+)
+
+# Note: This isn't guaranteed to work correctly due to an existing Papermill issue
+# (see https://github.com/nteract/papermill/issues/625), and dependencies don't
+# install correctly when this is set to True.
+parser.add_argument(
+    "--should_use_separate_kernels",
+    type=str2bool,
+    nargs="?",
+    const=True,
+    default=False,
+    help="(Experimental) Should run each notebook in a separate and independent virtual environment.",
+)
+
+args = parser.parse_args()
+run_changed_notebooks(
+    test_paths_file=args.test_paths_file,
+    base_branch=args.base_branch,
+    output_folder=args.output_folder,
+    variable_project_id=args.variable_project_id,
+    variable_region=args.variable_region,
+    should_parallelize=args.should_parallelize,
+    should_use_separate_kernels=args.should_use_separate_kernels,
+)
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/ExecuteNotebook.py b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/ExecuteNotebook.py
new file mode 100644
index 000000000..f9fdbb857
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/ExecuteNotebook.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import sys
+import nbformat
+import os
+import errno
+from NotebookProcessors import RemoveNoExecuteCells, UpdateVariablesPreprocessor
+from typing import Dict, Tuple
+import papermill as pm
+import shutil
+import virtualenv
+import uuid
+from jupyter_client.kernelspecapp import KernelSpecManager
+
+# This script is used to execute a notebook and write out the output notebook.
+# This replaces calling nbconvert via the command line, which doesn't write the
+# output notebook correctly when there are errors during execution.
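+# For example, when a notebook cell raises an error, papermill still saves the
+# partially executed notebook, and the finally-block below moves it into the
+# "failure" output folder.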
+
+STAGING_FOLDER = "staging"
+ENVIRONMENTS_PATH = "environments"
+KERNELS_SPECS_PATH = "kernel_specs"
+
+
+def create_and_install_kernel() -> Tuple[str, str]:
+    # Create environment
+    kernel_name = str(uuid.uuid4())
+    env_name = f"{ENVIRONMENTS_PATH}/{kernel_name}"
+    # venv.create(env_name, system_site_packages=True, with_pip=True)
+    virtualenv.cli_run([env_name, "--system-site-packages"])
+
+    # Create kernel spec
+    kernel_spec = {
+        "argv": [
+            f"{env_name}/bin/python",
+            "-m",
+            "ipykernel_launcher",
+            "-f",
+            "{connection_file}",
+        ],
+        "display_name": "Python 3",
+        "language": "python",
+    }
+    kernel_spec_folder = os.path.join(KERNELS_SPECS_PATH, kernel_name)
+    kernel_spec_file = os.path.join(kernel_spec_folder, "kernel.json")
+
+    # Create kernel spec folder
+    if not os.path.exists(os.path.dirname(kernel_spec_file)):
+        try:
+            os.makedirs(os.path.dirname(kernel_spec_file))
+        except OSError as exc:  # Guard against race condition
+            if exc.errno != errno.EEXIST:
+                raise
+
+    with open(kernel_spec_file, mode="w", encoding="utf-8") as f:
+        json.dump(kernel_spec, f)
+
+    # Install kernel
+    kernel_spec_manager = KernelSpecManager()
+    kernel_spec_manager.install_kernel_spec(
+        source_dir=kernel_spec_folder, kernel_name=kernel_name
+    )
+
+    return kernel_name, env_name
+
+
+def execute_notebook(
+    notebook_file_path: str,
+    output_file_folder: str,
+    replacement_map: Dict[str, str],
+    should_log_output: bool,
+    should_use_new_kernel: bool,
+):
+    # Create staging directory if it doesn't exist
+    staging_file_path = f"{STAGING_FOLDER}/{notebook_file_path}"
+    if not os.path.exists(os.path.dirname(staging_file_path)):
+        try:
+            os.makedirs(os.path.dirname(staging_file_path))
+        except OSError as exc:  # Guard against race condition
+            if exc.errno != errno.EEXIST:
+                raise
+
+    file_name = os.path.basename(os.path.normpath(notebook_file_path))
+
+    # Create environments folder
+    if not os.path.exists(ENVIRONMENTS_PATH):
+        try:
+            os.makedirs(ENVIRONMENTS_PATH)
+        except OSError as exc:  # Guard against race condition
+            if exc.errno != errno.EEXIST:
+                raise
+
+    # Find the first existing kernel and use it as the default
+    kernel_name = next(iter(KernelSpecManager().find_kernel_specs().keys()), None)
+    env_name = None
+    if should_use_new_kernel:
+        kernel_name, env_name = create_and_install_kernel()
+
+    # Read notebook
+    with open(notebook_file_path) as f:
+        nb = nbformat.read(f, as_version=4)
+
+    has_error = False
+
+    # Execute notebook
+    try:
+        # Create preprocessors
+        remove_no_execute_cells_preprocessor = RemoveNoExecuteCells()
+        update_variables_preprocessor = UpdateVariablesPreprocessor(
+            replacement_map=replacement_map
+        )
+
+        # Use no-execute preprocessor
+        (nb, resources) = remove_no_execute_cells_preprocessor.preprocess(nb)
+
+        (nb, resources) = update_variables_preprocessor.preprocess(nb, resources)
+
+        # print(f"Staging modified notebook to: {staging_file_path}")
+        with open(staging_file_path, mode="w", encoding="utf-8") as f:
+            nbformat.write(nb, f)
+
+        # Execute notebook
+        pm.execute_notebook(
+            input_path=staging_file_path,
+            output_path=staging_file_path,
+            kernel_name=kernel_name,
+            progress_bar=should_log_output,
+            request_save_on_cell_execute=should_log_output,
+            log_output=should_log_output,
+            stdout_file=sys.stdout if should_log_output else None,
+            stderr_file=sys.stderr if should_log_output else None,
+        )
+    except Exception:
+        # print(f"Error executing the notebook: {notebook_file_path}.\n\n")
+        has_error = True
+
+        raise
+
+    finally:
+        # Clear env
+        if env_name is not None:
+            shutil.rmtree(path=env_name)
+
+        # Copy the executed notebook
+        output_file_path = os.path.join(
+            output_file_folder, "failure" if has_error else "success", file_name
+        )
+
+        # Create directories if they don't exist
+        if not os.path.exists(os.path.dirname(output_file_path)):
+            try:
+                os.makedirs(os.path.dirname(output_file_path))
+            except OSError as exc:  # Guard against race condition
+                if exc.errno != errno.EEXIST:
+                    raise
+
+        # print(f"Writing output to: {output_file_path}")
+        shutil.move(staging_file_path, output_file_path)
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/NotebookProcessors.py b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/NotebookProcessors.py
new file mode 100644
index 000000000..90a61a51c
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/NotebookProcessors.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from nbconvert.preprocessors import Preprocessor
+from typing import Dict
+import UpdateNotebookVariables
+
+
+class RemoveNoExecuteCells(Preprocessor):
+    def preprocess(self, notebook, resources=None):
+        executable_cells = []
+        for cell in notebook.cells:
+            if cell.metadata.get("tags"):
+                if "no_execute" in cell.metadata.get("tags"):
+                    continue
+            executable_cells.append(cell)
+        notebook.cells = executable_cells
+        return notebook, resources
+
+
+class UpdateVariablesPreprocessor(Preprocessor):
+    def __init__(self, replacement_map: Dict):
+        self._replacement_map = replacement_map
+
+    @staticmethod
+    def update_variables(content: str, replacement_map: Dict[str, str]):
+        # Replace variables inside .ipynb files.
+        # Looks for this format inside notebooks:
+        # VARIABLE_NAME = '[description]'
+
+        for variable_name, variable_value in replacement_map.items():
+            content = UpdateNotebookVariables.get_updated_value(
+                content=content,
+                variable_name=variable_name,
+                variable_value=variable_value,
+            )
+
+        return content
+
+    def preprocess(self, notebook, resources=None):
+        executable_cells = []
+        for cell in notebook.cells:
+            if cell.cell_type == "code":
+                cell.source = self.update_variables(
+                    content=cell.source, replacement_map=self._replacement_map
+                )
+
+            executable_cells.append(cell)
+        notebook.cells = executable_cells
+        return notebook, resources
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/UpdateNotebookVariables.py b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/UpdateNotebookVariables.py
new file mode 100644
index 000000000..b357d7854
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/UpdateNotebookVariables.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+
+"""
+This script is used to update variables in the notebook via regex.
+It requires variables to be defined in a particular format.
+For example, if your variable was PROJECT_ID, use:
+    PROJECT_ID = "[your_project_here]"
+Single-quotes also work:
+    PROJECT_ID = '[your_project_here]'
+Variables in conditionals can also be replaced:
+    PROJECT_ID == "[your_project_here]"
+"""
+
+
+def get_updated_value(content: str, variable_name: str, variable_value: str) -> str:
+    return re.sub(
+        rf"({variable_name}.*?=.*?[\"'])\[.+?\]([\"'].*?)",
+        rf"\1{variable_value}\2",
+        content,
+        flags=re.M,
+    )
+
+
+def test_update_value():
+    new_content = get_updated_value(
+        content='asdf\nPROJECT_ID = "[your-project-id]" #@param {type:"string"} \nasdf',
+        variable_name="PROJECT_ID",
+        variable_value="sample-project",
+    )
+    assert (
+        new_content
+        == 'asdf\nPROJECT_ID = "sample-project" #@param {type:"string"} \nasdf'
+    )
+
+
+def test_update_value_single_quotes():
+    new_content = get_updated_value(
+        content="PROJECT_ID = '[your-project-id]'",
+        variable_name="PROJECT_ID",
+        variable_value="sample-project",
+    )
+    assert new_content == "PROJECT_ID = 'sample-project'"
+
+
+def test_update_value_avoidance():
+    new_content = get_updated_value(
+        content="PROJECT_ID = shell_output[0] ",
+        variable_name="PROJECT_ID",
+        variable_value="sample-project",
+    )
+    assert new_content == "PROJECT_ID = shell_output[0] "
+
+
+def test_region():
+    new_content = get_updated_value(
+        content='REGION = "[your-region]" # @param {type:"string"}',
+        variable_name="REGION",
+        variable_value="us-central1",
+    )
+    assert new_content == 'REGION = "us-central1" # @param {type:"string"}'
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/cleanup-cloudbuild.yaml b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/cleanup-cloudbuild.yaml
new file mode 100644
index 000000000..890f5c4e9
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/cleanup-cloudbuild.yaml
@@ -0,0 +1,8 @@
+steps:
+  # Install Python dependencies and run the cleanup script
+  - name: ${_PYTHON_IMAGE}
+    entrypoint: /bin/sh
+    args:
+      - -c
+      - 'python3 -m pip install -U -r .cloud-build/cleanup/requirements.txt && python3 .cloud-build/cleanup/cleanup.py'
+timeout: 86400s
\ No newline at end of file
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/cleanup.py b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/cleanup.py
new file mode 100644
index 000000000..35ec9dd20
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/cleanup.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List
+from resource_cleanup_manager import (
+    ResourceCleanupManager,
+    DatasetResourceCleanupManager,
+    EndpointResourceCleanupManager,
+    ModelResourceCleanupManager,
+)
+
+
+def run_cleanup_managers(managers: List[ResourceCleanupManager], is_dry_run: bool):
+    for manager in managers:
+        type_name = manager.type_name
+
+        print(f"Fetching {type_name} resources...")
+        resources = manager.list()
+        print(f"Found {len(resources)} {type_name} resources")
+        for resource in resources:
+            if not manager.is_deletable(resource):
+                continue
+
+            if is_dry_run:
+                resource_name = manager.resource_name(resource)
+                print(f"Will delete '{type_name}': {resource_name}")
+            else:
+                manager.delete(resource)
+
+
+def set_dry_run(dry_run_status: bool) -> bool:
+    if dry_run_status:
+        print("Starting cleanup in dry run mode...")
+        return True
+    return False
+
+
+# List of all cleanup managers
+managers = [
+    DatasetResourceCleanupManager(),
+    EndpointResourceCleanupManager(),
+    ModelResourceCleanupManager(),
+]
+
+run_cleanup_managers(managers=managers, is_dry_run=set_dry_run(False))
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/requirements.txt b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/requirements.txt
new file mode 100644
index 000000000..87be9a131
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/requirements.txt
@@ -0,0 +1 @@
+google-cloud-aiplatform==1.4.3
\ No newline at end of file
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/resource_cleanup_manager.py b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/resource_cleanup_manager.py
new file mode 100644
index 000000000..11f45dcc4
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/cleanup/resource_cleanup_manager.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import abc
+from google.cloud import aiplatform
+from typing import Any
+from proto.datetime_helpers import DatetimeWithNanoseconds
+from google.cloud.aiplatform import base
+
+# If a resource was updated within this number of seconds, do not delete.
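+# (60 * 60 * 8 seconds = 8 hours.)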
+RESOURCE_UPDATE_BUFFER_IN_SECONDS = 60 * 60 * 8
+
+
+class ResourceCleanupManager(abc.ABC):
+    @property
+    @abc.abstractmethod
+    def type_name(self) -> str:
+        pass
+
+    @abc.abstractmethod
+    def list(self) -> Any:
+        pass
+
+    @abc.abstractmethod
+    def resource_name(self, resource: Any) -> str:
+        pass
+
+    @abc.abstractmethod
+    def delete(self, resource: Any):
+        pass
+
+    @abc.abstractmethod
+    def get_seconds_since_modification(self, resource: Any) -> float:
+        pass
+
+    def is_deletable(self, resource: Any) -> bool:
+        time_difference = self.get_seconds_since_modification(resource)
+
+        if self.resource_name(resource).startswith("perm"):
+            print(f"Skipping '{resource}' due to name starting with 'perm'.")
+            return False
+
+        # Check that it wasn't updated too recently, to prevent race conditions
+        if time_difference <= RESOURCE_UPDATE_BUFFER_IN_SECONDS:
+            print(
+                f"Skipping '{resource}' due to update_time being '{time_difference}' seconds ago, which is less than '{RESOURCE_UPDATE_BUFFER_IN_SECONDS}'."
+            )
+            return False
+
+        return True
+
+
+class VertexAIResourceCleanupManager(ResourceCleanupManager):
+    @property
+    @abc.abstractmethod
+    def vertex_ai_resource(self) -> base.VertexAiResourceNounWithFutureManager:
+        pass
+
+    @property
+    def type_name(self) -> str:
+        return self.vertex_ai_resource._resource_noun
+
+    def list(self) -> Any:
+        return self.vertex_ai_resource.list()
+
+    def resource_name(self, resource: Any) -> str:
+        return resource.display_name
+
+    def delete(self, resource):
+        resource.delete()
+
+    def get_seconds_since_modification(self, resource: Any) -> float:
+        update_time = resource.update_time
+        current_time = DatetimeWithNanoseconds.now(tz=update_time.tzinfo)
+        return (current_time - update_time).total_seconds()
+
+
+class DatasetResourceCleanupManager(VertexAIResourceCleanupManager):
+    vertex_ai_resource = aiplatform.datasets._Dataset
+
+
+class EndpointResourceCleanupManager(VertexAIResourceCleanupManager):
+    vertex_ai_resource = aiplatform.Endpoint
+
+    def delete(self, resource):
+        resource.delete(force=True)
+
+
+class ModelResourceCleanupManager(VertexAIResourceCleanupManager):
+    vertex_ai_resource = aiplatform.Model
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/notebook-execution-test-cloudbuild.yaml b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/notebook-execution-test-cloudbuild.yaml
new file mode 100644
index 000000000..15d1d8283
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/notebook-execution-test-cloudbuild.yaml
@@ -0,0 +1,44 @@
+steps:
+  # Show the gcloud info and check if gcloud exists
+  - name: 'gcr.io/cloud-devrel-public-resources/python-samples-testing-docker:latest'
+    entrypoint: /bin/sh
+    args:
+      - -c
+      - 'gcloud config list'
+  # Check the Python version
+  - name: 'gcr.io/cloud-devrel-public-resources/python-samples-testing-docker:latest'
+    entrypoint: /bin/sh
+    args:
+      - -c
+      - 'python3 .cloud-build/CheckPythonVersion.py'
+  # Fetch base branch if required
+  - name: 'gcr.io/cloud-devrel-public-resources/python-samples-testing-docker:latest'
+    entrypoint: /bin/sh
+    args:
+      - -c
+      - 'if [ -n "${_BASE_BRANCH}" ]; then git fetch origin "${_BASE_BRANCH}":refs/remotes/origin/"${_BASE_BRANCH}"; else echo "Skipping fetch."; fi'
+  # Install Python dependencies
+  - name: 'gcr.io/cloud-devrel-public-resources/python-samples-testing-docker:latest'
+    entrypoint: pip
+    args: ['install', '--upgrade', '--user', '--requirement', '.cloud-build/requirements.txt']
+  # Install Python dependencies and run the testing script
+  - name: 'gcr.io/cloud-devrel-public-resources/python-samples-testing-docker:latest'
+    entrypoint: /bin/sh
+    args:
+      - -c
+      - 'python3 -m pip freeze && python3 .cloud-build/ExecuteChangedNotebooks.py --test_paths_file "${_TEST_PATHS_FILE}" --base_branch "${_FORCED_BASE_BRANCH}" --output_folder ${BUILD_ID} --variable_project_id ${PROJECT_ID} --variable_region ${_GCP_REGION}'
+    env:
+      - 'IS_TESTING=1'
+  # Manually copy artifacts to GCS
+  - name: gcr.io/cloud-builders/gsutil
+    entrypoint: /bin/sh
+    args:
+      - -c
+      - 'if [ $(ls -pR "/workspace/${BUILD_ID}" | grep -v / | grep -v ^$ | wc -l) -ne 0 ]; then gsutil -m -q rsync -r "/workspace/${BUILD_ID}" "gs://${_GCS_ARTIFACTS_BUCKET}/test-artifacts/PR_${_PR_NUMBER}/BUILD_${BUILD_ID}/"; else echo "No artifacts to copy."; fi'
+  # Fail if there is anything in the failure folder
+  - name: 'gcr.io/cloud-devrel-public-resources/python-samples-testing-docker:latest'
+    entrypoint: /bin/sh
+    args:
+      - -c
+      - 'echo "Download executed notebooks with this command: \"mkdir -p artifacts && gsutil rsync -r gs://${_GCS_ARTIFACTS_BUCKET}/test-artifacts/PR_${_PR_NUMBER}/BUILD_${BUILD_ID} artifacts/\"" && if [ "$(ls -A /workspace/${BUILD_ID}/failure | wc -l)" -ne 0 ]; then exit 1; else exit 0; fi'
+timeout: 86400s
\ No newline at end of file
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/requirements.txt b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/requirements.txt
new file mode 100644
index 000000000..81a3079b8
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.cloud-build/requirements.txt
@@ -0,0 +1,8 @@
+ipython==7.0
+jupyter==1.0
+nbconvert==6.0
+papermill==2.3
+numpy==1.19
+pandas==1.2
+matplotlib==3.4
+tabulate==0.8.9
\ No newline at end of file
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/snippet-bot.yml b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/snippet-bot.yml
new file mode 100644
index 000000000..e69de29bb
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/workflows/ci.yaml b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/workflows/ci.yaml
new file mode 100644
index 000000000..d8b78f268
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/workflows/ci.yaml
@@ -0,0 +1,34 @@
+name: ci
+on: pull_request
+
+jobs:
+  format_and_lint:
+    name: notebook format and lint
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set up Python
+        uses: actions/setup-python@v2
+      - name: Fetch pull request branch
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Fetch base main branch
+        run: git fetch -u "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" main:main
+      - name: Install requirements
+        run: python3 -m pip install -U -r .github/workflows/linter/requirements.txt
+      - name: Format and lint notebooks
+        run: |
+          set +e
+          .github/workflows/linter/run_linter.sh -t
+          RTN=$?
+          if [ "$RTN" != "0" ]; then
+            echo "There were problems formatting/linting the notebooks."
+            echo "Please run the following commands locally from the root directory to attempt to autofix the issues:"
+            echo ""
+            echo "python3 -m pip install -U -r .github/workflows/linter/requirements.txt"
+            echo ".github/workflows/linter/run_linter.sh"
+            echo ""
+            echo "If it can't be autofixed, please fix them manually."
+            echo "Then, commit the fixes and push again."
+            exit 1
+          fi
\ No newline at end of file
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/workflows/linter/requirements.txt b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/workflows/linter/requirements.txt
new file mode 100644
index 000000000..2f011649c
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/workflows/linter/requirements.txt
@@ -0,0 +1,9 @@
+git+https://github.com/tensorflow/docs
+ipython
+jupyter
+nbconvert
+black==21.8b0
+pyupgrade==2.7.3
+isort==5.6.4
+flake8==3.9.0
+nbqa==1.1.0
\ No newline at end of file
diff --git a/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/workflows/linter/run_linter.sh b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/workflows/linter/run_linter.sh
new file mode 100644
index 000000000..7b7cadb22
--- /dev/null
+++ b/synthtool/gcp/templates/python_notebooks_testing_pipeline/.github/workflows/linter/run_linter.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script automatically formats and lints all notebooks that have changed
+# from the head of the main branch.
+#
+# Options:
+#    -t: Test mode. Only check whether formatting and linting are required, but
+#        make no changes to files.
+#
+# Returns:
+#    This script will return 0 if linting was successful/unneeded and 1 if
+#    there were any errors.
+
+# `+e` enables the script to continue even when a command fails
+set +e
+
+# `-o pipefail` sets a pipeline's exit code to that of the rightmost command
+# that exited with a non-zero status
+set -o pipefail
+
+# Use RTN to return a non-zero value if the test fails.
+RTN="0"
+
+is_test=false
+
+# Process all options supplied on the command line
+while getopts 'tc' arg; do
+  case $arg in
+    't')
+      is_test=true
+      ;;
+    *)
+      echo "Unimplemented flag"
+      exit 1
+      ;;
+  esac
+done
+
+echo "Test mode: $is_test"
+
+# Only check notebooks in test folders modified in this pull request.
+# Note: Use process substitution to persist the data in the array
+notebooks=()
+while read -r file || [ -n "$file" ];
+do
+  notebooks+=("$file")
+done < <(git diff --name-only main... | grep '\.ipynb$')
+
+problematic_notebooks=()
+if [ ${#notebooks[@]} -gt 0 ]; then
+  for notebook in "${notebooks[@]}"
+  do
+    if [ -f "$notebook" ]; then
+      echo "Checking notebook: ${notebook}"
+
+      NBFMT_RTN="0"
+      BLACK_RTN="0"
+      PYUPGRADE_RTN="0"
+      ISORT_RTN="0"
+      FLAKE8_RTN="0"
+
+      if [ "$is_test" = true ] ; then
+        echo "Running nbfmt..."
+        python3 -m tensorflow_docs.tools.nbfmt --remove_outputs --test "$notebook"
+        NBFMT_RTN=$?
+        # echo "Running black..."
+        # python3 -m nbqa black "$notebook" --check
+        # BLACK_RTN=$?
+        echo "Running pyupgrade..."
+        python3 -m nbqa pyupgrade "$notebook"
+        PYUPGRADE_RTN=$?
+        echo "Running isort..."
+        python3 -m nbqa isort "$notebook" --check
+        ISORT_RTN=$?
+        echo "Running flake8..."
+        python3 -m nbqa flake8 "$notebook" --show-source --extend-ignore=W391,E501,F821,E402,F404,W503,W291,E203,E999,E111,E113
+        FLAKE8_RTN=$?
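+        # Test mode only reports problems; the else branch below re-runs the
+        # same tools in mutating mode to apply fixes in place.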
+      else
+        echo "Running black..."
+        python3 -m black "$notebook"
+        BLACK_RTN=$?
+        echo "Running pyupgrade..."
+        python3 -m nbqa pyupgrade "$notebook" --nbqa-mutate
+        PYUPGRADE_RTN=$?
+        echo "Running isort..."
+        python3 -m nbqa isort "$notebook" --nbqa-mutate
+        ISORT_RTN=$?
+        echo "Running nbfmt..."
+        python3 -m tensorflow_docs.tools.nbfmt --remove_outputs "$notebook"
+        NBFMT_RTN=$?
+        echo "Running flake8..."
+        python3 -m nbqa flake8 "$notebook" --show-source --extend-ignore=W391,E501,F821,E402,F404,W503,W291,E203,E999,E111,E113
+        FLAKE8_RTN=$?
+      fi
+
+      NOTEBOOK_RTN="0"
+
+      if [ "$NBFMT_RTN" != "0" ]; then
+        NOTEBOOK_RTN="$NBFMT_RTN"
+        printf "nbfmt: Failed\n"
+      fi
+
+      if [ "$BLACK_RTN" != "0" ]; then
+        NOTEBOOK_RTN="$BLACK_RTN"
+        printf "black: Failed\n"
+      fi
+
+      if [ "$PYUPGRADE_RTN" != "0" ]; then
+        NOTEBOOK_RTN="$PYUPGRADE_RTN"
+        printf "pyupgrade: Failed\n"
+      fi
+
+      if [ "$ISORT_RTN" != "0" ]; then
+        NOTEBOOK_RTN="$ISORT_RTN"
+        printf "isort: Failed\n"
+      fi
+
+      if [ "$FLAKE8_RTN" != "0" ]; then
+        NOTEBOOK_RTN="$FLAKE8_RTN"
+        printf "flake8: Failed\n"
+      fi
+
+      echo "Notebook lint finished with return code = $NOTEBOOK_RTN"
+      echo ""
+      if [ "$NOTEBOOK_RTN" != "0" ]
+      then
+        problematic_notebooks+=("$notebook")
+        RTN=$NOTEBOOK_RTN
+      fi
+    fi
+  done
+else
+  echo "No notebooks modified in this pull request."
+fi
+
+echo "All tests finished. Exiting with return code = $RTN"
+
+if [ ${#problematic_notebooks[@]} -gt 0 ]; then
+  echo "The following notebooks could not be automatically linted:"
+  printf '%s\n' "${problematic_notebooks[@]}"
+fi
+
+exit "$RTN"
\ No newline at end of file
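
For reference, the notebook runner added above can be invoked locally the same way the Cloud Build step does. The values below are illustrative placeholders (the test-paths file name is hypothetical), and the .cloud-build/requirements.txt dependencies must be installed first:

    python3 .cloud-build/ExecuteChangedNotebooks.py \
        --test_paths_file notebook_test_paths.txt \
        --base_branch main \
        --output_folder artifacts \
        --variable_project_id my-test-project \
        --variable_region us-central1

Executed notebooks land in the output folder under success/ or failure/, which are the same folders the Cloud Build config inspects to decide whether the build fails.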