diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1a59817..753f905 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,7 +17,7 @@ variables: PYTHON_IMAGE: python:3.9 - RUN_DIR: ./cicd/runtime + RUN_DIR: ./.gitlab/runtime USER: $GITLAB_USER_LOGIN # Configure mysql environment variables (https://hub.docker.com/_/mysql/) MYSQL_ROOT_PASSWORD: root @@ -25,12 +25,6 @@ variables: workflow: auto_cancel: on_new_commit: interruptible - rules: - - changes: - - .gitlab-ci.yml - - pyproject.toml - - "*.py" - - src/python/**/*.py default: image: $PYTHON_IMAGE @@ -58,6 +52,12 @@ python:prepare_venv: - source $RUN_DIR/venv/bin/activate script: - pip install -e .[cicd] + rules: + - changes: + - .gitlab-ci.yml + - pyproject.toml + - src/ensembl/**/*.py + - tests/**/*.py # Test stage @@ -65,6 +65,7 @@ python:prepare_venv: stage: test before_script: - source $RUN_DIR/venv/bin/activate + needs: ['python:prepare_venv'] ## Linting, type checking and formatting @@ -130,6 +131,23 @@ python:pytest: paths: - $RUN_DIR/coverage +## License-related checks: NOTICE file and license headers + +.license: + stage: test + variables: + CMD: python .gitlab/scripts/check_license.py + +license:notice: + extends: .license + script: + - $CMD notice + +license:header: + extends: .license + script: + - $CMD header + # Deploy stage ## Generate GitLab pages with HTML coverage report diff --git a/.gitlab/scripts/check_license.py b/.gitlab/scripts/check_license.py new file mode 100755 index 0000000..f936e45 --- /dev/null +++ b/.gitlab/scripts/check_license.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Checks if one of the requirements of the license is met by the repository."""
+
+import argparse
+import datetime
+from itertools import zip_longest
+from os import PathLike
+from pathlib import Path
+import re
+import sys
+
+
+# Do not show traceback for an easier error readability
+sys.tracebacklimit = 0
+
+
+_SCRIPTS_DIR = Path(__file__).absolute().parent
+_TEMPLATES_DIR = _SCRIPTS_DIR.parent / "templates"
+_ROOT_PATH = _SCRIPTS_DIR.parents[1]
+# Set of file suffixes (without the leading dot) for which we expect to find the license header
+_SUFFIXES_WITH_HEADER = {
+    "pl",
+    "pm",
+    "py",
+    "nf",
+    "config",
+    "mysql",
+    "pgsql",
+    "sql",
+    "sqlite",
+    "bash",
+    "sh",
+    "toml",
+    "yml",
+}
+_EXCLUDE_DIRS = {
+    Path(".gitlab/runtime"),
+    Path("src/python/tests/data"),
+    Path("src/python/tests/utils/test_rloader"),
+}
+
+
+def check_notice(notice_template: PathLike) -> None:
+    """Checks if the NOTICE file is correct and the copyright year is up-to-date.
+
+    Args:
+        notice_template: Path to notice template file.
+
+    Raises:
+        RuntimeError: If the NOTICE file has incorrect format or the copyright year is not correct.
+
+    """
+    notice_file = _ROOT_PATH / "NOTICE"
+    year = datetime.date.today().year
+    report = ""
+    with Path(notice_template).open() as tpl, notice_file.open() as fh:
+        # Add dummy values if one of the files turns out to be shorter than the other
+        for tpl_line, fh_line in zip_longest(tpl, fh, fillvalue=""):
+            tpl_line = tpl_line.replace("-]", f"-{year}]")  # close the open-ended year range
+            tpl_line = tpl_line.rstrip("\n")
+            fh_line = fh_line.rstrip("\n")
+            if tpl_line != fh_line:
+                report += f"> Expected: '{tpl_line}'\n Found: '{fh_line}'\n"
+    if report:
+        raise RuntimeError(f"Incorrect NOTICE file format or copyright year\n\n{report}")
+    else:
+        print("NOTICE file is correct and the copyright year is up-to-date")
+
+
+def check_header(header_template: PathLike) -> None:
+    """Checks if every code file in the repository has a valid license header.
+
+    Args:
+        header_template: Path to license header template file.
+
+    Raises:
+        RuntimeError: If at least one file has missing or incorrect license header.
+
+    """
+    template = Path(header_template).read_text()
+    # Escape symbols that may be interpreted as part of a regex otherwise
+    template = template.replace(".", r"\.").replace("(", r"\(").replace(")", r"\)")
+    # Allow comment symbols and additional spaces before each header line, and single newline instead of two
+    template = r"[#/-]*\s*" + template.replace("\n\n", "(\n)+").replace("\n", "\n[#/-]*\\s*")
+    # Compile the pattern once; it is searched against every candidate file below
+    prog = re.compile(template)
+    report_files = []
+    for file_path in _ROOT_PATH.rglob("*.*"):
+        if file_path.is_file() and (file_path.suffix[1:] in _SUFFIXES_WITH_HEADER):
+            if _EXCLUDE_DIRS.intersection(set(file_path.relative_to(_ROOT_PATH).parents)):
+                # Do not check any files that belong to one of the directories to exclude
+                continue
+            if not prog.search(file_path.read_text()):
+                report_files.append(str(file_path))
+    if report_files:
+        report = "\n".join(report_files)
+        raise RuntimeError(
+            f"{len(report_files)} code files have missing or incorrect license header\n\n{report}"
+        )
+    else:
+        print("Every code file in the repository has a valid license header")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description=(
+            "Checks if the NOTICE file is correct or if every code file in the repository has a valid "
+            "license header."
+        )
+    )
+    # Create separate subparsers for each case "notice" and "header"
+    subparsers = parser.add_subparsers(title="License aspect to check", required=True, dest="{notice,header}")
+    parser_notice = subparsers.add_parser("notice", help="Check NOTICE file format and copyright year")
+    parser_notice.add_argument(
+        "--template",
+        type=Path,
+        required=False,
+        default=_TEMPLATES_DIR / "notice.tpl",
+        help="Path to notice template file",
+    )
+    parser_notice.set_defaults(check_function=check_notice)
+    parser_header = subparsers.add_parser("header", help="Check if code files have proper license header")
+    parser_header.add_argument(
+        "--template",
+        type=Path,
+        required=False,
+        default=_TEMPLATES_DIR / "license_header.tpl",
+        help="Path to license header template file",
+    )
+    parser_header.set_defaults(check_function=check_header)
+    args = parser.parse_args()
+    # Run the corresponding function depending on the type of check selected by the user
+    args.check_function(args.template)
diff --git a/.gitlab/templates/license_header.tpl b/.gitlab/templates/license_header.tpl
new file mode 100644
index 0000000..ad93e24
--- /dev/null
+++ b/.gitlab/templates/license_header.tpl
@@ -0,0 +1,14 @@
+See the NOTICE file distributed with this work for additional information
+regarding copyright ownership.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/.gitlab/templates/notice.tpl b/.gitlab/templates/notice.tpl new file mode 100644 index 0000000..92332f3 --- /dev/null +++ b/.gitlab/templates/notice.tpl @@ -0,0 +1,5 @@ +Ensembl +Copyright [2016-] EMBL-European Bioinformatics Institute + +This product includes software developed at: +- EMBL-European Bioinformatics Institute diff --git a/NOTICE b/NOTICE index 604a57d..ae774eb 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Ensembl -Copyright [2018-2024] EMBL-European Bioinformatics Institute +Copyright [2016-2024] EMBL-European Bioinformatics Institute This product includes software developed at: - EMBL-European Bioinformatics Institute diff --git a/src/python/ensembl/core/__init__.py b/src/python/ensembl/core/__init__.py index e69de29..b704884 100644 --- a/src/python/ensembl/core/__init__.py +++ b/src/python/ensembl/core/__init__.py @@ -0,0 +1,13 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.