From c65e9c46778256186100cbd0b5822ba09af8538a Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 9 Sep 2024 22:56:57 -0400 Subject: [PATCH] jupyter notebook to CWL docs --- CHANGES.rst | 4 + docs/examples/jupyter_repo2cwl_python.py | 23 ++++ docs/source/package.rst | 128 +++++++++++++++++++++++ docs/source/references.rst | 2 + 4 files changed, 157 insertions(+) create mode 100644 docs/examples/jupyter_repo2cwl_python.py diff --git a/CHANGES.rst b/CHANGES.rst index fbe5a17e9..d0617fce7 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -25,6 +25,10 @@ Changes: - Add better validation off well-known `CWL` ``$namespaces`` as reserved keywords when deploying a `Process` to ensure better interoperability between implementations and adequate metadata resolution (relates to `#463 `_). +- Add documentation about *Jupyter Notebook* to `CWL` conversion + utility `ipython2cwl <https://github.com/common-workflow-lab/ipython2cwl>`_ + and a sample `crim-ca/ncml2stac <https://github.com/crim-ca/ncml2stac>`_ repository + making use of it with the `Weaver` `CLI` to generate a deployed `OGC API - Processes` definition. Fixes: ------ diff --git a/docs/examples/jupyter_repo2cwl_python.py b/docs/examples/jupyter_repo2cwl_python.py new file mode 100644 index 000000000..8c62e6b11 --- /dev/null +++ b/docs/examples/jupyter_repo2cwl_python.py @@ -0,0 +1,23 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # This block is only evaluated by type checkers (and jupyter-repo2cwl). + # Therefore, it is not executed when running the notebook. + # In other words, 'ipython2cwl' does not even need to be installed! 
+ from ipython2cwl.iotypes import CWLFilePathInput, CWLFilePathOutput + +import csv +import json + +input_file: "CWLFilePathInput" = "data.csv" +with open(input_file, mode="r", encoding="utf-8") as f: + csv_reader = csv.reader(f) + data = [line for line in csv_reader if line] + +headers = data[0] +values = data[1:] +items = [{k: v} for val in values for k, v in zip(headers, val)] + +output_file: "CWLFilePathOutput" = "output.json" +with open(output_file, mode="w", encoding="utf-8") as f: + json.dump(items, f) diff --git a/docs/source/package.rst b/docs/source/package.rst index 8f87198ce..1fa01bb1e 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -95,6 +95,134 @@ provided to tell :term:`CWL` how to map :term:`Job` input values to the dynamica :caption: Sample CWL definition of a Python script :name: example_app_pkg_script +.. seealso:: + See the :ref:`app_pkg_python` section for more utilities to help create an :term:`Application Package` from Python. + +.. seealso:: + For other programming languages, see |cwl-dev-tools|_ for a list of related utilities that help when working + with :term:`CWL`, some of which offer conversion capabilities. + +.. _app_pkg_python: + +Python Applications +~~~~~~~~~~~~~~~~~~~~~~~~ + +When the :term:`Application Package` to be generated consists of a Python script, which happens to make use of +the builtin |python-argparse|_ package, it is possible to employ the |argparse2tool|_ utility, which will automatically +generate a corresponding :term:`CWL` definition using the specified :term:`CLI` arguments and their types. + +The |argparse2tool|_ utility can help quickly generate a valid :term:`CWL` definition, but it is the responsibility +of the user to validate that converted arguments have the appropriate types, or any additional metadata required to +properly describe the intended :term:`Process`. 
Notably, users might find the need to add appropriate ``format`` +definitions to the :term:`I/O`, since those will generally be missing descriptive :term:`Media-Types`. + +.. note:: + Although |argparse2tool|_ can help in the initial :term:`CWL` generation procedure, it is recommended to apply + additional containerization best-practices, such as described in :ref:`app_pkg_script`, to increase chances to + obtain a replicable and reusable :term:`Application Package` definition. + +.. seealso:: + For pure Python scripts not using |python-argparse|_, the |scriptcwl|_ utility can be considered instead. + +.. seealso:: + For Python code embedded in |jupyter-notebooks|_, refer to :ref:`app_pkg_jupyter_notebook` for more details. + +.. |python-argparse| replace:: ``argparse`` +.. _python-argparse: https://docs.python.org/3/library/argparse.html + +.. |argparse2tool| replace:: ``argparse2tool`` +.. _argparse2tool: https://github.com/hexylena/argparse2tool + +.. |scriptcwl| replace:: ``scriptcwl`` +.. _scriptcwl: https://github.com/NLeSC/scriptcwl + +.. _app_pkg_jupyter_notebook: + +Jupyter Notebook Applications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When working on experimental or research applications, |jupyter-notebooks|_ are a popular development environment, +due to their convenient interface for displaying results, interacting with visualization tools, or the larger plugin +ecosystem they offer. However, |jupyter-notebooks|_ are typically insufficient by themselves to describe a complete +application. To help developers transition from |jupyter-notebooks|_ to :ref:`app_pkg_docker`, which ensures the +:term:`Application Package` can be deployed and reused, the |jupyter-repo2cwl|_ utility can be employed. + +Using |jupyter-repo2cwl|_ (when installed in the Python environment), it is possible to directly convert a Git +repository reference containing |jupyter-notebooks|_ into deployable :term:`CWL` with a :term:`Docker` container. 
+To do this, the utility uses two strategies under the hood: + +1. |jupyterhub-repo2docker|_ is employed to convert a Git repository into a :term:`Docker` container, with any + applicable package requirements, project metadata, and advanced configuration details. +2. Python typing annotations provided by `IPython2CWL <https://github.com/common-workflow-lab/ipython2cwl>`_ define the :term:`CWL` :term:`I/O` + from variables and results located within the |jupyter-notebooks|_. + +.. note:: + Because |jupyterhub-repo2docker|_ is employed, which is highly adaptable to many use cases, all typical Python + project `Configuration Files `_, + such as ``requirements.txt``, ``environment.yml``, ``setup.py``, ``pyproject.toml``, etc. can be employed. + The :term:`Docker` container dependencies can be provided with an explicit ``Dockerfile`` as well. + Please refer to the official documentation for all advanced configuration options. + +Because Python type annotations are employed with |jupyter-repo2cwl|_ +to indicate which variables will contain the :term:`CWL` :term:`I/O` references, it is actually possible +to annotate |jupyter-notebooks|_ *without any additional package dependencies*. To do so, one only needs +to employ *string annotations* as follows. + +.. literalinclude:: ../examples/jupyter_repo2cwl_python.py + :language: python + :caption: Sample CWL annotations of Python code in Jupyter Notebook + :name: example_app_pkg_jupyter_repo2cwl_python + +.. seealso:: + See `IPython2CWL Supported Types `_ + for more details about the mapping from a Python annotation to the resulting :term:`CWL` :ref:`cwl-io-types`. + +When the above code is saved in a |jupyter-notebooks|_ and committed to a Git repository, the |jupyter-repo2cwl|_ +utility can automatically clone the repository, parse the Python code, extract the :term:`CWL` annotations, and +generate the :term:`Application Package` with a :term:`Docker` container containing all of their respective definitions. 
+All of this is accomplished with a single call to obtain a deployable :term:`CWL` in `Weaver`, which can then take over +from the :ref:`Process Deployment ` to obtain an :term:`OGC API - Process` definition. + +Jupyter Notebook to CWL Example: NCML to STAC Application +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For a more concrete example of |jupyter-notebooks|_ conversion to :term:`CWL`, see the |ncml2stac-repo|_ GitHub +repository, which contains a sample |ncml2stac-notebook|_. +This script, as indicated by its name, converts *NCML XML metadata with CMIP6 attributes* into the +corresponding *SpatioTemporal Asset Catalog* (STAC) definition and extensions. +It uses the same `IPython2CWL <https://github.com/common-workflow-lab/ipython2cwl>`_ type annotation strategy as presented +:ref:`above <example_app_pkg_jupyter_repo2cwl_python>` to indicate which NCML ``File`` variable is to be employed +as the :term:`CWL` input reference, and the expected STAC ``File`` as output to be collected by :term:`CWL`. + +Using |jupyter-repo2cwl|_ and the :ref:`Weaver CLI ` in combination, as shown below, +it is possible to automatically convert the Jupyter Notebook Script into a Dockerized :term:`CWL` and +deploy it as an :term:`OGC API - Process`. + +.. code-block:: shell + :caption: *Jupyter Notebook* conversion to *CWL* and Deployment as *OGC API - Processes* + + jupyter-repo2cwl "https://github.com/crim-ca/ncml2stac" -o /tmp + weaver deploy -u http://example.com/weaver -i ncml2stac --cwl /tmp/notebooks_ncml2stac.cwl + +.. seealso:: + - Refer to the |ncml2stac-repo|_ repository's README for more details about the utilities. + - Refer to the |ncml2stac-notebook|_ for the implementation of the :term:`Application Package` script. + +.. |jupyter-notebooks| replace:: Jupyter Notebooks +.. _jupyter-notebooks: https://jupyter.org/ + +.. |jupyterhub-repo2docker| replace:: ``jupyterhub/repo2docker`` +.. _jupyterhub-repo2docker: https://github.com/jupyterhub/repo2docker + +.. |jupyter-repo2cwl| replace:: ``jupyter repo2cwl`` +.. 
_jupyter-repo2cwl: https://github.com/common-workflow-lab/ipython2cwl + +.. |ncml2stac-repo| replace:: ``crim-ca/ncml2stac`` +.. _ncml2stac-repo: https://github.com/crim-ca/ncml2stac/tree/main#ncml-to-stac + +.. |ncml2stac-notebook| replace:: NCML to STAC Jupyter Notebook +.. _ncml2stac-notebook: https://github.com/crim-ca/ncml2stac/blob/main/notebooks/ncml2stac.ipynb + .. _app_pkg_docker: Dockerized Applications diff --git a/docs/source/references.rst b/docs/source/references.rst index 576de94cc..241df4e52 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -34,6 +34,8 @@ .. _cwl-spec: https://www.commonwl.org/specification/ .. |cwl-guide| replace:: CWL User Guide .. _cwl-guide: http://www.commonwl.org/user_guide/ +.. |cwl-dev-tools| replace:: CWL Development Tools +.. _cwl-dev-tools: https://www.commonwl.org/tools/ .. |cwl-cmdtool| replace:: CWL CommandLineTool .. _cwl-cmdtool: https://www.commonwl.org/v1.1/CommandLineTool.html .. |cwl-workflow| replace:: CWL Workflow