From 502b83a0b56ec3c80df70801e016975fcc355346 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 22 Nov 2024 19:36:05 -0500 Subject: [PATCH] add workaround patch of enum with colon failing CWL validation (relates to https://github.com/common-workflow-language/cwltool/issues/2071) --- CHANGES.rst | 5 +++++ tests/processes/test_convert.py | 12 +++++++++--- weaver/formats.py | 2 +- weaver/processes/convert.py | 34 ++++++++++++++++++++++++++------- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e2afd597a..a0950548a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -65,6 +65,11 @@ Changes: Fixes: ------ +- Fix `CWL` ``enum`` type mishandling ``symbols`` containing a colon (``:``) character (e.g.: a list of allowed times) + leading to their invalid interpretation as namespaced strings (i.e.: ``:``), in turn failing validation + and breaking the resulting `CWL`. Such ``enum`` will be patched with an updated ``valueFrom`` JavaScript definition + performing the equivalent ``enum`` value validation to transparently replicate the original intent (relates to + `common-workflow-language/cwltool#2071 `_). - Fix `CWL` conversion from a `OGC API - Processes` definition specifying an `I/O` with ``schema`` explicitly indicating a ``type: array`` and nested ``enum``, even if ``minOccurs: 1`` is omitted or explicitly set. - Fix ``url`` parameter to override the `CLI` internal ``url`` when passed explicitly to the invoked operation. diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index 94e1e3a0e..d1f419e31 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -2236,29 +2236,34 @@ def test_ogcapi2cwl_process_without_extra(): @pytest.mark.parametrize( - ["input_str", "input_int", "input_float"], + ["input_str", "input_int", "input_float", "input_time"], [ # OpenAPI schema references ( {"schema": {"type": "string", "enum": ["a", "b", "c"]}}, {"schema": {"type": "integer", "enum": [1, 2, 3]}}, {"schema": {"type": "number", "format": "float", "enum": [1.2, 3.4]}}, + {"schema": {"type": "string", "format": "time", "enum": ["12:00", "24:00"]}}, ), # OGC-API input definitions ( {"data_type": "string", "allowed_values": ["a", "b", "c"]}, {"data_type": "integer", "allowed_values": [1, 2, 3]}, {"data_type": "float", "allowed_values": [1.2, 3.4]}, + {"data_type": "string", "allowed_values": ["12:00", "24:00"]}, ), ] ) -def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float): +def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float, input_time): """ - Test that a :term:`CWL` with pseudo-``Enum`` type has the necessary :term:`CWL` requirements to perform validation. + Test that a :term:`CWL` with pseudo-``enum`` type has the necessary :term:`CWL` requirements to perform validation. .. seealso:: - :func:`test_any2cwl_io_enum_convert` - :func:`test_any2cwl_io_enum_validate` + - :func:`weaver.processes.convert._convert_cwl_io_enum` + - :func:`weaver.processes.convert._get_cwl_js_value_from` + - :func:`weaver.processes.convert._patch_cwl_enum_js_requirement` """ href = "https://remote-server.com/processes/test-process" body = { @@ -2266,6 +2271,7 @@ def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float): "enum-str": input_str, "enum-int": input_int, "enum-float": input_float, + "enum-time": input_time, }, "outputs": { "output": {"schema": {"type": "string", "contentMediaType": ContentType.TEXT_PLAIN}}, diff --git a/weaver/formats.py b/weaver/formats.py index 133512880..cddc7dcf9 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -828,7 +828,7 @@ def get_cwl_file_format(media_type, make_reference=False, must_exist=True, allow ``must_exist=False`` before providing it to the `CWL` I/O definition. Setting ``must_exist=False`` should be used only for literal string comparison or pre-processing steps to evaluate formats. - :param media_type: Some reference, namespace'd or literal (possibly extended) media-type string. + :param media_type: Some reference, namespaced or literal (possibly extended) media-type string. :param make_reference: Construct the full URL reference to the resolved media-type. Otherwise, return tuple details. :param must_exist: Return result only if it can be resolved to an official media-type (or synonym if enabled), otherwise ``None``. diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 0d0caf370..69d9d24d9 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -857,29 +857,48 @@ def _get_cwl_js_value_from(cwl_io_symbols, allow_unique, allow_array): def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, allow_array): # type: (Union[str, Type[null]], List[AnyValueType], IO_Select_Type, bool, bool) -> CWL_IO_Type """ - Converts the :term:`I/O` definition to a :term:`CWL` :term:`I/O` that allows ``Enum``-like functionality. + Converts the :term:`I/O` definition to a :term:`CWL` :term:`I/O` that allows ``enum``-like functionality. In the event of an explicit ``string`` as base type, :term:`CWL` directly supports ``type: enum``. Other basic - types are not directly supported, and must instead perform manual validation against the set of allowed values. + types are not directly supported, and must instead perform manual validation against the set of allowed values, + using JavaScript evaluation of the submitted values to replicate the automatic behavior performed by ``enum`` type. .. seealso:: - https://github.com/common-workflow-language/cwl-v1.2/issues/267 - https://github.com/common-workflow-language/common-workflow-language/issues/764 - https://github.com/common-workflow-language/common-workflow-language/issues/907 + Another edge-case that happens even if the base type is a ``string`` is when the enum ``symbols`` + contain an entry with a ``:`` character (e.g.: as in the case of a time ``HH:MM`` string). + In such case, :mod:`schema_salad` incorrectly parses it as if a namespaced :term:`URL` (i.e.: ``:``) + was specified. Because of this, the symbol strings get extended to an unresolvable namespace, which leads to a + partial and incorrect enum value (e.g.: ``abc:def`` becomes ``input-id#def``). This causes the resulting ``enum`` + to never accept the submitted values, or worst, causes the entire :term:`CWL` to fail validation if duplicate + values are obtained (e.g.: ``12:00`` and ``24:00`` both extend to ``input-id#00``, causing a duplicate ``00``). + To handle this case, they will also be patched with a ``valueFrom`` approach as in the case of numeric enums. + + .. seealso:: + - https://github.com/common-workflow-language/cwltool/issues/2071 + .. warning:: Because ``valueFrom`` can only be used with ``inputBinding``, any output providing a set of allowed values that are not ``string``-based will be ignored when converted to :term:`CWL` :term:`I/O`. + .. seealso:: + - :func:`_get_cwl_js_value_from` defines the ``enum``-like ``valueFrom`` checks + - :func:`_patch_cwl_enum_js_requirement` must be called on the entire :term:`CWL` to inject the + relevant :data:`CWL_REQUIREMENT_INLINE_JAVASCRIPT`, since it might not be defined in the original :term:`CWL`. + :param cwl_io_type: Basic type for which allowed values should apply. :param cwl_io_symbols: Allowed values to restrict the :term:`I/O` definition. :return: Converted definition as CWL Enum or with relevant value validation as applicable for the type. """ if cwl_io_type not in PACKAGE_BASIC_TYPES: return {} - if cwl_io_type == "string": + any_colon_symbol = any(":" in str(symbol) for symbol in cwl_io_symbols) + if cwl_io_type == "string" and not any_colon_symbol: return {"type": {"type": PACKAGE_ENUM_BASE, "symbols": cwl_io_symbols}} - if cwl_io_type not in PACKAGE_NUMERIC_TYPES: + if cwl_io_type not in PACKAGE_NUMERIC_TYPES and not (cwl_io_type == "string" and any_colon_symbol): LOGGER.warning( "Could not resolve conversion of CWL I/O as Enum for type '%s'. " "Ignoring value validation against specified allowed values: %s.", @@ -889,9 +908,10 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a return {"type": cwl_io_type} if not ( - (all(isinstance(value, bool) for value in cwl_io_symbols) and cwl_io_type == "boolean") or - (all(isinstance(value, int) for value in cwl_io_symbols) and cwl_io_type in PACKAGE_INTEGER_TYPES) or - (all(isinstance(value, float) for value in cwl_io_symbols) and cwl_io_type in PACKAGE_FLOATING_TYPES) + (cwl_io_type == "string" and all(isinstance(value, str) for value in cwl_io_symbols)) or + (cwl_io_type == "boolean" and all(isinstance(value, bool) for value in cwl_io_symbols)) or + (cwl_io_type in PACKAGE_INTEGER_TYPES and all(isinstance(value, int) for value in cwl_io_symbols)) or + (cwl_io_type in PACKAGE_FLOATING_TYPES and all(isinstance(value, float) for value in cwl_io_symbols)) ): LOGGER.warning( "Incompatible CWL I/O type '%s' detected for specified allowed values: %s. "